# cointrader/src/backtester.py

"""
독립 백테스트 엔진.
봇 코드(src/bot.py)를 수정하지 않고, 기존 모듈을 재활용하여
풀 파이프라인(지표 → 시그널 → ML 필터 → 진입/청산)을 동기 루프로 시뮬레이션한다.
"""
from __future__ import annotations

import json
import warnings
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path

import joblib
import lightgbm as lgb
import numpy as np
import pandas as pd
from loguru import logger

# Annualization factor for a 24/7 crypto market on 15-minute candles:
# 96 candles per day x 365 days = 35,040.
_ANNUALIZE_FACTOR = 35_040


def _calc_trade_stats(trades: list[dict], initial_balance: float) -> dict:
    """Summarize a list of closed trades into performance statistics.

    Shared by ``Backtester`` and ``WalkForwardBacktester``.

    Args:
        trades: Closed-trade records in chronological order. Every record
            must provide ``net_pnl``, ``entry_fee``, ``exit_fee`` and
            ``close_reason``.
        initial_balance: Starting account balance. It is the base of
            ``return_pct`` and the first point of the equity curve used for
            the drawdown.

    Returns:
        A dict with the keys ``total_trades``, ``total_pnl``, ``return_pct``,
        ``win_rate``, ``avg_win``, ``avg_loss``, ``payoff_ratio``,
        ``max_consecutive_losses``, ``profit_factor``, ``max_drawdown_pct``,
        ``sharpe_ratio``, ``total_fees`` and ``close_reasons``.
        ``payoff_ratio`` and ``profit_factor`` are ``float("inf")`` when
        there is nothing to divide by (no losing amount).
    """
    if not trades:
        return {
            "total_trades": 0, "total_pnl": 0.0, "return_pct": 0.0,
            "win_rate": 0.0, "avg_win": 0.0, "avg_loss": 0.0,
            "payoff_ratio": 0.0, "max_consecutive_losses": 0,
            "profit_factor": 0.0, "max_drawdown_pct": 0.0,
            "sharpe_ratio": 0.0, "total_fees": 0.0, "close_reasons": {},
        }

    pnls = [t["net_pnl"] for t in trades]
    # A break-even trade (pnl == 0) is counted on the losing side.
    wins = [p for p in pnls if p > 0]
    losses = [p for p in pnls if p <= 0]

    total_pnl = sum(pnls)
    total_fees = sum(t["entry_fee"] + t["exit_fee"] for t in trades)
    gross_profit = sum(wins) if wins else 0.0
    gross_loss = abs(sum(losses)) if losses else 0.0

    # The equity curve starts at the initial balance. Without that first
    # point the running peak would begin *after* the first trade, and a loss
    # on the very first trade would never register as a drawdown.
    equity = initial_balance + np.concatenate(([0.0], np.cumsum(pnls)))
    peak = np.maximum.accumulate(equity)
    drawdown = np.zeros_like(equity, dtype=float)
    positive_peak = peak > 0  # a non-positive peak has no meaningful ratio
    drawdown[positive_peak] = (
        (peak[positive_peak] - equity[positive_peak]) / peak[positive_peak]
    )
    mdd = float(np.max(drawdown)) * 100

    # NOTE(review): the Sharpe ratio is computed on per-trade PnL but scaled
    # with the per-candle annualization factor - confirm this is intended.
    sharpe = 0.0
    if len(pnls) > 1:
        pnl_arr = np.array(pnls)
        pnl_std = np.std(pnl_arr)
        if pnl_std > 0:
            sharpe = float(np.mean(pnl_arr) / pnl_std * np.sqrt(_ANNUALIZE_FACTOR))

    avg_w = float(np.mean(wins)) if wins else 0.0
    avg_l = float(np.mean(losses)) if losses else 0.0
    payoff_ratio = round(avg_w / abs(avg_l), 2) if avg_l != 0 else float("inf")

    # Longest run of consecutive non-winning trades.
    max_consec_loss = 0
    cur_streak = 0
    for p in pnls:
        if p <= 0:
            cur_streak += 1
            max_consec_loss = max(max_consec_loss, cur_streak)
        else:
            cur_streak = 0

    # Number of trades per close reason.
    reasons: dict = {}
    for t in trades:
        r = t["close_reason"]
        reasons[r] = reasons.get(r, 0) + 1

    # Avoid ZeroDivisionError when the run was configured with a zero balance.
    return_pct = round(total_pnl / initial_balance * 100, 2) if initial_balance else 0.0

    return {
        "total_trades": len(trades),
        "total_pnl": round(total_pnl, 4),
        "return_pct": return_pct,
        "win_rate": round(len(wins) / len(trades) * 100, 2),
        "avg_win": round(avg_w, 4),
        "avg_loss": round(avg_l, 4),
        "payoff_ratio": payoff_ratio,
        "max_consecutive_losses": max_consec_loss,
        "profit_factor": round(gross_profit / gross_loss, 2) if gross_loss > 0 else float("inf"),
        "max_drawdown_pct": round(mdd, 2),
        "sharpe_ratio": round(sharpe, 2),
        "total_fees": round(total_fees, 4),
        "close_reasons": reasons,
    }

from src.dataset_builder import (
    _calc_indicators, _calc_signals, _calc_features_vectorized,
    generate_dataset_vectorized, stratified_undersample,
)
from src.ml_features import FEATURE_COLS
from src.ml_filter import MLFilter


# ── 설정 ─────────────────────────────────────────────────────────────
@dataclass
class BacktestConfig:
    """Settings for one backtest run.

    Groups the traded symbols and period, account and cost assumptions, the
    ML filter switch, risk limits, margin sizing, SL/TP distances and the
    strategy thresholds handed to the signal calculation.
    """
    symbols: list[str] = field(default_factory=lambda: ["XRPUSDT"])
    # Optional inclusive bounds of the loaded data; None leaves that side open.
    start: str | None = None
    end: str | None = None
    initial_balance: float = 1000.0
    leverage: int = 10
    fee_pct: float = 0.04        # taker fee (%)
    slippage_pct: float = 0.01   # slippage (%)
    use_ml: bool = True
    ml_threshold: float = 0.55
    # Risk limits
    # Fraction of the initial balance (0.05 = 5%), compared against the
    # day's realized loss.
    max_daily_loss_pct: float = 0.05
    max_positions: int = 3
    max_same_direction: int = 2
    # Margin
    margin_max_ratio: float = 0.50
    margin_min_ratio: float = 0.20
    margin_decay_rate: float = 0.0006
    # SL/TP ATR multipliers
    atr_sl_mult: float = 2.0
    atr_tp_mult: float = 2.0
    min_notional: float = 5.0
    # Strategy parameters
    signal_threshold: int = 3
    adx_threshold: float = 25.0
    volume_multiplier: float = 2.5

    # Unannotated on purpose: a plain class attribute, not a dataclass field,
    # so it is neither a constructor argument nor part of asdict().
    WARMUP = 60  # number of candles needed for the indicators to stabilize


# ── 포지션 상태 ──────────────────────────────────────────────────────
@dataclass
class Position:
    """State of one open position held by the backtester."""
    symbol: str
    side: str           # "LONG" | "SHORT"
    entry_price: float  # fill price of the entry
    quantity: float
    sl: float           # stop-loss price
    tp: float           # take-profit price
    entry_time: pd.Timestamp
    entry_fee: float    # fee of the entry fill
    # Snapshot of indicator values taken when the position was opened.
    entry_indicators: dict = field(default_factory=dict)
    # ML probability recorded at entry; None when no probability was available.
    ml_proba: float | None = None


# ── 동기 RiskManager ─────────────────────────────────────────────────
class BacktestRiskManager:
    """Synchronous risk manager used by the backtester.

    Tracks the realized PnL of the current day, the set of open positions
    (one per symbol) and derives the margin ratio from the account balance.
    """

    def __init__(self, cfg: BacktestConfig):
        """Start with no open positions and a zero daily PnL."""
        self.cfg = cfg
        self._current_date: str | None = None
        self.open_positions: dict[str, str] = {}  # {symbol: side}
        self.initial_balance: float = cfg.initial_balance
        self.base_balance: float = cfg.initial_balance
        self.daily_pnl: float = 0.0

    def new_day(self, date_str: str):
        """Reset the daily PnL whenever ``date_str`` differs from the last seen date."""
        if self._current_date == date_str:
            return
        self._current_date = date_str
        self.daily_pnl = 0.0

    def is_trading_allowed(self) -> bool:
        """Return False once today's realized loss reaches the configured limit.

        The loss is measured as a fraction of the initial balance. A
        non-positive initial balance disables the check.
        """
        if self.initial_balance <= 0 or not self.daily_pnl < 0:
            return True
        loss_fraction = abs(self.daily_pnl) / self.initial_balance
        return not loss_fraction >= self.cfg.max_daily_loss_pct

    def can_open(self, symbol: str, side: str) -> bool:
        """Return True when a new ``side`` position on ``symbol`` respects all limits.

        Rejected when the total position cap is reached, when the symbol
        already has a position, or when the per-direction cap is reached.
        """
        held = self.open_positions
        if len(held) >= self.cfg.max_positions or symbol in held:
            return False
        same_side_count = list(held.values()).count(side)
        return not same_side_count >= self.cfg.max_same_direction

    def register(self, symbol: str, side: str):
        """Record that ``symbol`` now holds a position on ``side``."""
        self.open_positions[symbol] = side

    def close(self, symbol: str, pnl: float):
        """Forget the position on ``symbol`` and add ``pnl`` to today's total."""
        self.daily_pnl += pnl
        self.open_positions.pop(symbol, None)

    def get_dynamic_margin_ratio(self, balance: float) -> float:
        """Return the margin ratio for ``balance``.

        The ratio starts at ``margin_max_ratio`` and shrinks linearly with
        the balance gained over the base balance, clamped to the configured
        minimum and maximum.
        """
        floor = self.cfg.margin_min_ratio
        ceiling = self.cfg.margin_max_ratio
        growth = balance - self.base_balance
        raw_ratio = ceiling - (growth * self.cfg.margin_decay_rate)
        capped = min(ceiling, raw_ratio)
        return max(floor, capped)


# ── 유틸 ─────────────────────────────────────────────────────────────
def _apply_slippage(price: float, side: str, slippage_pct: float) -> float:
    """Return ``price`` worsened by market-order slippage.

    A ``"BUY"`` fills higher by ``slippage_pct`` percent; any other side is
    treated as a sell and fills lower by the same percentage.
    """
    fraction = slippage_pct / 100.0
    multiplier = 1 + fraction if side == "BUY" else 1 - fraction
    return price * multiplier


def _calc_fee(price: float, quantity: float, fee_pct: float) -> float:
    """Return the fee for a fill: ``fee_pct`` percent of ``price * quantity``."""
    notional = price * quantity
    return notional * fee_pct / 100.0


def _load_data(symbol: str, start: str | None, end: str | None) -> pd.DataFrame:
    path = Path(f"data/{symbol.lower()}/combined_15m.parquet")
    if not path.exists():
        raise FileNotFoundError(f"데이터 파일 없음: {path}")
    df = pd.read_parquet(path)
    if "timestamp" in df.columns:
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        df = df.set_index("timestamp").sort_index()
    elif not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index)
        df = df.sort_index()
    # tz-aware → tz-naive 통일 (UTC 기준)
    if df.index.tz is not None:
        df.index = df.index.tz_localize(None)
    if start:
        df = df[df.index >= pd.Timestamp(start)]
    if end:
        df = df[df.index <= pd.Timestamp(end)]
    return df


def _get_ml_proba(ml_filter: MLFilter | None, features: pd.Series) -> float | None:
    """Return the ML probability for ``features``, or None when unavailable.

    None is returned when there is no filter, when the filter has no model
    loaded, or when inference raises (the error is logged as a warning).
    The ONNX session is used when the filter has one; otherwise the LightGBM
    model is queried with the feature columns present in ``features``.
    """
    if ml_filter is None or not ml_filter.is_model_loaded():
        return None

    try:
        session = ml_filter._onnx_session
        if session is None:
            # LightGBM path: only the feature columns present in the series.
            present = [c for c in FEATURE_COLS if c in features.index]
            values = features[present].values.astype(np.float64)
            frame = pd.DataFrame([values], columns=present)
            return float(ml_filter._lgbm_model.predict_proba(frame)[0][1])

        # ONNX path: a single float32 row covering every feature column.
        input_name = session.get_inputs()[0].name
        batch = features[FEATURE_COLS].values.astype(np.float32).reshape(1, -1)
        return float(session.run(None, {input_name: batch})[0][0])
    except Exception as e:
        logger.warning(f"ML PROBA ERROR: {e}")
        return None


# ── 메인 엔진 ────────────────────────────────────────────────────────
class Backtester:
    """Synchronous backtest engine that replays candles one event at a time.

    For every configured symbol it loads candles, computes indicators,
    signals and the vectorized ML features, then walks the merged candle
    stream applying SL/TP exits, reverse-signal exits and new entries.
    Closed trades are collected in ``self.trades``; one equity sample is
    appended to ``self.equity_curve`` per processed event.
    """

    def __init__(self, cfg: BacktestConfig):
        """Initialize the account state and one ML filter per symbol.

        Args:
            cfg: Backtest settings. With ``cfg.use_ml`` enabled, model files
                are looked up under ``models/<symbol>/`` and, when that
                directory does not exist, under ``models/``. A symbol whose
                filter has no model loaded runs without ML filtering.
        """
        self.cfg = cfg
        self.risk = BacktestRiskManager(cfg)
        self.balance = cfg.initial_balance
        self.positions: dict[str, Position] = {}  # {symbol: Position}
        self.trades: list[dict] = []
        self.equity_curve: list[dict] = []
        self._peak_equity: float = cfg.initial_balance

        # ML filter per symbol; None disables filtering for that symbol.
        self.ml_filters: dict[str, MLFilter | None] = {}
        if cfg.use_ml:
            for sym in cfg.symbols:
                sym_dir = Path(f"models/{sym.lower()}")
                onnx = str(sym_dir / "mlx_filter.weights.onnx")
                lgbm = str(sym_dir / "lgbm_filter.pkl")
                if not sym_dir.exists():
                    onnx = "models/mlx_filter.weights.onnx"
                    lgbm = "models/lgbm_filter.pkl"
                mf = MLFilter(onnx_path=onnx, lgbm_path=lgbm, threshold=cfg.ml_threshold)
                self.ml_filters[sym] = mf if mf.is_model_loaded() else None
        else:
            for sym in cfg.symbols:
                self.ml_filters[sym] = None

    def run(self, ml_models: dict[str, object] | None = None) -> dict:
        """Run the backtest and return ``{config, summary, trades, validation}``.

        Args:
            ml_models: Optional ``{symbol: trained model}`` mapping supplied
                by the walk-forward driver. When given and ``cfg.use_ml`` is
                true it replaces the file-based filters; symbols missing
                from the mapping, or mapped to None, run without a filter.

        Returns:
            The result dict built by ``_build_result``.
        """
        all_indicators: dict[str, pd.DataFrame] = {}
        all_signals: dict[str, np.ndarray] = {}
        all_features: dict[str, pd.DataFrame] = {}
        base_cols = ["open", "high", "low", "close", "volume"]

        for sym in self.cfg.symbols:
            df = _load_data(sym, self.cfg.start, self.cfg.end)

            # BTC/ETH correlation data: prefer the columns embedded in the
            # symbol's own file, fall back to the separate files.
            if "close_btc" in df.columns:
                btc_df = df[[c + "_btc" for c in base_cols]].copy()
                btc_df.columns = base_cols
            else:
                btc_df = self._try_load_corr("BTCUSDT")
            if "close_eth" in df.columns:
                eth_df = df[[c + "_eth" for c in base_cols]].copy()
                eth_df.columns = base_cols
            else:
                eth_df = self._try_load_corr("ETHUSDT")

            df_ind = _calc_indicators(df)
            all_indicators[sym] = df_ind
            sig_arr = _calc_signals(
                df_ind,
                signal_threshold=self.cfg.signal_threshold,
                adx_threshold=self.cfg.adx_threshold,
                volume_multiplier=self.cfg.volume_multiplier,
            )
            all_signals[sym] = sig_arr
            # Pre-compute the vectorized features so the backtest goes
            # through the same feature pipeline as the dataset builder.
            all_features[sym] = _calc_features_vectorized(
                df_ind, sig_arr, btc_df=btc_df, eth_df=eth_df,
            )
            logger.info(f"[{sym}] 데이터 로드: {len(df):,}캔들 ({df.index[0]} ~ {df.index[-1]})")

        # Inject walk-forward models (only when the ML filter is enabled).
        if ml_models is not None and self.cfg.use_ml:
            self.ml_filters = {}
            for sym in self.cfg.symbols:
                model = ml_models.get(sym)
                if model is not None:
                    self.ml_filters[sym] = MLFilter.from_model(
                        model, threshold=self.cfg.ml_threshold
                    )
                else:
                    self.ml_filters[sym] = None

        # Multi-symbol: merge all candles into one timestamp-ordered stream.
        events = self._build_events(all_indicators, all_signals)
        logger.info(f"총 이벤트: {len(events):,}개")

        latest_prices: dict[str, float] = {}
        for ts, sym, candle_idx in events:
            self.risk.new_day(str(ts.date()))

            df_ind = all_indicators[sym]
            signal = all_signals[sym][candle_idx]
            row = df_ind.iloc[candle_idx]
            latest_prices[sym] = float(row["close"])

            # Equity is sampled before this event changes any position.
            self._record_equity(ts, current_prices=latest_prices)

            # 1) SL/TP of the held position. This runs even while the
            #    daily-loss halt is active: the halt must only stop new
            #    trading decisions. Checking the halt first would leave open
            #    positions without a stop for the rest of the day and later
            #    close them at the SL price no matter how far price had moved.
            if sym in self.positions and self._check_sl_tp(sym, row, ts):
                continue

            # 2) Daily-loss halt: no signal-driven action for the rest of the day.
            if not self.risk.is_trading_allowed():
                continue

            if signal == "HOLD":
                continue

            pos = self.positions.get(sym)
            if pos is not None:
                # 3) A position is held: act only on an opposite signal.
                is_reverse = (pos.side == "LONG" and signal == "SHORT") or \
                             (pos.side == "SHORT" and signal == "LONG")
                if not is_reverse:
                    continue
                self._close_position(sym, row["close"], ts, "REVERSE_SIGNAL")
                # NOTE(review): the re-entry below is not re-checked against
                # the daily-loss limit even if the close above just breached
                # it - confirm this matches the live bot.

            # 3b/4) Re-entry in the new direction, or a fresh entry.
            if self.risk.can_open(sym, signal):
                self._try_enter(
                    sym, signal, df_ind, candle_idx,
                    all_features[sym], ts=ts,
                )

        # Force-close whatever is still open at the end of the data.
        for sym in list(self.positions.keys()):
            last_df = all_indicators[sym]
            last_price = last_df["close"].iloc[-1]
            last_ts = last_df.index[-1]
            self._close_position(sym, last_price, last_ts, "END_OF_DATA")

        return self._build_result()

    def _try_load_corr(self, symbol: str) -> pd.DataFrame | None:
        """Load the optional correlation candles of ``symbol``.

        Looks for ``data/<symbol>/combined_15m.parquet`` and then for
        ``data/combined_15m.parquet``. The frame gets a tz-naive datetime
        index and is cut to the configured ``start`` / ``end``.

        Returns:
            The frame, or None when no file exists or loading fails (the
            failure is logged; correlation data is optional).
        """
        path = Path(f"data/{symbol.lower()}/combined_15m.parquet")
        if not path.exists():
            alt = Path("data/combined_15m.parquet")
            if not alt.exists():
                return None
            path = alt
        try:
            df = pd.read_parquet(path)
            if "timestamp" in df.columns:
                df["timestamp"] = pd.to_datetime(df["timestamp"])
                df = df.set_index("timestamp").sort_index()
            elif not isinstance(df.index, pd.DatetimeIndex):
                df.index = pd.to_datetime(df.index)
                df = df.sort_index()
            if df.index.tz is not None:
                df.index = df.index.tz_localize(None)
            if self.cfg.start:
                df = df[df.index >= pd.Timestamp(self.cfg.start)]
            if self.cfg.end:
                df = df[df.index <= pd.Timestamp(self.cfg.end)]
            return df
        except Exception as exc:
            # Best effort: keep running without correlation data, but make
            # the reason visible instead of dropping it silently.
            logger.warning(f"[{symbol}] correlation data load failed ({path}): {exc}")
            return None

    def _build_events(
        self,
        all_indicators: dict[str, pd.DataFrame],
        all_signals: dict[str, np.ndarray],
    ) -> list[tuple[pd.Timestamp, str, int]]:
        """Merge the candles of all symbols into one ordered event list.

        The first ``cfg.WARMUP`` candles of each symbol are skipped. Events
        are ``(timestamp, symbol, candle index)`` tuples sorted by timestamp
        and then by symbol. ``all_signals`` is accepted but not used.
        """
        warmup = self.cfg.WARMUP
        events = [
            (ts, sym, i)
            for sym, df_ind in all_indicators.items()
            for i, ts in enumerate(df_ind.index[warmup:], start=warmup)
        ]
        events.sort(key=lambda ev: (ev[0], ev[1]))
        return events

    def _check_sl_tp(self, symbol: str, row: pd.Series, ts: pd.Timestamp) -> bool:
        """Close the position on ``symbol`` if this candle touched SL or TP.

        The candle's high/low decide the hit. When both levels lie inside
        the same candle the stop-loss wins (conservative assumption).

        Returns:
            True when the position was closed, otherwise False.
        """
        pos = self.positions[symbol]
        high = row["high"]
        low = row["low"]

        if pos.side == "LONG":
            sl_hit = low <= pos.sl
            tp_hit = high >= pos.tp
        else:  # SHORT
            sl_hit = high >= pos.sl
            tp_hit = low <= pos.tp

        if sl_hit:
            self._close_position(symbol, pos.sl, ts, "STOP_LOSS")
            return True
        if tp_hit:
            self._close_position(symbol, pos.tp, ts, "TAKE_PROFIT")
            return True
        return False

    def _try_enter(
        self,
        symbol: str,
        signal: str,
        df_ind: pd.DataFrame,
        candle_idx: int,
        feat_df: pd.DataFrame,
        ts: pd.Timestamp,
    ):
        """Apply the ML filter, size the position and open it.

        Returns without opening anything when the ML probability is below
        ``cfg.ml_threshold``, when the candle has no usable close or ATR, or
        when the rounded quantity cannot reach ``cfg.min_notional``.
        """
        row = df_ind.iloc[candle_idx]

        # Look the row up in the pre-computed vectorized features.
        available_cols = [c for c in FEATURE_COLS if c in feat_df.columns]
        features = feat_df.iloc[candle_idx][available_cols]

        # ML filter. A probability exists only when a model is loaded; a
        # failed inference yields None and lets the signal through.
        ml_proba = _get_ml_proba(self.ml_filters.get(symbol), features)
        if ml_proba is not None and ml_proba < self.cfg.ml_threshold:
            return  # blocked by the ML filter

        # `x <= 0` is False for NaN, so a NaN ATR would produce NaN SL/TP
        # levels that no candle can ever hit. Reject non-finite values.
        price = float(row["close"])
        atr = float(row.get("atr", 0))
        if not (np.isfinite(price) and price > 0):
            return
        if not (np.isfinite(atr) and atr > 0):
            return

        # Position sizing: equal balance share per symbol, scaled by the
        # dynamic margin ratio and leverage, never below the minimum notional.
        num_symbols = len(self.cfg.symbols)
        per_symbol_balance = self.balance / num_symbols
        margin_ratio = self.risk.get_dynamic_margin_ratio(self.balance)
        notional = per_symbol_balance * margin_ratio * self.cfg.leverage
        if notional < self.cfg.min_notional:
            notional = self.cfg.min_notional
        quantity = round(notional / price, 1)
        if quantity * price < self.cfg.min_notional:
            # Rounding down undershot the minimum: bump the quantity up.
            quantity = round(self.cfg.min_notional / price + 0.05, 1)
        if quantity <= 0 or quantity * price < self.cfg.min_notional:
            return

        # Market entry: apply slippage.
        buy_side = "BUY" if signal == "LONG" else "SELL"
        entry_price = _apply_slippage(price, buy_side, self.cfg.slippage_pct)

        # The entry fee is stored on the position and deducted from net_pnl
        # at close, so the balance is not reduced here.
        entry_fee = _calc_fee(entry_price, quantity, self.cfg.fee_pct)

        # SL/TP at ATR multiples around the entry price.
        if signal == "LONG":
            sl = entry_price - atr * self.cfg.atr_sl_mult
            tp = entry_price + atr * self.cfg.atr_tp_mult
        else:
            sl = entry_price + atr * self.cfg.atr_sl_mult
            tp = entry_price - atr * self.cfg.atr_tp_mult

        indicators_snapshot = {
            "rsi": float(row.get("rsi", 0)),
            "macd_hist": float(row.get("macd_hist", 0)),
            "atr": float(atr),
            "adx": float(row.get("adx", 0)),
        }

        self.positions[symbol] = Position(
            symbol=symbol,
            side=signal,
            entry_price=entry_price,
            quantity=quantity,
            sl=sl,
            tp=tp,
            entry_time=ts,
            entry_fee=entry_fee,
            entry_indicators=indicators_snapshot,
            ml_proba=ml_proba,
        )
        self.risk.register(symbol, signal)

    def _close_position(
        self, symbol: str, exit_price: float, ts: pd.Timestamp, reason: str
    ):
        """Close the position on ``symbol`` and record the trade.

        SL/TP exits fill at the given price without slippage;
        ``REVERSE_SIGNAL`` and ``END_OF_DATA`` exits are market orders and get
        slippage applied. The net PnL (gross minus entry and exit fee) is
        added to the balance and reported to the risk manager.

        Raises:
            KeyError: If ``symbol`` has no open position.
        """
        pos = self.positions.pop(symbol)

        if reason in ("REVERSE_SIGNAL", "END_OF_DATA"):
            close_side = "SELL" if pos.side == "LONG" else "BUY"
            exit_price = _apply_slippage(exit_price, close_side, self.cfg.slippage_pct)

        exit_fee = _calc_fee(exit_price, pos.quantity, self.cfg.fee_pct)

        if pos.side == "LONG":
            gross_pnl = (exit_price - pos.entry_price) * pos.quantity
        else:
            gross_pnl = (pos.entry_price - exit_price) * pos.quantity

        net_pnl = gross_pnl - pos.entry_fee - exit_fee
        self.balance += net_pnl
        self.risk.close(symbol, net_pnl)

        trade = {
            "symbol": symbol,
            "side": pos.side,
            "entry_time": str(pos.entry_time),
            "exit_time": str(ts),
            "entry_price": round(pos.entry_price, 6),
            "exit_price": round(exit_price, 6),
            "quantity": pos.quantity,
            "sl": round(pos.sl, 6),
            "tp": round(pos.tp, 6),
            "gross_pnl": round(gross_pnl, 6),
            "entry_fee": round(pos.entry_fee, 6),
            "exit_fee": round(exit_fee, 6),
            "net_pnl": round(net_pnl, 6),
            "close_reason": reason,
            "ml_proba": round(pos.ml_proba, 4) if pos.ml_proba is not None else None,
            "indicators": pos.entry_indicators,
        }
        self.trades.append(trade)

    def _record_equity(self, ts: pd.Timestamp, current_prices: dict[str, float] | None = None):
        """Append one equity sample: balance plus unrealized PnL.

        Positions without an entry in ``current_prices`` contribute nothing.
        Also keeps ``self._peak_equity`` at the highest equity seen.
        """
        prices = current_prices or {}
        unrealized = 0.0
        for sym, pos in self.positions.items():
            price = prices.get(sym)
            if price is None:
                continue
            if pos.side == "LONG":
                unrealized += (price - pos.entry_price) * pos.quantity
            else:
                unrealized += (pos.entry_price - price) * pos.quantity
        equity = self.balance + unrealized
        self.equity_curve.append({"timestamp": str(ts), "equity": round(equity, 4)})
        if equity > self._peak_equity:
            self._peak_equity = equity

    def _build_result(self) -> dict:
        """Assemble the result dict: config, summary, trades and validation."""
        summary = self._calc_summary()
        # Imported here rather than at module level, as in the original code.
        from src.backtest_validator import validate
        validation = validate(self.trades, summary, self.cfg)
        return {
            "config": asdict(self.cfg),
            "summary": summary,
            "trades": self.trades,
            "validation": validation,
        }

    def _calc_summary(self) -> dict:
        """Return the trade statistics of this run."""
        return _calc_trade_stats(self.trades, self.cfg.initial_balance)


# ── Walk-Forward 백테스트 ─────────────────────────────────────────────
@dataclass
class WalkForwardConfig(BacktestConfig):
    """Backtest settings extended with the walk-forward window and training options."""
    train_months: int = 6       # training window (months)
    test_months: int = 1        # test window (months)
    # Both values are forwarded to the dataset builder when a fold's model is trained.
    time_weight_decay: float = 2.0
    negative_ratio: int = 3


class WalkForwardBacktester:
    """Walk-forward backtest: train per window, evaluate only on later data.

    For every fold a LightGBM model is trained per symbol on the training
    window and a fresh ``Backtester`` is run on the test window that follows
    it, so each model is only ever evaluated on candles after its training
    data.
    """

    def __init__(self, cfg: WalkForwardConfig):
        """Store the walk-forward configuration."""
        self.cfg = cfg

    def run(self) -> dict:
        """Run every fold and return the aggregated walk-forward result.

        Returns:
            The dict built by ``_aggregate_results``: overall summary,
            per-fold summaries, all trades (tagged with their fold number)
            and the validation report.
        """
        # Local import: the module itself only imports dataclass/field/asdict.
        from dataclasses import fields

        # Load the full period once; folds are cut out of it.
        all_raw: dict[str, pd.DataFrame] = {
            sym: _load_data(sym, self.cfg.start, self.cfg.end)
            for sym in self.cfg.symbols
        }

        windows = self._build_windows(all_raw)
        logger.info(f"Walk-Forward: {len(windows)}개 윈도우 "
                     f"(학습 {self.cfg.train_months}개월, 검증 {self.cfg.test_months}개월)")

        # Every BacktestConfig field is copied from the walk-forward config.
        # Deriving the list from the dataclass keeps fields that are added
        # later from being silently left at their defaults.
        shared_cfg = {
            f.name: getattr(self.cfg, f.name)
            for f in fields(BacktestConfig)
            if f.init
        }

        all_trades: list[dict] = []
        fold_summaries: list[dict] = []

        for fold_no, (train_start, train_end, test_start, test_end) in enumerate(windows, start=1):
            logger.info(f"  폴드 {fold_no}/{len(windows)}: "
                         f"학습 {train_start.date()}~{train_end.date()}, "
                         f"검증 {test_start.date()}~{test_end.date()}")

            # Train one model per symbol (only when the ML filter is enabled).
            models = {}
            if self.cfg.use_ml:
                for sym in self.cfg.symbols:
                    models[sym] = self._train_model(
                        all_raw[sym], train_start, train_end, sym
                    )

            # Backtest the test window. The bounds are passed as full
            # timestamps: truncating them to dates would start the test at
            # midnight, i.e. inside the training window whenever the data
            # does not begin at 00:00.
            test_cfg = BacktestConfig(
                **{**shared_cfg, "start": str(test_start), "end": str(test_end)}
            )
            bt = Backtester(test_cfg)
            result = bt.run(ml_models=models)

            # Tag every trade with the fold it came from.
            for t in result["trades"]:
                t["fold"] = fold_no
            all_trades.extend(result["trades"])

            fold_summaries.append({
                "fold": fold_no,
                "train_period": f"{train_start.date()} ~ {train_end.date()}",
                "test_period": f"{test_start.date()} ~ {test_end.date()}",
                "summary": result["summary"],
            })

        return self._aggregate_results(all_trades, fold_summaries)

    def _build_windows(
        self, all_raw: dict[str, pd.DataFrame]
    ) -> list[tuple[pd.Timestamp, pd.Timestamp, pd.Timestamp, pd.Timestamp]]:
        """Build ``(train_start, train_end, test_start, test_end)`` windows.

        Windows are laid over the period common to all symbols. Each test
        window starts where its training window ends, and the next training
        window starts at the previous test start.

        Returns:
            The list of windows; empty when there is no data or the common
            period is shorter than one training plus one test window.
        """
        frames = list(all_raw.values())
        # No symbols, or a symbol without candles: there is no common period.
        if not frames or any(len(df) == 0 for df in frames):
            return []

        # Period shared by every symbol.
        start = max(df.index[0] for df in frames)
        end = min(df.index[-1] for df in frames)

        train_delta = pd.DateOffset(months=self.cfg.train_months)
        test_delta = pd.DateOffset(months=self.cfg.test_months)

        windows = []
        cursor = start
        while cursor + train_delta + test_delta <= end:
            train_end = cursor + train_delta
            test_end = train_end + test_delta
            windows.append((cursor, train_end, train_end, test_end))
            # NOTE(review): the cursor advances by the training length, so
            # consecutive test windows are train_months apart and do not
            # tile the whole period - confirm this is the intended step.
            cursor = train_end

        return windows

    def _train_model(
        self,
        raw_df: pd.DataFrame,
        train_start: pd.Timestamp,
        train_end: pd.Timestamp,
        symbol: str,
    ) -> object | None:
        """Train a LightGBM classifier on ``[train_start, train_end)``.

        Returns:
            The fitted model, or None when there are fewer than 200 training
            candles, dataset generation fails or yields no labelled rows, or
            fitting raises.
        """
        # Compare on tz-naive timestamps.
        ts_start = train_start.tz_localize(None) if train_start.tz else train_start
        ts_end = train_end.tz_localize(None) if train_end.tz else train_end
        candle_index = raw_df.index
        if candle_index.tz is not None:
            candle_index = candle_index.tz_localize(None)
        train_df = raw_df[(candle_index >= ts_start) & (candle_index < ts_end)]
        if len(train_df) < 200:
            logger.warning(f"  [{symbol}] 학습 데이터 부족: {len(train_df)}캔들")
            return None

        base_cols = ["open", "high", "low", "close", "volume"]
        df = train_df[base_cols].copy()

        # BTC/ETH correlation data, when embedded in the frame.
        btc_df = eth_df = None
        if "close_btc" in train_df.columns:
            btc_df = train_df[[c + "_btc" for c in base_cols]].copy()
            btc_df.columns = base_cols
        if "close_eth" in train_df.columns:
            eth_df = train_df[[c + "_eth" for c in base_cols]].copy()
            eth_df.columns = base_cols

        try:
            dataset = generate_dataset_vectorized(
                df, btc_df=btc_df, eth_df=eth_df,
                time_weight_decay=self.cfg.time_weight_decay,
                negative_ratio=self.cfg.negative_ratio,
                signal_threshold=self.cfg.signal_threshold,
                adx_threshold=self.cfg.adx_threshold,
                volume_multiplier=self.cfg.volume_multiplier,
                atr_sl_mult=self.cfg.atr_sl_mult,
                atr_tp_mult=self.cfg.atr_tp_mult,
            )
        except Exception as e:
            logger.warning(f"  [{symbol}] 데이터셋 생성 실패: {e}")
            return None

        if dataset.empty or "label" not in dataset.columns:
            return None

        actual_cols = [c for c in FEATURE_COLS if c in dataset.columns]
        X = dataset[actual_cols].values
        y = dataset["label"].values
        w = dataset["sample_weight"].values
        source = dataset["source"].values if "source" in dataset.columns else np.full(len(X), "signal")

        # Undersample; `keep` holds the selected row positions.
        keep = stratified_undersample(y, source, seed=42)

        # LightGBM parameters (active params file, or the defaults).
        lgbm_params = self._load_params(symbol)

        model = lgb.LGBMClassifier(**lgbm_params, random_state=42, verbose=-1)
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                model.fit(X[keep], y[keep], sample_weight=w[keep])
        except Exception as e:
            # Same contract as a failed dataset build: the fold runs
            # without a model for this symbol instead of aborting the run.
            logger.warning(f"  [{symbol}] 모델 학습 실패: {e}")
            return None

        return model

    def _load_params(self, symbol: str) -> dict:
        """Return the LightGBM parameters for ``symbol``.

        Reads ``models/<symbol>/active_lgbm_params.json`` or, if missing,
        ``models/active_lgbm_params.json``. The file's
        ``best_trial.params`` (minus ``weight_scale``) override the built-in
        defaults; with no file the defaults are returned unchanged.

        Raises:
            KeyError: If an existing file lacks ``best_trial`` / ``params``.
        """
        params_path = Path(f"models/{symbol.lower()}/active_lgbm_params.json")
        if not params_path.exists():
            params_path = Path("models/active_lgbm_params.json")

        default = {
            "n_estimators": 434,
            "learning_rate": 0.123659,
            "max_depth": 6,
            "num_leaves": 14,
            "min_child_samples": 10,
            "subsample": 0.929062,
            "colsample_bytree": 0.946330,
            "reg_alpha": 0.573971,
            "reg_lambda": 0.000157,
        }

        if params_path.exists():
            with open(params_path, encoding="utf-8") as f:
                data = json.load(f)
            best = dict(data["best_trial"]["params"])
            # weight_scale is dropped before the params reach LGBMClassifier.
            best.pop("weight_scale", None)
            default.update(best)

        return default

    def _aggregate_results(
        self, all_trades: list[dict], fold_summaries: list[dict]
    ) -> dict:
        """Combine the trades of all folds into the overall walk-forward result."""
        from src.backtest_validator import validate

        summary = _calc_trade_stats(all_trades, self.cfg.initial_balance)
        validation = validate(all_trades, summary, self.cfg)

        return {
            "mode": "walk_forward",
            "config": asdict(self.cfg),
            "summary": summary,
            "folds": fold_summaries,
            "trades": all_trades,
            "validation": validation,
        }