feat: strategy parameter sweep and production param optimization

- Add independent backtest engine (backtester.py) with walk-forward support
- Add backtest sanity check validator (backtest_validator.py)
- Add CLI tools: run_backtest.py, strategy_sweep.py (with --combined mode)
- Fix train-serve skew: unify feature z-score normalization (ml_features.py)
- Add strategy params (SL/TP ATR mult, ADX filter, volume multiplier) to
  config.py, indicators.py, dataset_builder.py, bot.py, backtester.py
- Fix WalkForwardBacktester not propagating strategy params to test folds
- Update production defaults: SL=2.0x, TP=2.0x, ADX=25, Vol=2.5
  (3-symbol combined PF: 0.71 → 1.24, MDD: 65.9% → 17.1%)
- Retrain ML models with new strategy parameters

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
21in7
2026-03-06 23:39:43 +09:00
parent 15fb9c158a
commit 02e41881ac
20 changed files with 2153 additions and 33 deletions

View File

@@ -15,6 +15,10 @@ FEATURE_COLS = [
"adx",
]
# rolling z-score 윈도우 (학습과 동일)
_ZSCORE_WINDOW = 288 # 일반 피처: 15분봉 × 288 = 3일
_ZSCORE_WINDOW_OI = 96 # OI/펀딩비: 15분봉 × 96 = 1일
def _calc_ret(closes: pd.Series, n: int) -> float:
"""n캔들 전 대비 수익률. 데이터 부족 시 0.0."""
@@ -31,6 +35,18 @@ def _calc_rs(xrp_ret: float, other_ret: float) -> float:
return xrp_ret / other_ret
def _rolling_zscore_last(arr: np.ndarray, window: int = _ZSCORE_WINDOW) -> float:
"""배열의 마지막 값에 대한 rolling z-score를 반환한다.
학습(dataset_builder._rolling_zscore)과 동일한 로직."""
s = pd.Series(arr, dtype=np.float64)
r = s.rolling(window=window, min_periods=1)
mean = r.mean().iloc[-1]
std = r.std(ddof=0).iloc[-1]
if std < 1e-8:
std = 1e-8
return float((s.iloc[-1] - mean) / std)
def build_features(
df: pd.DataFrame,
signal: str,
@@ -42,10 +58,8 @@ def build_features(
oi_price_spread: float | None = None,
) -> pd.Series:
"""
기술 지표가 계산된 DataFrame의 마지막 행에서 ML 피처를 추출한다.
btc_df, eth_df가 제공되면 26개 피처를, 없으면 18개 피처를 반환한다.
signal: "LONG" | "SHORT"
oi_change, funding_rate, oi_change_ma5, oi_price_spread: 실제 값이 제공되면 사용, 없으면 0.0으로 채운다.
[Deprecated] raw 값 기반 피처. 하위 호환용으로 유지.
신규 코드는 build_features_aligned()를 사용할 것.
"""
last = df.iloc[-1]
close = last["close"]
@@ -142,3 +156,154 @@ def build_features(
base["adx"] = float(last.get("adx", 0))
return pd.Series(base)
def build_features_aligned(
df: pd.DataFrame,
signal: str,
btc_df: pd.DataFrame | None = None,
eth_df: pd.DataFrame | None = None,
oi_change: float | None = None,
funding_rate: float | None = None,
oi_change_ma5: float | None = None,
oi_price_spread: float | None = None,
) -> pd.Series:
"""
학습(dataset_builder._calc_features_vectorized)과 동일한 rolling z-score를
적용한 피처를 반환한다. train-serve skew를 방지한다.
df: 지표가 이미 계산된 DataFrame (최소 60캔들 이상)
signal: "LONG" | "SHORT"
"""
last = df.iloc[-1]
close_series = df["close"]
close = float(close_series.iloc[-1])
# --- raw 값 계산 (z-score 전) ---
bb_upper = df["bb_upper"] if "bb_upper" in df.columns else pd.Series(close, index=df.index)
bb_lower = df["bb_lower"] if "bb_lower" in df.columns else pd.Series(close, index=df.index)
bb_range = bb_upper - bb_lower
bb_pct_series = (close_series - bb_lower) / (bb_range + 1e-8)
ema9 = df.get("ema9", close_series)
ema21 = df.get("ema21", close_series)
ema50 = df.get("ema50", close_series)
ema_align_arr = np.where(
(ema9 > ema21) & (ema21 > ema50), 1,
np.where((ema9 < ema21) & (ema21 < ema50), -1, 0)
).astype(np.float32)
atr_series = df["atr"] if "atr" in df.columns else pd.Series(0.0, index=df.index)
atr_pct_arr = (atr_series / (close_series + 1e-8)).values
volume = df["volume"]
vol_ma20 = df["vol_ma20"] if "vol_ma20" in df.columns else pd.Series(1.0, index=df.index)
vol_ratio_arr = (volume / (vol_ma20 + 1e-8)).values
ret_1_arr = close_series.pct_change(1).fillna(0).values
ret_3_arr = close_series.pct_change(3).fillna(0).values
ret_5_arr = close_series.pct_change(5).fillna(0).values
# z-score 적용 (학습과 동일)
atr_pct_z = _rolling_zscore_last(atr_pct_arr)
vol_ratio_z = _rolling_zscore_last(vol_ratio_arr)
ret_1_z = _rolling_zscore_last(ret_1_arr)
ret_3_z = _rolling_zscore_last(ret_3_arr)
ret_5_z = _rolling_zscore_last(ret_5_arr)
# signal_strength
rsi = float(last.get("rsi", 50))
macd_val = float(last.get("macd", 0))
macd_sig_val = float(last.get("macd_signal", 0))
stoch_k = float(last.get("stoch_k", 50))
stoch_d = float(last.get("stoch_d", 50))
prev = df.iloc[-2] if len(df) >= 2 else last
prev_macd = float(prev.get("macd", 0))
prev_macd_sig = float(prev.get("macd_signal", 0))
strength = 0
if signal == "LONG":
if rsi < 35: strength += 1
if prev_macd < prev_macd_sig and macd_val > macd_sig_val: strength += 2
if close < float(last.get("bb_lower", close)): strength += 1
if ema_align_arr[-1] == 1: strength += 1
if stoch_k < 20 and stoch_k > stoch_d: strength += 1
else:
if rsi > 65: strength += 1
if prev_macd > prev_macd_sig and macd_val < macd_sig_val: strength += 2
if close > float(last.get("bb_upper", close)): strength += 1
if ema_align_arr[-1] == -1: strength += 1
if stoch_k > 80 and stoch_k < stoch_d: strength += 1
# ADX z-score
adx_arr = df["adx"].values.astype(np.float64) if "adx" in df.columns else np.zeros(len(df))
adx_z = _rolling_zscore_last(adx_arr)
base = {
"rsi": rsi,
"macd_hist": float(last.get("macd_hist", 0)),
"bb_pct": float(bb_pct_series.iloc[-1]),
"ema_align": float(ema_align_arr[-1]),
"stoch_k": stoch_k,
"stoch_d": stoch_d,
"atr_pct": atr_pct_z,
"vol_ratio": vol_ratio_z,
"ret_1": ret_1_z,
"ret_3": ret_3_z,
"ret_5": ret_5_z,
"signal_strength": float(strength),
"side": 1.0 if signal == "LONG" else 0.0,
}
# BTC/ETH 상관 피처 (z-score)
if btc_df is not None and eth_df is not None:
btc_r1 = btc_df["close"].pct_change(1).fillna(0).values
btc_r3 = btc_df["close"].pct_change(3).fillna(0).values
btc_r5 = btc_df["close"].pct_change(5).fillna(0).values
eth_r1 = eth_df["close"].pct_change(1).fillna(0).values
eth_r3 = eth_df["close"].pct_change(3).fillna(0).values
eth_r5 = eth_df["close"].pct_change(5).fillna(0).values
# 길이 맞춤 (btc/eth가 더 길 수 있음)
n = len(df)
def _align(arr):
if len(arr) >= n:
return arr[-n:]
return np.concatenate([np.zeros(n - len(arr)), arr])
btc_r1 = _align(btc_r1)
btc_r3 = _align(btc_r3)
btc_r5 = _align(btc_r5)
eth_r1 = _align(eth_r1)
eth_r3 = _align(eth_r3)
eth_r5 = _align(eth_r5)
# 상대강도 (raw → z-score)
xrp_r1 = ret_1_arr.astype(np.float32)
btc_r1_f = btc_r1.astype(np.float32)
eth_r1_f = eth_r1.astype(np.float32)
rs_btc = np.divide(xrp_r1, btc_r1_f, out=np.zeros_like(xrp_r1), where=(btc_r1_f != 0))
rs_eth = np.divide(xrp_r1, eth_r1_f, out=np.zeros_like(xrp_r1), where=(eth_r1_f != 0))
base.update({
"btc_ret_1": _rolling_zscore_last(btc_r1),
"btc_ret_3": _rolling_zscore_last(btc_r3),
"btc_ret_5": _rolling_zscore_last(btc_r5),
"eth_ret_1": _rolling_zscore_last(eth_r1),
"eth_ret_3": _rolling_zscore_last(eth_r3),
"eth_ret_5": _rolling_zscore_last(eth_r5),
"xrp_btc_rs": _rolling_zscore_last(rs_btc),
"xrp_eth_rs": _rolling_zscore_last(rs_eth),
})
# OI/펀딩비 z-score (실시간 값이 제공되면 히스토리 끝에 추가하여 z-score)
# 서빙 시 OI/펀딩비 히스토리가 없으므로 단일 값 → z-score 불가, NaN 처리
# LightGBM은 NaN을 자체 처리함
base["oi_change"] = float(oi_change) if oi_change is not None else np.nan
base["funding_rate"] = float(funding_rate) if funding_rate is not None else np.nan
base["oi_change_ma5"] = float(oi_change_ma5) if oi_change_ma5 is not None else np.nan
base["oi_price_spread"] = float(oi_price_spread) if oi_price_spread is not None else np.nan
base["adx"] = adx_z
return pd.Series(base)