Files
cointrader/src/ml_features.py
21in7 02e41881ac feat: strategy parameter sweep and production param optimization
- Add independent backtest engine (backtester.py) with walk-forward support
- Add backtest sanity check validator (backtest_validator.py)
- Add CLI tools: run_backtest.py, strategy_sweep.py (with --combined mode)
- Fix train-serve skew: unify feature z-score normalization (ml_features.py)
- Add strategy params (SL/TP ATR mult, ADX filter, volume multiplier) to
  config.py, indicators.py, dataset_builder.py, bot.py, backtester.py
- Fix WalkForwardBacktester not propagating strategy params to test folds
- Update production defaults: SL=2.0x, TP=2.0x, ADX=25, Vol=2.5
  (3-symbol combined PF: 0.71 → 1.24, MDD: 65.9% → 17.1%)
- Retrain ML models with new strategy parameters

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 23:39:43 +09:00

310 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pandas as pd
import numpy as np
# Feature names, in the order used by this module. They match the keys emitted
# by build_features() / build_features_aligned().
FEATURE_COLS = [
    # Indicators of the traded symbol
    "rsi",
    "macd_hist",
    "bb_pct",
    "ema_align",
    "stoch_k",
    "stoch_d",
    "atr_pct",
    "vol_ratio",
    # Own returns, signal quality and direction
    "ret_1",
    "ret_3",
    "ret_5",
    "signal_strength",
    "side",
    # BTC returns
    "btc_ret_1",
    "btc_ret_3",
    "btc_ret_5",
    # ETH returns
    "eth_ret_1",
    "eth_ret_3",
    "eth_ret_5",
    # Relative strength versus BTC / ETH
    "xrp_btc_rs",
    "xrp_eth_rs",
    # Market microstructure: OI change rate (z-score), funding rate (z-score)
    "oi_change",
    "funding_rate",
    # OI-derived features
    "oi_change_ma5",
    "oi_price_spread",
    # Trend strength
    "adx",
]
# Rolling z-score look-back windows, counted in 15-minute candles
# (kept identical to the windows used for training).
_ZSCORE_WINDOW = 3 * 24 * 4  # general features: 288 candles = 3 days
_ZSCORE_WINDOW_OI = 24 * 4  # OI / funding rate: 96 candles = 1 day
def _calc_ret(closes: pd.Series, n: int) -> float:
    """Return the simple return of the last close versus the close ``n`` candles before it.

    Args:
        closes: Close prices; the last element is treated as the latest candle.
        n: Look-back distance in candles.

    Returns:
        ``(closes[-1] - closes[-(n + 1)]) / closes[-(n + 1)]`` as a builtin
        ``float``. ``0.0`` is returned when fewer than ``n + 1`` closes are
        available, when the reference close is zero, or when the result is
        not finite (for example a NaN close on either end).
    """
    if len(closes) < n + 1:
        return 0.0
    base = closes.iloc[-(n + 1)]
    if base == 0:
        return 0.0
    change = (closes.iloc[-1] - base) / base
    # A NaN/inf result means missing data rather than a real price move.
    # Report it as 0.0, matching the "insufficient data -> 0.0" contract
    # instead of leaking NaN into the feature vector.
    return float(change) if np.isfinite(change) else 0.0
def _calc_rs(xrp_ret: float, other_ret: float) -> float:
    """Return the relative strength ``xrp_ret / other_ret``.

    A denominator of exactly zero yields ``0.0`` instead of raising.
    """
    return xrp_ret / other_ret if other_ret != 0.0 else 0.0
def _rolling_zscore_last(arr: np.ndarray, window: int = _ZSCORE_WINDOW) -> float:
    """Return the rolling z-score of the last element of ``arr``.

    The mean and population standard deviation (``ddof=0``) are taken from a
    pandas rolling window of ``window`` observations with ``min_periods=1``,
    evaluated at the final position. The standard deviation is floored at
    ``1e-8`` so a flat window does not divide by zero.

    Intended to use the same logic as the training-side
    ``dataset_builder._rolling_zscore`` (that function is not visible here).
    """
    series = pd.Series(arr, dtype=np.float64)
    roller = series.rolling(window=window, min_periods=1)
    center = roller.mean().iloc[-1]
    spread = roller.std(ddof=0).iloc[-1]
    # Written as a comparison (not max()) so a NaN spread passes through as-is.
    denom = 1e-8 if spread < 1e-8 else spread
    return float((series.iloc[-1] - center) / denom)
def build_features(
    df: pd.DataFrame,
    signal: str,
    btc_df: pd.DataFrame | None = None,
    eth_df: pd.DataFrame | None = None,
    oi_change: float | None = None,
    funding_rate: float | None = None,
    oi_change_ma5: float | None = None,
    oi_price_spread: float | None = None,
) -> pd.Series:
    """[Deprecated] Build features from raw (un-normalized) values.

    Kept for backward compatibility only; new code should call
    build_features_aligned().

    Args:
        df: Candle DataFrame; the last row is the candle being scored. It
            needs "close" and "volume"; indicator columns are optional and
            fall back to neutral defaults when absent.
        signal: "LONG" selects the long rules; any other value is scored
            with the short rules.
        btc_df, eth_df: Optional frames with a "close" column. The BTC/ETH
            features are added only when both are given.
        oi_change, funding_rate, oi_change_ma5, oi_price_spread: Optional
            live values; each missing one is stored as 0.0.

    Returns:
        A Series of floats keyed by feature name.
    """
    row = df.iloc[-1]
    prev_row = df.iloc[-2] if len(df) >= 2 else row
    is_long = signal == "LONG"
    price = row["close"]

    # Position of the close inside the Bollinger band (0.5 if the band is flat).
    upper = row.get("bb_upper", price)
    lower = row.get("bb_lower", price)
    band_width = upper - lower
    bb_pct = (price - lower) / band_width if band_width > 0 else 0.5

    # EMA stack: +1 when 9 > 21 > 50, -1 when 9 < 21 < 50, otherwise 0.
    ema_fast = row.get("ema9", price)
    ema_mid = row.get("ema21", price)
    ema_slow = row.get("ema50", price)
    ema_align = int(ema_fast > ema_mid > ema_slow) - int(ema_fast < ema_mid < ema_slow)

    atr = row.get("atr", 0)
    atr_pct = atr / price if price > 0 else 0

    vol_ma20 = row.get("vol_ma20", row.get("volume", 1))
    vol_ratio = row["volume"] / vol_ma20 if vol_ma20 > 0 else 1.0

    closes = df["close"]
    ret_1, ret_3, ret_5 = (_calc_ret(closes, k) for k in (1, 3, 5))

    rsi = row.get("rsi", 50)
    macd = row.get("macd", 0)
    macd_sig = row.get("macd_signal", 0)
    prev_macd = prev_row.get("macd", 0)
    prev_macd_sig = prev_row.get("macd_signal", 0)
    stoch_k = row.get("stoch_k", 50)
    stoch_d = row.get("stoch_d", 50)

    # Each rule is (condition, points); the strength is the sum of the points
    # of every rule that holds. A MACD crossover is worth two points.
    if is_long:
        rules = (
            (rsi < 35, 1),
            (prev_macd < prev_macd_sig and macd > macd_sig, 2),
            (price < lower, 1),
            (ema_align == 1, 1),
            (stoch_d < stoch_k < 20, 1),
        )
    else:
        rules = (
            (rsi > 65, 1),
            (prev_macd > prev_macd_sig and macd < macd_sig, 2),
            (price > upper, 1),
            (ema_align == -1, 1),
            (80 < stoch_k < stoch_d, 1),
        )
    strength = sum(points for met, points in rules if met)

    features = {
        "rsi": float(rsi),
        "macd_hist": float(row.get("macd_hist", 0)),
        "bb_pct": float(bb_pct),
        "ema_align": float(ema_align),
        "stoch_k": float(stoch_k),
        "stoch_d": float(stoch_d),
        "atr_pct": float(atr_pct),
        "vol_ratio": float(vol_ratio),
        "ret_1": float(ret_1),
        "ret_3": float(ret_3),
        "ret_5": float(ret_5),
        "signal_strength": float(strength),
        "side": 1.0 if is_long else 0.0,
    }

    if btc_df is not None and eth_df is not None:
        btc_close = btc_df["close"]
        eth_close = eth_df["close"]
        btc_1, btc_3, btc_5 = (_calc_ret(btc_close, k) for k in (1, 3, 5))
        eth_1, eth_3, eth_5 = (_calc_ret(eth_close, k) for k in (1, 3, 5))
        features["btc_ret_1"] = float(btc_1)
        features["btc_ret_3"] = float(btc_3)
        features["btc_ret_5"] = float(btc_5)
        features["eth_ret_1"] = float(eth_1)
        features["eth_ret_3"] = float(eth_3)
        features["eth_ret_5"] = float(eth_5)
        features["xrp_btc_rs"] = float(_calc_rs(ret_1, btc_1))
        features["xrp_eth_rs"] = float(_calc_rs(ret_1, eth_1))

    # Use the live value when one is supplied; otherwise fill with 0.
    live_values = (
        ("oi_change", oi_change),
        ("funding_rate", funding_rate),
        ("oi_change_ma5", oi_change_ma5),
        ("oi_price_spread", oi_price_spread),
    )
    for key, value in live_values:
        features[key] = 0.0 if value is None else float(value)

    features["adx"] = float(row.get("adx", 0))
    return pd.Series(features)
def build_features_aligned(
    df: pd.DataFrame,
    signal: str,
    btc_df: pd.DataFrame | None = None,
    eth_df: pd.DataFrame | None = None,
    oi_change: float | None = None,
    funding_rate: float | None = None,
    oi_change_ma5: float | None = None,
    oi_price_spread: float | None = None,
) -> pd.Series:
    """Build the feature vector for the last candle with rolling z-scores applied.

    Intended to apply the same rolling z-score as the training pipeline
    (dataset_builder._calc_features_vectorized, not visible here) so that
    serving does not drift from training (train-serve skew).

    Args:
        df: DataFrame with indicators already computed. The original contract
            asks for at least 60 candles; this is not enforced here. "close"
            and "volume" are required, indicator columns are optional.
        signal: "LONG" | "SHORT". Anything other than "LONG" is scored with
            the short rules.
        btc_df, eth_df: Optional frames with a "close" column. The BTC/ETH
            features are added only when both are given.
        oi_change, funding_rate, oi_change_ma5, oi_price_spread: Optional
            live values, stored as given; a missing one becomes NaN.

    Returns:
        A Series of floats keyed by feature name.
    """
    n_rows = len(df)
    latest = df.iloc[-1]
    previous = df.iloc[-2] if n_rows >= 2 else latest
    closes = df["close"]
    price = float(closes.iloc[-1])
    is_long = signal == "LONG"

    def _column_or(name, fallback):
        # df[name] when present, else a constant Series on df's index.
        return df[name] if name in df.columns else pd.Series(fallback, index=df.index)

    def _returns_for_rows(prices, periods):
        # pct_change returns cut to the last n_rows values, or left-padded
        # with zeros when the other frame is shorter than df.
        rets = prices.pct_change(periods).fillna(0).values
        if len(rets) >= n_rows:
            return rets[-n_rows:]
        return np.concatenate([np.zeros(n_rows - len(rets)), rets])

    # --- raw values (before z-scoring) ---
    upper_band = _column_or("bb_upper", price)
    lower_band = _column_or("bb_lower", price)
    bb_pct = (closes - lower_band) / ((upper_band - lower_band) + 1e-8)

    ema_fast = df.get("ema9", closes)
    ema_mid = df.get("ema21", closes)
    ema_slow = df.get("ema50", closes)
    rising = (ema_fast > ema_mid) & (ema_mid > ema_slow)
    falling = (ema_fast < ema_mid) & (ema_mid < ema_slow)
    ema_align = np.where(rising, 1, np.where(falling, -1, 0)).astype(np.float32)[-1]

    atr_pct_arr = (_column_or("atr", 0.0) / (closes + 1e-8)).values
    vol_ratio_arr = (df["volume"] / (_column_or("vol_ma20", 1.0) + 1e-8)).values
    own_rets = {k: closes.pct_change(k).fillna(0).values for k in (1, 3, 5)}

    # --- signal strength ---
    rsi = float(latest.get("rsi", 50))
    macd_now = float(latest.get("macd", 0))
    macd_sig_now = float(latest.get("macd_signal", 0))
    macd_before = float(previous.get("macd", 0))
    macd_sig_before = float(previous.get("macd_signal", 0))
    stoch_k = float(latest.get("stoch_k", 50))
    stoch_d = float(latest.get("stoch_d", 50))

    # Each rule is (condition, points); a MACD crossover is worth two points.
    if is_long:
        rules = (
            (rsi < 35, 1),
            (macd_before < macd_sig_before and macd_now > macd_sig_now, 2),
            (price < float(latest.get("bb_lower", price)), 1),
            (ema_align == 1, 1),
            (stoch_d < stoch_k < 20, 1),
        )
    else:
        rules = (
            (rsi > 65, 1),
            (macd_before > macd_sig_before and macd_now < macd_sig_now, 2),
            (price > float(latest.get("bb_upper", price)), 1),
            (ema_align == -1, 1),
            (80 < stoch_k < stoch_d, 1),
        )
    strength = sum(points for met, points in rules if met)

    # atr_pct, vol_ratio and the returns are z-scored; the others stay raw.
    features = {
        "rsi": rsi,
        "macd_hist": float(latest.get("macd_hist", 0)),
        "bb_pct": float(bb_pct.iloc[-1]),
        "ema_align": float(ema_align),
        "stoch_k": stoch_k,
        "stoch_d": stoch_d,
        "atr_pct": _rolling_zscore_last(atr_pct_arr),
        "vol_ratio": _rolling_zscore_last(vol_ratio_arr),
        "ret_1": _rolling_zscore_last(own_rets[1]),
        "ret_3": _rolling_zscore_last(own_rets[3]),
        "ret_5": _rolling_zscore_last(own_rets[5]),
        "signal_strength": float(strength),
        "side": 1.0 if is_long else 0.0,
    }

    # BTC/ETH correlation features (z-scored)
    if btc_df is not None and eth_df is not None:
        # Lengths are matched to df because btc/eth may hold more candles.
        btc_rets = {k: _returns_for_rows(btc_df["close"], k) for k in (1, 3, 5)}
        eth_rets = {k: _returns_for_rows(eth_df["close"], k) for k in (1, 3, 5)}

        own_r1 = own_rets[1].astype(np.float32)

        def _relative_strength(other_r1):
            # Element-wise own/other 1-candle return ratio in float32;
            # positions where the other return is zero stay 0.
            denom = other_r1.astype(np.float32)
            return np.divide(own_r1, denom, out=np.zeros_like(own_r1), where=(denom != 0))

        features.update({
            "btc_ret_1": _rolling_zscore_last(btc_rets[1]),
            "btc_ret_3": _rolling_zscore_last(btc_rets[3]),
            "btc_ret_5": _rolling_zscore_last(btc_rets[5]),
            "eth_ret_1": _rolling_zscore_last(eth_rets[1]),
            "eth_ret_3": _rolling_zscore_last(eth_rets[3]),
            "eth_ret_5": _rolling_zscore_last(eth_rets[5]),
            "xrp_btc_rs": _rolling_zscore_last(_relative_strength(btc_rets[1])),
            "xrp_eth_rs": _rolling_zscore_last(_relative_strength(eth_rets[1])),
        })

    # OI / funding rate: no history is available at serving time, so no
    # z-score is computed here. A supplied value is stored as given and a
    # missing one becomes NaN (per the original note, LightGBM handles NaN
    # on its own).
    # NOTE(review): the FEATURE_COLS comment describes oi_change/funding_rate
    # as z-scored; confirm callers pass already-normalized values.
    live_values = (
        ("oi_change", oi_change),
        ("funding_rate", funding_rate),
        ("oi_change_ma5", oi_change_ma5),
        ("oi_price_spread", oi_price_spread),
    )
    for key, value in live_values:
        features[key] = np.nan if value is None else float(value)

    # ADX z-score (all zeros when the column is absent)
    adx_arr = df["adx"].values.astype(np.float64) if "adx" in df.columns else np.zeros(n_rows)
    features["adx"] = _rolling_zscore_last(adx_arr)
    return pd.Series(features)