fix: resolve ML filter dtype error and missing BTC/ETH correlation features
- Fix the LightGBM predict_proba ValueError by selecting only the FEATURE_COLS present in the input and casting them to float64
- Extract BTC/ETH correlation data from the columns embedded in each symbol's parquet file, falling back to separate files only when those columns are absent
- Disable the ONNX backend in the ML filter tests so that the mocked LightGBM model is the one exercised
- Add NO_ML_FILTER=true to .env.example (the ML filter adds no value with the current signal thresholds)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ LEVERAGE=10
|
|||||||
RISK_PER_TRADE=0.02
|
RISK_PER_TRADE=0.02
|
||||||
DISCORD_WEBHOOK_URL=
|
DISCORD_WEBHOOK_URL=
|
||||||
ML_THRESHOLD=0.55
|
ML_THRESHOLD=0.55
|
||||||
|
NO_ML_FILTER=true
|
||||||
MAX_SAME_DIRECTION=2
|
MAX_SAME_DIRECTION=2
|
||||||
ATR_SL_MULT=2.0
|
ATR_SL_MULT=2.0
|
||||||
ATR_TP_MULT=2.0
|
ATR_TP_MULT=2.0
|
||||||
|
|||||||
@@ -164,9 +164,11 @@ def _get_ml_proba(ml_filter: MLFilter | None, features: pd.Series) -> float | No
|
|||||||
X = features[FEATURE_COLS].values.astype(np.float32).reshape(1, -1)
|
X = features[FEATURE_COLS].values.astype(np.float32).reshape(1, -1)
|
||||||
return float(ml_filter._onnx_session.run(None, {input_name: X})[0][0])
|
return float(ml_filter._onnx_session.run(None, {input_name: X})[0][0])
|
||||||
else:
|
else:
|
||||||
X = features.to_frame().T
|
available = [c for c in FEATURE_COLS if c in features.index]
|
||||||
|
X = pd.DataFrame([features[available].values.astype(np.float64)], columns=available)
|
||||||
return float(ml_filter._lgbm_model.predict_proba(X)[0][1])
|
return float(ml_filter._lgbm_model.predict_proba(X)[0][1])
|
||||||
except Exception:
|
except Exception as e:
|
||||||
|
logger.warning(f"ML PROBA ERROR: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -209,13 +211,24 @@ class Backtester:
|
|||||||
all_signals: dict[str, np.ndarray] = {}
|
all_signals: dict[str, np.ndarray] = {}
|
||||||
all_features: dict[str, pd.DataFrame] = {}
|
all_features: dict[str, pd.DataFrame] = {}
|
||||||
|
|
||||||
# BTC/ETH 상관 데이터 (있으면 로드)
|
|
||||||
btc_df = self._try_load_corr("BTCUSDT")
|
|
||||||
eth_df = self._try_load_corr("ETHUSDT")
|
|
||||||
|
|
||||||
for sym in self.cfg.symbols:
|
for sym in self.cfg.symbols:
|
||||||
df = _load_data(sym, self.cfg.start, self.cfg.end)
|
df = _load_data(sym, self.cfg.start, self.cfg.end)
|
||||||
all_data[sym] = df
|
all_data[sym] = df
|
||||||
|
|
||||||
|
# BTC/ETH 상관 데이터: 임베딩된 컬럼에서 추출 (별도 파일 폴백)
|
||||||
|
base_cols = ["open", "high", "low", "close", "volume"]
|
||||||
|
btc_df = eth_df = None
|
||||||
|
if "close_btc" in df.columns:
|
||||||
|
btc_df = df[[c + "_btc" for c in base_cols]].copy()
|
||||||
|
btc_df.columns = base_cols
|
||||||
|
else:
|
||||||
|
btc_df = self._try_load_corr("BTCUSDT")
|
||||||
|
if "close_eth" in df.columns:
|
||||||
|
eth_df = df[[c + "_eth" for c in base_cols]].copy()
|
||||||
|
eth_df.columns = base_cols
|
||||||
|
else:
|
||||||
|
eth_df = self._try_load_corr("ETHUSDT")
|
||||||
|
|
||||||
df_ind = _calc_indicators(df)
|
df_ind = _calc_indicators(df)
|
||||||
all_indicators[sym] = df_ind
|
all_indicators[sym] = df_ind
|
||||||
sig_arr = _calc_signals(
|
sig_arr = _calc_signals(
|
||||||
|
|||||||
@@ -141,7 +141,8 @@ class MLFilter:
|
|||||||
X = features[FEATURE_COLS].values.astype(np.float32).reshape(1, -1)
|
X = features[FEATURE_COLS].values.astype(np.float32).reshape(1, -1)
|
||||||
proba = float(self._onnx_session.run(None, {input_name: X})[0][0])
|
proba = float(self._onnx_session.run(None, {input_name: X})[0][0])
|
||||||
else:
|
else:
|
||||||
X = features.to_frame().T
|
available = [c for c in FEATURE_COLS if c in features.index]
|
||||||
|
X = pd.DataFrame([features[available].values.astype(np.float64)], columns=available)
|
||||||
proba = float(self._lgbm_model.predict_proba(X)[0][1])
|
proba = float(self._lgbm_model.predict_proba(X)[0][1])
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"ML 필터 [{self.active_backend}] 확률: {proba:.3f} "
|
f"ML 필터 [{self.active_backend}] 확률: {proba:.3f} "
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ def test_no_model_should_enter_returns_true(tmp_path):
|
|||||||
def test_should_enter_above_threshold():
|
def test_should_enter_above_threshold():
|
||||||
"""확률 >= 0.60 이면 True"""
|
"""확률 >= 0.60 이면 True"""
|
||||||
f = MLFilter(threshold=0.60)
|
f = MLFilter(threshold=0.60)
|
||||||
|
f._onnx_session = None # ONNX 비활성화, LightGBM만 테스트
|
||||||
mock_model = MagicMock()
|
mock_model = MagicMock()
|
||||||
mock_model.predict_proba.return_value = np.array([[0.35, 0.65]])
|
mock_model.predict_proba.return_value = np.array([[0.35, 0.65]])
|
||||||
f._lgbm_model = mock_model
|
f._lgbm_model = mock_model
|
||||||
@@ -42,6 +43,7 @@ def test_should_enter_above_threshold():
|
|||||||
def test_should_enter_below_threshold():
|
def test_should_enter_below_threshold():
|
||||||
"""확률 < 0.60 이면 False"""
|
"""확률 < 0.60 이면 False"""
|
||||||
f = MLFilter(threshold=0.60)
|
f = MLFilter(threshold=0.60)
|
||||||
|
f._onnx_session = None # ONNX 비활성화, LightGBM만 테스트
|
||||||
mock_model = MagicMock()
|
mock_model = MagicMock()
|
||||||
mock_model.predict_proba.return_value = np.array([[0.55, 0.45]])
|
mock_model.predict_proba.return_value = np.array([[0.55, 0.45]])
|
||||||
f._lgbm_model = mock_model
|
f._lgbm_model = mock_model
|
||||||
|
|||||||
Reference in New Issue
Block a user