feat(ml): relax training thresholds for 5-10x more training samples

Add TRAIN_* constants (signal_threshold=2, adx=15, vol_mult=1.5, neg_ratio=3)
as the dataset_builder defaults, and remove the hardcoded negative_ratio=5
from all callers. Bot entry conditions are unchanged; they keep the strict
values in config.py.

Walk-forward (WF) 5-fold results (AUC above 0.91 for all symbols):
- XRPUSDT: 0.9216 ± 0.0052
- SOLUSDT: 0.9174 ± 0.0063
- DOGEUSDT: 0.9222 ± 0.0085

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
21in7
2026-03-21 19:38:15 +09:00
parent 6830549fd6
commit 30ddb2fef4
8 changed files with 305 additions and 19 deletions

View File

@@ -59,7 +59,7 @@ def train_mlx(data_path: str, time_weight_decay: float = 2.0, atr_sl_mult: float
print("\n데이터셋 생성 중...")
t0 = time.perf_counter()
dataset = generate_dataset_vectorized(df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay,
atr_sl_mult=atr_sl_mult, atr_tp_mult=atr_tp_mult, negative_ratio=5)
atr_sl_mult=atr_sl_mult, atr_tp_mult=atr_tp_mult)
t1 = time.perf_counter()
print(f"데이터셋 생성 완료: {t1 - t0:.1f}초, {len(dataset)}개 샘플")
@@ -175,7 +175,7 @@ def walk_forward_auc(
dataset = generate_dataset_vectorized(
df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay,
atr_sl_mult=atr_sl_mult, atr_tp_mult=atr_tp_mult, negative_ratio=5,
atr_sl_mult=atr_sl_mult, atr_tp_mult=atr_tp_mult,
)
missing = [c for c in FEATURE_COLS if c not in dataset.columns]
for col in missing:

View File

@@ -222,7 +222,7 @@ def train(data_path: str, time_weight_decay: float = 2.0, tuned_params_path: str
dataset = generate_dataset_vectorized(
df, btc_df=btc_df, eth_df=eth_df,
time_weight_decay=time_weight_decay,
negative_ratio=5,
atr_sl_mult=atr_sl_mult,
atr_tp_mult=atr_tp_mult,
)
@@ -367,7 +367,7 @@ def walk_forward_auc(
dataset = generate_dataset_vectorized(
df, btc_df=btc_df, eth_df=eth_df,
time_weight_decay=time_weight_decay,
negative_ratio=5,
atr_sl_mult=atr_sl_mult,
atr_tp_mult=atr_tp_mult,
)
@@ -459,7 +459,7 @@ def compare(data_path: str, time_weight_decay: float = 2.0, tuned_params_path: s
dataset = generate_dataset_vectorized(
df, btc_df=btc_df, eth_df=eth_df,
time_weight_decay=time_weight_decay,
negative_ratio=5,
atr_sl_mult=atr_sl_mult,
atr_tp_mult=atr_tp_mult,
)

View File

@@ -64,7 +64,7 @@ def load_dataset(data_path: str, atr_sl_mult: float = 2.0, atr_tp_mult: float =
df = df_raw[base_cols].copy()
print("\n데이터셋 생성 중 (1회만 실행)...")
dataset = generate_dataset_vectorized(df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=0.0, negative_ratio=5,
dataset = generate_dataset_vectorized(df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=0.0,
atr_sl_mult=atr_sl_mult, atr_tp_mult=atr_tp_mult)
if dataset.empty or "label" not in dataset.columns: