feat(ml): add purged gap (embargo=24) to walk-forward + ablation CLI

- Add LOOKAHEAD embargo between train/val splits in all 3 WF functions to prevent label leakage from 6h lookahead window - Add --ablation flag to train_model.py for signal_strength/side dependency diagnosis (A/B/C experiment with drop analysis) - Criteria: A→C drop ≤0.05=good, 0.05-0.10=conditional, ≥0.10=redesign Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 19:42:51 +09:00
parent 30ddb2fef4
commit c29d3e0569
6 changed files with 581 additions and 11 deletions
--- a/tests/test_ml_pipeline_fixes.py
+++ b/tests/test_ml_pipeline_fixes.py
@@ -124,6 +124,31 @@ def test_mlx_no_double_normalization():
    assert np.allclose(model._std, 1.0), "normalize=False시 std는 1이어야 한다"


+def test_walk_forward_purged_gap():
+    """Walk-Forward 검증에서 학습/검증 사이에 LOOKAHEAD 만큼의 gap이 존재해야 한다."""
+    from src.dataset_builder import LOOKAHEAD
+
+    n = 1000
+    train_ratio = 0.6
+    n_splits = 5
+    embargo = LOOKAHEAD  # 24
+
+    step = max(1, int(n * (1 - train_ratio) / n_splits))
+    train_end_start = int(n * train_ratio)
+
+    for fold_idx in range(n_splits):
+        tr_end = train_end_start + fold_idx * step
+        val_start = tr_end + embargo
+        val_end = val_start + step
+        if val_end > n:
+            break
+
+        assert val_start - tr_end >= embargo, \
+            f"폴드 {fold_idx}: gap={val_start - tr_end} < embargo={embargo}"
+        assert val_start > tr_end, \
+            f"폴드 {fold_idx}: val_start={val_start} <= tr_end={tr_end}"
+
+
 def test_ml_filter_from_model():
    """MLFilter.from_model()로 LightGBM 모델을 주입할 수 있어야 한다."""
    from src.ml_filter import MLFilter