diff --git a/scripts/train_mlx_model.py b/scripts/train_mlx_model.py index d0f196d..7ecf913 100644 --- a/scripts/train_mlx_model.py +++ b/scripts/train_mlx_model.py @@ -45,7 +45,7 @@ def _split_combined(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame | None return xrp_df, btc_df, eth_df -def train_mlx(data_path: str, time_weight_decay: float = 2.0) -> float: +def train_mlx(data_path: str, time_weight_decay: float = 2.0, atr_sl_mult: float = 2.0, atr_tp_mult: float = 2.0) -> float: print(f"데이터 로드: {data_path}") raw = pd.read_parquet(data_path) print(f"캔들 수: {len(raw)}") @@ -58,7 +58,8 @@ def train_mlx(data_path: str, time_weight_decay: float = 2.0) -> float: print("\n데이터셋 생성 중...") t0 = time.perf_counter() - dataset = generate_dataset_vectorized(df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay) + dataset = generate_dataset_vectorized(df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay, + atr_sl_mult=atr_sl_mult, atr_tp_mult=atr_tp_mult) t1 = time.perf_counter() print(f"데이터셋 생성 완료: {t1 - t0:.1f}초, {len(dataset)}개 샘플") @@ -170,6 +171,8 @@ def walk_forward_auc( time_weight_decay: float = 2.0, n_splits: int = 5, train_ratio: float = 0.6, + atr_sl_mult: float = 2.0, + atr_tp_mult: float = 2.0, ) -> None: """Walk-Forward 검증: 슬라이딩 윈도우로 n_splits번 학습/검증 반복.""" print(f"\n=== Walk-Forward 검증 ({n_splits}폴드, decay={time_weight_decay}) ===") @@ -177,7 +180,8 @@ def walk_forward_auc( df, btc_df, eth_df = _split_combined(raw) dataset = generate_dataset_vectorized( - df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay + df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay, + atr_sl_mult=atr_sl_mult, atr_tp_mult=atr_tp_mult, ) missing = [c for c in FEATURE_COLS if c not in dataset.columns] for col in missing: @@ -260,12 +264,16 @@ def main(): ) parser.add_argument("--wf", action="store_true", help="Walk-Forward 검증 실행") parser.add_argument("--wf-splits", type=int, default=5, help="Walk-Forward 폴드 수") + parser.add_argument("--sl-mult", type=float, default=2.0, help="SL ATR 배수 (기본 2.0)") + parser.add_argument("--tp-mult", type=float, default=2.0, help="TP ATR 배수 (기본 2.0)") args = parser.parse_args() if args.wf: - walk_forward_auc(args.data, time_weight_decay=args.decay, n_splits=args.wf_splits) + walk_forward_auc(args.data, time_weight_decay=args.decay, n_splits=args.wf_splits, + atr_sl_mult=args.sl_mult, atr_tp_mult=args.tp_mult) else: - train_mlx(args.data, time_weight_decay=args.decay) + train_mlx(args.data, time_weight_decay=args.decay, + atr_sl_mult=args.sl_mult, atr_tp_mult=args.tp_mult) if __name__ == "__main__": diff --git a/scripts/train_model.py b/scripts/train_model.py index 9b084b0..788e4e7 100644 --- a/scripts/train_model.py +++ b/scripts/train_model.py @@ -54,8 +54,6 @@ def _cgroup_cpu_count() -> int: LOOKAHEAD = 24 # 15분봉 × 24 = 6시간 (dataset_builder.py와 동기화) -ATR_SL_MULT = 1.5 -ATR_TP_MULT = 3.0 MODEL_PATH = Path("models/lgbm_filter.pkl") PREV_MODEL_PATH = Path("models/lgbm_filter_prev.pkl") LOG_PATH = Path("models/training_log.json") @@ -63,6 +61,8 @@ LOG_PATH = Path("models/training_log.json") def _process_index(args: tuple) -> dict | None: """단일 인덱스에 대해 피처+레이블을 계산한다. Pool worker 함수.""" + ATR_SL_MULT = 1.5 # legacy values + ATR_TP_MULT = 3.0 i, df_values, df_columns = args df = pd.DataFrame(df_values, columns=df_columns) @@ -191,7 +191,7 @@ def _load_lgbm_params(tuned_params_path: str | None) -> tuple[dict, float]: return lgbm_params, weight_scale -def train(data_path: str, time_weight_decay: float = 2.0, tuned_params_path: str | None = None): +def train(data_path: str, time_weight_decay: float = 2.0, tuned_params_path: str | None = None, atr_sl_mult: float = 2.0, atr_tp_mult: float = 2.0): print(f"데이터 로드: {data_path}") df_raw = pd.read_parquet(data_path) print(f"캔들 수: {len(df_raw)}, 컬럼: {list(df_raw.columns)}") @@ -218,6 +218,8 @@ def train(data_path: str, time_weight_decay: float = 2.0, tuned_params_path: str df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay, negative_ratio=5, + atr_sl_mult=atr_sl_mult, + atr_tp_mult=atr_tp_mult, ) if dataset.empty or "label" not in dataset.columns: @@ -335,6 +337,8 @@ def walk_forward_auc( n_splits: int = 5, train_ratio: float = 0.6, tuned_params_path: str | None = None, + atr_sl_mult: float = 2.0, + atr_tp_mult: float = 2.0, ) -> None: """Walk-Forward 검증: 슬라이딩 윈도우로 n_splits번 학습/검증 반복. @@ -359,6 +363,8 @@ def walk_forward_auc( df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay, negative_ratio=5, + atr_sl_mult=atr_sl_mult, + atr_tp_mult=atr_tp_mult, ) actual_feature_cols = [c for c in FEATURE_COLS if c in dataset.columns] X = dataset[actual_feature_cols].values @@ -422,7 +428,7 @@ def walk_forward_auc( print(f" 폴드별: {[round(a, 4) for a in aucs]}") -def compare(data_path: str, time_weight_decay: float = 2.0, tuned_params_path: str | None = None): +def compare(data_path: str, time_weight_decay: float = 2.0, tuned_params_path: str | None = None, atr_sl_mult: float = 2.0, atr_tp_mult: float = 2.0): """기존 피처 vs OI 파생 피처 추가 버전 A/B 비교.""" import warnings @@ -449,6 +455,8 @@ def compare(data_path: str, time_weight_decay: float = 2.0, tuned_params_path: s df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=time_weight_decay, negative_ratio=5, + atr_sl_mult=atr_sl_mult, + atr_tp_mult=atr_tp_mult, ) if dataset.empty: @@ -546,6 +554,8 @@ def main(): ) parser.add_argument("--compare", action="store_true", help="OI 파생 피처 추가 전후 A/B 성능 비교") + parser.add_argument("--sl-mult", type=float, default=2.0, help="SL ATR 배수 (기본 2.0)") + parser.add_argument("--tp-mult", type=float, default=2.0, help="TP ATR 배수 (기본 2.0)") args = parser.parse_args() # --symbol 모드: 심볼별 디렉토리 경로 자동 결정 @@ -563,16 +573,20 @@ def main(): args.data = "data/combined_15m.parquet" if args.compare: - compare(args.data, time_weight_decay=args.decay, tuned_params_path=args.tuned_params) + compare(args.data, time_weight_decay=args.decay, tuned_params_path=args.tuned_params, + atr_sl_mult=args.sl_mult, atr_tp_mult=args.tp_mult) elif args.wf: walk_forward_auc( args.data, time_weight_decay=args.decay, n_splits=args.wf_splits, tuned_params_path=args.tuned_params, + atr_sl_mult=args.sl_mult, + atr_tp_mult=args.tp_mult, ) else: - train(args.data, time_weight_decay=args.decay, tuned_params_path=args.tuned_params) + train(args.data, time_weight_decay=args.decay, tuned_params_path=args.tuned_params, + atr_sl_mult=args.sl_mult, atr_tp_mult=args.tp_mult) if __name__ == "__main__": diff --git a/scripts/tune_hyperparams.py b/scripts/tune_hyperparams.py index 119b0f0..d7734e6 100755 --- a/scripts/tune_hyperparams.py +++ b/scripts/tune_hyperparams.py @@ -39,7 +39,7 @@ from src.dataset_builder import generate_dataset_vectorized, stratified_undersam # 데이터 로드 및 데이터셋 생성 (1회 캐싱) # ────────────────────────────────────────────── -def load_dataset(data_path: str) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: +def load_dataset(data_path: str, atr_sl_mult: float = 2.0, atr_tp_mult: float = 2.0) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ parquet 로드 → 벡터화 데이터셋 생성 → (X, y, w) numpy 배열 반환. study 시작 전 1회만 호출하여 모든 trial이 공유한다. @@ -64,7 +64,8 @@ def load_dataset(data_path: str) -> tuple[np.ndarray, np.ndarray, np.ndarray, np df = df_raw[base_cols].copy() print("\n데이터셋 생성 중 (1회만 실행)...") - dataset = generate_dataset_vectorized(df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=0.0, negative_ratio=5) + dataset = generate_dataset_vectorized(df, btc_df=btc_df, eth_df=eth_df, time_weight_decay=0.0, negative_ratio=5, + atr_sl_mult=atr_sl_mult, atr_tp_mult=atr_tp_mult) if dataset.empty or "label" not in dataset.columns: raise ValueError("데이터셋 생성 실패: 샘플 0개") @@ -527,6 +528,8 @@ def main(): parser.add_argument("--train-ratio", type=float, default=0.6, help="학습 구간 비율 (기본: 0.6)") parser.add_argument("--min-recall", type=float, default=0.35, help="최소 재현율 제약 (기본: 0.35)") parser.add_argument("--no-baseline", action="store_true", help="베이스라인 측정 건너뜀") + parser.add_argument("--sl-mult", type=float, default=2.0, help="SL ATR 배수 (기본 2.0)") + parser.add_argument("--tp-mult", type=float, default=2.0, help="TP ATR 배수 (기본 2.0)") args = parser.parse_args() # --symbol 모드: 심볼별 디렉토리 경로 자동 결정 @@ -538,7 +541,7 @@ def main(): args.data = "data/combined_15m.parquet" # 1. 데이터셋 로드 (1회) - X, y, w, source = load_dataset(args.data) + X, y, w, source = load_dataset(args.data, atr_sl_mult=args.sl_mult, atr_tp_mult=args.tp_mult) # 2. 베이스라인 측정 if args.symbol: diff --git a/src/backtester.py b/src/backtester.py index c157bf0..e5577db 100644 --- a/src/backtester.py +++ b/src/backtester.py @@ -743,6 +743,8 @@ class WalkForwardBacktester: signal_threshold=self.cfg.signal_threshold, adx_threshold=self.cfg.adx_threshold, volume_multiplier=self.cfg.volume_multiplier, + atr_sl_mult=self.cfg.atr_sl_mult, + atr_tp_mult=self.cfg.atr_tp_mult, ) except Exception as e: logger.warning(f" [{symbol}] 데이터셋 생성 실패: {e}")