feat: implement Active Config pattern for automatic param promotion

- tune_hyperparams.py: 탐색 완료 후 Best AUC > Baseline AUC 이면 models/active_lgbm_params.json 자동 갱신
- tune_hyperparams.py: 베이스라인을 active 파일 기준으로 측정 (active 없으면 코드 내 기본값 사용)
- train_model.py: _load_lgbm_params()에 active 파일 자동 탐색 추가
  우선순위: --tuned-params > active_lgbm_params.json > 하드코딩 기본값
- models/active_lgbm_params.json: 현재 best 파라미터로 초기화
- .gitignore: tune_results_*.json 제외, active 파일은 git 추적 유지

Made-with: Cursor
2026-03-02 14:56:42 +09:00
parent d5f8ed4789
commit 6d82febab7
4 changed files with 1112 additions and 17 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,8 @@ logs/
 .venv/
 venv/
 models/*.pkl
 models/*.onnx
 models/tune_results_*.json
 data/*.parquet
 .worktrees/
 .DS_Store
--- a/models/active_lgbm_params.json
+++ b/models/active_lgbm_params.json
--- a/scripts/train_model.py
+++ b/scripts/train_model.py
@@ -146,9 +146,16 @@ def generate_dataset(df: pd.DataFrame, n_jobs: int | None = None) -> pd.DataFram
    return pd.DataFrame(rows)
 ACTIVE_PARAMS_PATH = Path("models/active_lgbm_params.json")
 def _load_lgbm_params(tuned_params_path: str | None) -> tuple[dict, float]:
    """기본 LightGBM 파라미터를 반환하고, 튜닝 JSON이 주어지면 덮어쓴다.
-    반환: (lgbm_params, weight_scale)
+
    우선순위:
      1. --tuned-params 명시적 인자
      2. models/active_lgbm_params.json (Optuna가 자동 갱신)
      3. 코드 내 하드코딩 기본값 (fallback)
    """
    lgbm_params: dict = {
        "n_estimators":      434,
@@ -163,15 +170,23 @@ def _load_lgbm_params(tuned_params_path: str | None) -> tuple[dict, float]:
    }
    weight_scale = 1.783105
-    if tuned_params_path:
+    # 명시적 인자가 없으면 active 파일 자동 탐색
-        with open(tuned_params_path, "r", encoding="utf-8") as f:
+    resolved_path = tuned_params_path or (
        str(ACTIVE_PARAMS_PATH) if ACTIVE_PARAMS_PATH.exists() else None
    )
    if resolved_path:
        with open(resolved_path, "r", encoding="utf-8") as f:
            tune_data = json.load(f)
        best_params = dict(tune_data["best_trial"]["params"])
        weight_scale = float(best_params.pop("weight_scale", 1.0))
        lgbm_params.update(best_params)
-        print(f"\n[Optuna] 튜닝 파라미터 로드: {tuned_params_path}")
+        source = "명시적 인자" if tuned_params_path else "active 파일 자동 로드"
        print(f"\n[Optuna] 튜닝 파라미터 로드 ({source}): {resolved_path}")
        print(f"[Optuna] 적용 파라미터: {lgbm_params}")
        print(f"[Optuna] weight_scale: {weight_scale}\n")
    else:
        print("[Optuna] active 파일 없음 → 코드 내 기본 파라미터 사용\n")
    return lgbm_params, weight_scale
--- a/scripts/tune_hyperparams.py
+++ b/scripts/tune_hyperparams.py
@@ -217,19 +217,30 @@ def measure_baseline(
    n_splits: int,
    train_ratio: float,
 ) -> tuple[float, list[float]]:
-    """train_model.py의 현재 고정 파라미터로 베이스라인 AUC를 측정한다."""
+    """현재 실전 파라미터(active 파일 또는 하드코딩 기본값)로 베이스라인 AUC를 측정한다."""
-    baseline_params = {
+    active_path = Path("models/active_lgbm_params.json")
-        "n_estimators":     500,
+
-        "learning_rate":    0.05,
+    if active_path.exists():
-        "max_depth":        -1,   # train_model.py는 max_depth 미설정
+        with open(active_path, "r", encoding="utf-8") as f:
-        "num_leaves":       31,
+            tune_data = json.load(f)
-        "min_child_samples": 15,
+        best_params = dict(tune_data["best_trial"]["params"])
-        "subsample":        0.8,
+        best_params.pop("weight_scale", None)
-        "colsample_bytree": 0.8,
+        baseline_params = best_params
-        "reg_alpha":        0.05,
+        print(f"베이스라인 측정 중 (active 파일: {active_path})...")
-        "reg_lambda":       0.1,
+    else:
-    }
+        baseline_params = {
-    print("베이스라인 측정 중 (현재 train_model.py 고정 파라미터)...")
+            "n_estimators":      434,
            "learning_rate":     0.123659,
            "max_depth":         6,
            "num_leaves":        14,
            "min_child_samples": 10,
            "subsample":         0.929062,
            "colsample_bytree":  0.946330,
            "reg_alpha":         0.573971,
            "reg_lambda":        0.000157,
        }
        print("베이스라인 측정 중 (active 파일 없음 → 코드 내 기본 파라미터)...")
    return _walk_forward_cv(X, y, w, baseline_params, n_splits=n_splits, train_ratio=train_ratio)
@@ -423,6 +434,19 @@ def main():
    output_path = save_results(study, baseline_auc, baseline_folds, elapsed, args.data)
    print_report(study, baseline_auc, baseline_folds, elapsed, output_path)
    # 5. 성능 개선 시 active 파일 자동 갱신
    import shutil
    active_path = Path("models/active_lgbm_params.json")
    if not args.no_baseline and study.best_value > baseline_auc:
        shutil.copy(output_path, active_path)
        improvement = study.best_value - baseline_auc
        print(f"[MLOps] AUC +{improvement:.4f} 개선 → {active_path} 자동 갱신 완료")
        print(f"[MLOps] 다음 train_model.py 실행 시 새 파라미터가 자동 적용됩니다.\n")
    elif args.no_baseline:
        print("[MLOps] --no-baseline 모드: 성능 비교 없이 active 파일 유지\n")
    else:
        print(f"[MLOps] 성능 개선 없음 (Best={study.best_value:.4f} ≤ Baseline={baseline_auc:.4f}) → active 파일 유지\n")
 if __name__ == "__main__":
    main()