From 6d82febab790c0701a3ffcf8ccf2cfd5f763585d Mon Sep 17 00:00:00 2001 From: 21in7 Date: Mon, 2 Mar 2026 14:56:42 +0900 Subject: [PATCH] feat: implement Active Config pattern for automatic param promotion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - tune_hyperparams.py: 탐색 완료 후 Best AUC > Baseline AUC 이면 models/active_lgbm_params.json 자동 갱신 - tune_hyperparams.py: 베이스라인을 active 파일 기준으로 측정 (active 없으면 코드 내 기본값 사용) - train_model.py: _load_lgbm_params()에 active 파일 자동 탐색 추가 우선순위: --tuned-params > active_lgbm_params.json > 하드코딩 기본값 - models/active_lgbm_params.json: 현재 best 파라미터로 초기화 - .gitignore: tune_results_*.json 제외, active 파일은 git 추적 유지 Made-with: Cursor --- .gitignore | 2 + models/active_lgbm_params.json | 1054 ++++++++++++++++++++++++++++++++ scripts/train_model.py | 23 +- scripts/tune_hyperparams.py | 50 +- 4 files changed, 1112 insertions(+), 17 deletions(-) create mode 100644 models/active_lgbm_params.json diff --git a/.gitignore b/.gitignore index 7c0d95e..315cab6 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ logs/ .venv/ venv/ models/*.pkl +models/*.onnx +models/tune_results_*.json data/*.parquet .worktrees/ .DS_Store diff --git a/models/active_lgbm_params.json b/models/active_lgbm_params.json new file mode 100644 index 0000000..db211a2 --- /dev/null +++ b/models/active_lgbm_params.json @@ -0,0 +1,1054 @@ +{ + "timestamp": "2026-03-02T14:47:49.965976", + "data_path": "data/combined_15m.parquet", + "n_trials_total": 50, + "n_trials_complete": 44, + "elapsed_sec": 70.8, + "baseline": { + "auc": 0.580343, + "fold_aucs": [ + 0.657825, + 0.568182, + 0.456471, + 0.548148, + 0.671088 + ] + }, + "best_trial": { + "number": 46, + "auc": 0.600162, + "fold_aucs": [ + 0.708223, + 0.525, + 0.494118, + 0.567901, + 0.70557 + ], + "params": { + "n_estimators": 434, + "learning_rate": 0.123659, + "max_depth": 6, + "num_leaves": 14, + "min_child_samples": 10, + "subsample": 0.929062, + "colsample_bytree": 0.94633, + "reg_alpha": 0.573971, + "reg_lambda": 0.000157, + "weight_scale": 1.783105 + } + }, + "all_trials": [ + { + "number": 0, + "auc": 0.555347, + "fold_aucs": [ + 0.668435, + 0.518182, + 0.437647, + 0.518519, + 0.633952 + ], + "params": { + "n_estimators": 287, + "learning_rate": 0.172547, + "max_depth": 6, + "num_leaves": 21, + "min_child_samples": 16, + "subsample": 0.577997, + "colsample_bytree": 0.529042, + "reg_alpha": 0.291544, + "reg_lambda": 0.025378, + "weight_scale": 1.562109 + } + }, + { + "number": 1, + "auc": 0.564381, + "fold_aucs": [ + 0.68435, + 0.554545, + 0.435294, + 0.511111, + 0.636605 + ], + "params": { + "n_estimators": 110, + "learning_rate": 0.18276, + "max_depth": 6, + "num_leaves": 12, + "min_child_samples": 17, + "subsample": 0.591702, + "colsample_bytree": 0.652121, + "reg_alpha": 0.012561, + "reg_lambda": 0.005343, + "weight_scale": 0.936844 + } + }, + { + "number": 2, + "auc": 0.534985, + "fold_aucs": [ + 0.578249, + 0.579545, + 0.498824, + 0.432099, + 0.586207 + ], + "params": { + "n_estimators": 406, + "learning_rate": 0.015187, + "max_depth": 3, + "num_leaves": 7, + "min_child_samples": 25, + "subsample": 0.728035, + "colsample_bytree": 0.892588, + "reg_alpha": 0.000629, + "reg_lambda": 0.011401, + "weight_scale": 1.388622 + } + }, + { + "number": 3, + "auc": 0.580558, + "fold_aucs": [ + 0.713528, + 0.584091, + 0.449412, + 0.545679, + 0.61008 + ], + "params": { + "n_estimators": 123, + "learning_rate": 0.061721, + "max_depth": 3, + "num_leaves": 7, + "min_child_samples": 12, + "subsample": 0.974443, + "colsample_bytree": 0.982816, + "reg_alpha": 0.171234, + "reg_lambda": 0.001654, + "weight_scale": 0.646508 + } + }, + { + "number": 4, + "auc": 0.552162, + "fold_aucs": [ + 0.562334, + 0.604545, + 0.475294, + 0.474074, + 0.644562 + ], + "params": { + "n_estimators": 442, + "learning_rate": 0.037381, + "max_depth": 2, + "num_leaves": 7, + "min_child_samples": 30, + "subsample": 0.517194, + "colsample_bytree": 0.95466, + "reg_alpha": 0.001084, + "reg_lambda": 0.044678, + "weight_scale": 0.967567 + } + }, + { + "number": 8, + "auc": 0.56184, + "fold_aucs": [ + 0.6313, + 0.572727, + 0.458824, + 0.493827, + 0.65252 + ], + "params": { + "n_estimators": 465, + "learning_rate": 0.100797, + "max_depth": 2, + "num_leaves": 7, + "min_child_samples": 24, + "subsample": 0.557935, + "colsample_bytree": 0.931552, + "reg_alpha": 0.031131, + "reg_lambda": 0.002107, + "weight_scale": 0.595338 + } + }, + { + "number": 10, + "auc": 0.577609, + "fold_aucs": [ + 0.660477, + 0.55, + 0.496471, + 0.525926, + 0.655172 + ], + "params": { + "n_estimators": 573, + "learning_rate": 0.072069, + "max_depth": 4, + "num_leaves": 10, + "min_child_samples": 10, + "subsample": 0.953832, + "colsample_bytree": 0.803721, + "reg_alpha": 0.783021, + "reg_lambda": 0.000165, + "weight_scale": 0.518467 + } + }, + { + "number": 11, + "auc": 0.586348, + "fold_aucs": [ + 0.681698, + 0.565909, + 0.472941, + 0.51358, + 0.697613 + ], + "params": { + "n_estimators": 597, + "learning_rate": 0.077791, + "max_depth": 4, + "num_leaves": 10, + "min_child_samples": 10, + "subsample": 0.981547, + "colsample_bytree": 0.807094, + "reg_alpha": 0.990212, + "reg_lambda": 0.000114, + "weight_scale": 0.505375 + } + }, + { + "number": 12, + "auc": 0.576082, + "fold_aucs": [ + 0.64191, + 0.559091, + 0.461176, + 0.525926, + 0.692308 + ], + "params": { + "n_estimators": 562, + "learning_rate": 0.088469, + "max_depth": 4, + "num_leaves": 10, + "min_child_samples": 10, + "subsample": 0.997169, + "colsample_bytree": 0.810479, + "reg_alpha": 0.963976, + "reg_lambda": 0.000158, + "weight_scale": 0.727238 + } + }, + { + "number": 14, + "auc": 0.564672, + "fold_aucs": [ + 0.67374, + 0.506818, + 0.428235, + 0.575309, + 0.639257 + ], + "params": { + "n_estimators": 491, + "learning_rate": 0.114438, + "max_depth": 3, + "num_leaves": 7, + "min_child_samples": 16, + "subsample": 0.82468, + "colsample_bytree": 0.860169, + "reg_alpha": 0.002741, + "reg_lambda": 0.753493, + "weight_scale": 1.082406 + } + }, + { + "number": 16, + "auc": 0.554097, + "fold_aucs": [ + 0.660477, + 0.522727, + 0.437647, + 0.520988, + 0.628647 + ], + "params": { + "n_estimators": 297, + "learning_rate": 0.061224, + "max_depth": 4, + "num_leaves": 9, + "min_child_samples": 19, + "subsample": 0.993136, + "colsample_bytree": 0.821911, + "reg_alpha": 0.011599, + "reg_lambda": 0.000478, + "weight_scale": 0.692593 + } + }, + { + "number": 17, + "auc": 0.591536, + "fold_aucs": [ + 0.668435, + 0.577273, + 0.451765, + 0.567901, + 0.692308 + ], + "params": { + "n_estimators": 507, + "learning_rate": 0.128894, + "max_depth": 7, + "num_leaves": 14, + "min_child_samples": 12, + "subsample": 0.797487, + "colsample_bytree": 0.909962, + "reg_alpha": 0.03298, + "reg_lambda": 0.000164, + "weight_scale": 1.127831 + } + }, + { + "number": 18, + "auc": 0.57116, + "fold_aucs": [ + 0.67374, + 0.529545, + 0.444706, + 0.523457, + 0.68435 + ], + "params": { + "n_estimators": 508, + "learning_rate": 0.135775, + "max_depth": 7, + "num_leaves": 16, + "min_child_samples": 29, + "subsample": 0.650515, + "colsample_bytree": 0.872642, + "reg_alpha": 0.003181, + "reg_lambda": 0.000126, + "weight_scale": 1.992908 + } + }, + { + "number": 19, + "auc": 0.583971, + "fold_aucs": [ + 0.665782, + 0.540909, + 0.477647, + 0.54321, + 0.692308 + ], + "params": { + "n_estimators": 591, + "learning_rate": 0.141855, + "max_depth": 7, + "num_leaves": 14, + "min_child_samples": 20, + "subsample": 0.795564, + "colsample_bytree": 0.781601, + "reg_alpha": 0.017111, + "reg_lambda": 0.000315, + "weight_scale": 1.169598 + } + }, + { + "number": 20, + "auc": 0.593819, + "fold_aucs": [ + 0.68435, + 0.554545, + 0.475294, + 0.567901, + 0.687003 + ], + "params": { + "n_estimators": 535, + "learning_rate": 0.086738, + "max_depth": 5, + "num_leaves": 19, + "min_child_samples": 13, + "subsample": 0.888545, + "colsample_bytree": 0.713193, + "reg_alpha": 0.040309, + "reg_lambda": 0.000111, + "weight_scale": 1.602167 + } + }, + { + "number": 21, + "auc": 0.59116, + "fold_aucs": [ + 0.68435, + 0.554545, + 0.487059, + 0.548148, + 0.681698 + ], + "params": { + "n_estimators": 533, + "learning_rate": 0.083292, + "max_depth": 5, + "num_leaves": 18, + "min_child_samples": 13, + "subsample": 0.883987, + "colsample_bytree": 0.692559, + "reg_alpha": 0.047587, + "reg_lambda": 0.00028, + "weight_scale": 1.620631 + } + }, + { + "number": 22, + "auc": 0.573142, + "fold_aucs": [ + 0.65252, + 0.534091, + 0.472941, + 0.545679, + 0.660477 + ], + "params": { + "n_estimators": 527, + "learning_rate": 0.122387, + "max_depth": 5, + "num_leaves": 18, + "min_child_samples": 14, + "subsample": 0.903559, + "colsample_bytree": 0.678119, + "reg_alpha": 0.040814, + "reg_lambda": 0.000303, + "weight_scale": 1.698254 + } + }, + { + "number": 23, + "auc": 0.561128, + "fold_aucs": [ + 0.628647, + 0.525, + 0.432941, + 0.550617, + 0.668435 + ], + "params": { + "n_estimators": 533, + "learning_rate": 0.092496, + "max_depth": 7, + "num_leaves": 25, + "min_child_samples": 21, + "subsample": 0.805511, + "colsample_bytree": 0.697889, + "reg_alpha": 0.00491, + "reg_lambda": 0.001173, + "weight_scale": 1.699668 + } + }, + { + "number": 24, + "auc": 0.589916, + "fold_aucs": [ + 0.729443, + 0.590909, + 0.449412, + 0.54321, + 0.636605 + ], + "params": { + "n_estimators": 426, + "learning_rate": 0.041872, + "max_depth": 5, + "num_leaves": 18, + "min_child_samples": 13, + "subsample": 0.856538, + "colsample_bytree": 0.745445, + "reg_alpha": 0.028845, + "reg_lambda": 0.000301, + "weight_scale": 1.406423 + } + }, + { + "number": 25, + "auc": 0.572459, + "fold_aucs": [ + 0.625995, + 0.531818, + 0.451765, + 0.525926, + 0.72679 + ], + "params": { + "n_estimators": 476, + "learning_rate": 0.15213, + "max_depth": 6, + "num_leaves": 21, + "min_child_samples": 27, + "subsample": 0.774147, + "colsample_bytree": 0.610482, + "reg_alpha": 0.05022, + "reg_lambda": 0.000101, + "weight_scale": 1.248118 + } + }, + { + "number": 26, + "auc": 0.557594, + "fold_aucs": [ + 0.612732, + 0.513636, + 0.484706, + 0.511111, + 0.665782 + ], + "params": { + "n_estimators": 543, + "learning_rate": 0.073373, + "max_depth": 7, + "num_leaves": 24, + "min_child_samples": 35, + "subsample": 0.906402, + "colsample_bytree": 0.764308, + "reg_alpha": 0.006737, + "reg_lambda": 0.00388, + "weight_scale": 1.570241 + } + }, + { + "number": 27, + "auc": 0.550817, + "fold_aucs": [ + 0.599469, + 0.529545, + 0.444706, + 0.535802, + 0.644562 + ], + "params": { + "n_estimators": 399, + "learning_rate": 0.104501, + "max_depth": 5, + "num_leaves": 17, + "min_child_samples": 18, + "subsample": 0.857254, + "colsample_bytree": 0.666257, + "reg_alpha": 0.023816, + "reg_lambda": 0.001031, + "weight_scale": 1.768778 + } + }, + { + "number": 28, + "auc": 0.596882, + "fold_aucs": [ + 0.68435, + 0.563636, + 0.491765, + 0.562963, + 0.681698 + ], + "params": { + "n_estimators": 456, + "learning_rate": 0.084831, + "max_depth": 6, + "num_leaves": 13, + "min_child_samples": 14, + "subsample": 0.926878, + "colsample_bytree": 0.721673, + "reg_alpha": 0.082242, + "reg_lambda": 0.0003, + "weight_scale": 1.517278 + } + }, + { + "number": 29, + "auc": 0.580866, + "fold_aucs": [ + 0.66313, + 0.559091, + 0.456471, + 0.533333, + 0.692308 + ], + "params": { + "n_estimators": 306, + "learning_rate": 0.176254, + "max_depth": 6, + "num_leaves": 13, + "min_child_samples": 15, + "subsample": 0.924467, + "colsample_bytree": 0.543211, + "reg_alpha": 0.256208, + "reg_lambda": 0.000581, + "weight_scale": 1.469907 + } + }, + { + "number": 30, + "auc": 0.563834, + "fold_aucs": [ + 0.615385, + 0.609091, + 0.463529, + 0.483951, + 0.647215 + ], + "params": { + "n_estimators": 456, + "learning_rate": 0.010726, + "max_depth": 7, + "num_leaves": 12, + "min_child_samples": 21, + "subsample": 0.678779, + "colsample_bytree": 0.502729, + "reg_alpha": 0.383224, + "reg_lambda": 0.008701, + "weight_scale": 1.275621 + } + }, + { + "number": 31, + "auc": 0.592019, + "fold_aucs": [ + 0.69496, + 0.565909, + 0.437647, + 0.585185, + 0.676393 + ], + "params": { + "n_estimators": 501, + "learning_rate": 0.083426, + "max_depth": 6, + "num_leaves": 19, + "min_child_samples": 13, + "subsample": 0.933585, + "colsample_bytree": 0.711135, + "reg_alpha": 0.127747, + "reg_lambda": 0.000297, + "weight_scale": 1.592315 + } + }, + { + "number": 32, + "auc": 0.581818, + "fold_aucs": [ + 0.668435, + 0.552273, + 0.451765, + 0.528395, + 0.708223 + ], + "params": { + "n_estimators": 500, + "learning_rate": 0.11794, + "max_depth": 6, + "num_leaves": 20, + "min_child_samples": 16, + "subsample": 0.930951, + "colsample_bytree": 0.716886, + "reg_alpha": 0.125843, + "reg_lambda": 0.000221, + "weight_scale": 1.493466 + } + }, + { + "number": 33, + "auc": 0.568285, + "fold_aucs": [ + 0.633952, + 0.554545, + 0.430588, + 0.577778, + 0.644562 + ], + "params": { + "n_estimators": 402, + "learning_rate": 0.196133, + "max_depth": 6, + "num_leaves": 23, + "min_child_samples": 17, + "subsample": 0.760625, + "colsample_bytree": 0.651247, + "reg_alpha": 0.11641, + "reg_lambda": 0.000205, + "weight_scale": 1.825085 + } + }, + { + "number": 34, + "auc": 0.569187, + "fold_aucs": [ + 0.633952, + 0.525, + 0.454118, + 0.54321, + 0.689655 + ], + "params": { + "n_estimators": 497, + "learning_rate": 0.159669, + "max_depth": 7, + "num_leaves": 26, + "min_child_samples": 12, + "subsample": 0.941807, + "colsample_bytree": 0.843175, + "reg_alpha": 0.016681, + "reg_lambda": 0.000472, + "weight_scale": 1.129029 + } + }, + { + "number": 35, + "auc": 0.585201, + "fold_aucs": [ + 0.639257, + 0.565909, + 0.477647, + 0.582716, + 0.660477 + ], + "params": { + "n_estimators": 421, + "learning_rate": 0.071736, + "max_depth": 6, + "num_leaves": 15, + "min_child_samples": 14, + "subsample": 0.808168, + "colsample_bytree": 0.913982, + "reg_alpha": 0.06013, + "reg_lambda": 0.001341, + "weight_scale": 1.659573 + } + }, + { + "number": 36, + "auc": 0.583577, + "fold_aucs": [ + 0.655172, + 0.55, + 0.468235, + 0.565432, + 0.679045 + ], + "params": { + "n_estimators": 558, + "learning_rate": 0.060353, + "max_depth": 7, + "num_leaves": 12, + "min_child_samples": 12, + "subsample": 0.864579, + "colsample_bytree": 0.770752, + "reg_alpha": 0.427458, + "reg_lambda": 0.000374, + "weight_scale": 1.499848 + } + }, + { + "number": 37, + "auc": 0.560281, + "fold_aucs": [ + 0.649867, + 0.572727, + 0.451765, + 0.503704, + 0.623342 + ], + "params": { + "n_estimators": 328, + "learning_rate": 0.046397, + "max_depth": 6, + "num_leaves": 19, + "min_child_samples": 18, + "subsample": 0.831211, + "colsample_bytree": 0.620719, + "reg_alpha": 0.183723, + "reg_lambda": 0.000857, + "weight_scale": 1.004664 + } + }, + { + "number": 38, + "auc": 0.568119, + "fold_aucs": [ + 0.644562, + 0.543182, + 0.447059, + 0.550617, + 0.655172 + ], + "params": { + "n_estimators": 451, + "learning_rate": 0.101323, + "max_depth": 5, + "num_leaves": 27, + "min_child_samples": 22, + "subsample": 0.956515, + "colsample_bytree": 0.579418, + "reg_alpha": 0.008505, + "reg_lambda": 0.005868, + "weight_scale": 1.373266 + } + }, + { + "number": 39, + "auc": 0.561805, + "fold_aucs": [ + 0.618037, + 0.534091, + 0.496471, + 0.518519, + 0.64191 + ], + "params": { + "n_estimators": 379, + "learning_rate": 0.033717, + "max_depth": 6, + "num_leaves": 13, + "min_child_samples": 27, + "subsample": 0.913022, + "colsample_bytree": 0.729845, + "reg_alpha": 0.00026, + "reg_lambda": 0.000101, + "weight_scale": 0.854892 + } + }, + { + "number": 40, + "auc": 0.564239, + "fold_aucs": [ + 0.602122, + 0.486364, + 0.505882, + 0.553086, + 0.67374 + ], + "params": { + "n_estimators": 513, + "learning_rate": 0.13444, + "max_depth": 7, + "num_leaves": 20, + "min_child_samples": 41, + "subsample": 0.879196, + "colsample_bytree": 0.639975, + "reg_alpha": 0.153072, + "reg_lambda": 0.028829, + "weight_scale": 1.261063 + } + }, + { + "number": 41, + "auc": 0.585368, + "fold_aucs": [ + 0.66313, + 0.55, + 0.451765, + 0.580247, + 0.681698 + ], + "params": { + "n_estimators": 480, + "learning_rate": 0.082353, + "max_depth": 5, + "num_leaves": 17, + "min_child_samples": 12, + "subsample": 0.886178, + "colsample_bytree": 0.99982, + "reg_alpha": 0.042776, + "reg_lambda": 0.000239, + "weight_scale": 1.650531 + } + }, + { + "number": 42, + "auc": 0.580051, + "fold_aucs": [ + 0.67374, + 0.522727, + 0.472941, + 0.57037, + 0.660477 + ], + "params": { + "n_estimators": 532, + "learning_rate": 0.089384, + "max_depth": 5, + "num_leaves": 16, + "min_child_samples": 15, + "subsample": 0.969063, + "colsample_bytree": 0.686304, + "reg_alpha": 0.02165, + "reg_lambda": 0.000175, + "weight_scale": 1.560359 + } + }, + { + "number": 43, + "auc": 0.582682, + "fold_aucs": [ + 0.676393, + 0.538636, + 0.465882, + 0.548148, + 0.68435 + ], + "params": { + "n_estimators": 578, + "learning_rate": 0.068217, + "max_depth": 6, + "num_leaves": 19, + "min_child_samples": 11, + "subsample": 0.781231, + "colsample_bytree": 0.703302, + "reg_alpha": 0.036104, + "reg_lambda": 0.002449, + "weight_scale": 1.911434 + } + }, + { + "number": 44, + "auc": 0.575949, + "fold_aucs": [ + 0.679045, + 0.552273, + 0.458824, + 0.518519, + 0.671088 + ], + "params": { + "n_estimators": 557, + "learning_rate": 0.052528, + "max_depth": 4, + "num_leaves": 11, + "min_child_samples": 14, + "subsample": 0.712831, + "colsample_bytree": 0.784825, + "reg_alpha": 0.065441, + "reg_lambda": 0.000397, + "weight_scale": 1.61055 + } + }, + { + "number": 45, + "auc": 0.596786, + "fold_aucs": [ + 0.687003, + 0.563636, + 0.494118, + 0.565432, + 0.67374 + ], + "params": { + "n_estimators": 444, + "learning_rate": 0.105389, + "max_depth": 5, + "num_leaves": 14, + "min_child_samples": 10, + "subsample": 0.876838, + "colsample_bytree": 0.759064, + "reg_alpha": 0.545065, + "reg_lambda": 0.000172, + "weight_scale": 1.777219 + } + }, + { + "number": 46, + "auc": 0.600162, + "fold_aucs": [ + 0.708223, + 0.525, + 0.494118, + 0.567901, + 0.70557 + ], + "params": { + "n_estimators": 434, + "learning_rate": 0.123659, + "max_depth": 6, + "num_leaves": 14, + "min_child_samples": 10, + "subsample": 0.929062, + "colsample_bytree": 0.94633, + "reg_alpha": 0.573971, + "reg_lambda": 0.000157, + "weight_scale": 1.783105 + } + }, + { + "number": 47, + "auc": 0.572182, + "fold_aucs": [ + 0.644562, + 0.515909, + 0.487059, + 0.555556, + 0.657825 + ], + "params": { + "n_estimators": 434, + "learning_rate": 0.107112, + "max_depth": 6, + "num_leaves": 15, + "min_child_samples": 17, + "subsample": 0.929206, + "colsample_bytree": 0.956526, + "reg_alpha": 0.542658, + "reg_lambda": 0.001835, + "weight_scale": 1.815295 + } + }, + { + "number": 48, + "auc": 0.5912, + "fold_aucs": [ + 0.702918, + 0.568182, + 0.451765, + 0.575309, + 0.657825 + ], + "params": { + "n_estimators": 345, + "learning_rate": 0.093154, + "max_depth": 4, + "num_leaves": 13, + "min_child_samples": 11, + "subsample": 0.969964, + "colsample_bytree": 0.749698, + "reg_alpha": 0.263492, + "reg_lambda": 0.000657, + "weight_scale": 1.754072 + } + }, + { + "number": 49, + "auc": 0.592114, + "fold_aucs": [ + 0.660477, + 0.590909, + 0.498824, + 0.560494, + 0.649867 + ], + "params": { + "n_estimators": 475, + "learning_rate": 0.022083, + "max_depth": 6, + "num_leaves": 22, + "min_child_samples": 10, + "subsample": 0.90378, + "colsample_bytree": 0.835696, + "reg_alpha": 0.462134, + "reg_lambda": 0.000146, + "weight_scale": 1.924901 + } + } + ] +} \ No newline at end of file diff --git a/scripts/train_model.py b/scripts/train_model.py index f9f9fc1..34dafa7 100644 --- a/scripts/train_model.py +++ b/scripts/train_model.py @@ -146,9 +146,16 @@ def generate_dataset(df: pd.DataFrame, n_jobs: int | None = None) -> pd.DataFram return pd.DataFrame(rows) +ACTIVE_PARAMS_PATH = Path("models/active_lgbm_params.json") + + def _load_lgbm_params(tuned_params_path: str | None) -> tuple[dict, float]: """기본 LightGBM 파라미터를 반환하고, 튜닝 JSON이 주어지면 덮어쓴다. - 반환: (lgbm_params, weight_scale) + + 우선순위: + 1. --tuned-params 명시적 인자 + 2. models/active_lgbm_params.json (Optuna가 자동 갱신) + 3. 코드 내 하드코딩 기본값 (fallback) """ lgbm_params: dict = { "n_estimators": 434, @@ -163,15 +170,23 @@ def _load_lgbm_params(tuned_params_path: str | None) -> tuple[dict, float]: } weight_scale = 1.783105 - if tuned_params_path: - with open(tuned_params_path, "r", encoding="utf-8") as f: + # 명시적 인자가 없으면 active 파일 자동 탐색 + resolved_path = tuned_params_path or ( + str(ACTIVE_PARAMS_PATH) if ACTIVE_PARAMS_PATH.exists() else None + ) + + if resolved_path: + with open(resolved_path, "r", encoding="utf-8") as f: tune_data = json.load(f) best_params = dict(tune_data["best_trial"]["params"]) weight_scale = float(best_params.pop("weight_scale", 1.0)) lgbm_params.update(best_params) - print(f"\n[Optuna] 튜닝 파라미터 로드: {tuned_params_path}") + source = "명시적 인자" if tuned_params_path else "active 파일 자동 로드" + print(f"\n[Optuna] 튜닝 파라미터 로드 ({source}): {resolved_path}") print(f"[Optuna] 적용 파라미터: {lgbm_params}") print(f"[Optuna] weight_scale: {weight_scale}\n") + else: + print("[Optuna] active 파일 없음 → 코드 내 기본 파라미터 사용\n") return lgbm_params, weight_scale diff --git a/scripts/tune_hyperparams.py b/scripts/tune_hyperparams.py index 98bd14c..faf7895 100755 --- a/scripts/tune_hyperparams.py +++ b/scripts/tune_hyperparams.py @@ -217,19 +217,30 @@ def measure_baseline( n_splits: int, train_ratio: float, ) -> tuple[float, list[float]]: - """train_model.py의 현재 고정 파라미터로 베이스라인 AUC를 측정한다.""" - baseline_params = { - "n_estimators": 500, - "learning_rate": 0.05, - "max_depth": -1, # train_model.py는 max_depth 미설정 - "num_leaves": 31, - "min_child_samples": 15, - "subsample": 0.8, - "colsample_bytree": 0.8, - "reg_alpha": 0.05, - "reg_lambda": 0.1, - } - print("베이스라인 측정 중 (현재 train_model.py 고정 파라미터)...") + """현재 실전 파라미터(active 파일 또는 하드코딩 기본값)로 베이스라인 AUC를 측정한다.""" + active_path = Path("models/active_lgbm_params.json") + + if active_path.exists(): + with open(active_path, "r", encoding="utf-8") as f: + tune_data = json.load(f) + best_params = dict(tune_data["best_trial"]["params"]) + best_params.pop("weight_scale", None) + baseline_params = best_params + print(f"베이스라인 측정 중 (active 파일: {active_path})...") + else: + baseline_params = { + "n_estimators": 434, + "learning_rate": 0.123659, + "max_depth": 6, + "num_leaves": 14, + "min_child_samples": 10, + "subsample": 0.929062, + "colsample_bytree": 0.946330, + "reg_alpha": 0.573971, + "reg_lambda": 0.000157, + } + print("베이스라인 측정 중 (active 파일 없음 → 코드 내 기본 파라미터)...") + return _walk_forward_cv(X, y, w, baseline_params, n_splits=n_splits, train_ratio=train_ratio) @@ -423,6 +434,19 @@ def main(): output_path = save_results(study, baseline_auc, baseline_folds, elapsed, args.data) print_report(study, baseline_auc, baseline_folds, elapsed, output_path) + # 5. 성능 개선 시 active 파일 자동 갱신 + import shutil + active_path = Path("models/active_lgbm_params.json") + if not args.no_baseline and study.best_value > baseline_auc: + shutil.copy(output_path, active_path) + improvement = study.best_value - baseline_auc + print(f"[MLOps] AUC +{improvement:.4f} 개선 → {active_path} 자동 갱신 완료") + print(f"[MLOps] 다음 train_model.py 실행 시 새 파라미터가 자동 적용됩니다.\n") + elif args.no_baseline: + print("[MLOps] --no-baseline 모드: 성능 비교 없이 active 파일 유지\n") + else: + print(f"[MLOps] 성능 개선 없음 (Best={study.best_value:.4f} ≤ Baseline={baseline_auc:.4f}) → active 파일 유지\n") + if __name__ == "__main__": main()