feat: remove in-container retraining, training is now mac-only

Made-with: Cursor
2026-03-01 18:54:00 +09:00
parent fd96055e73
commit de933b97cc
13 changed files with 955 additions and 132 deletions
--- a/scripts/profile_training.py
+++ b/scripts/profile_training.py
@@ -0,0 +1,53 @@
+"""
+학습 파이프라인 각 단계의 소요 시간을 측정한다.
+사용법: python scripts/profile_training.py --data data/xrpusdt_1m.parquet
+"""
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+import time
+import argparse
+import pandas as pd
+from scripts.train_model import generate_dataset, _cgroup_cpu_count
+
+
+def profile(data_path: str):  # Times dataset generation and LightGBM fitting for one parquet file; prints the results and returns nothing.
+    print(f"데이터 로드: {data_path}")
+    df = pd.read_parquet(data_path)
+    print(f"캔들 수: {len(df)}")
+    # Informational only: this worker count is printed but is not passed to generate_dataset() below.
+    workers = max(1, _cgroup_cpu_count() - 1)
+    print(f"사용 코어: {workers}")
+    # Stage 1: wall-clock time of generate_dataset() on the whole frame (t0 -> t1).
+    t0 = time.perf_counter()
+    dataset = generate_dataset(df)
+    t1 = time.perf_counter()
+    print(f"\n[결과] 데이터셋 생성: {t1 - t0:.1f}초, 샘플 {len(dataset)}개")
+    # These imports run after t1 and before t2, so their cost is in neither stage timing but is part of the total (t3 - t0).
+    import lightgbm as lgb
+    from sklearn.model_selection import train_test_split
+    from src.ml_features import FEATURE_COLS
+    # Stage 2 input: FEATURE_COLS as features and the "label" column as target, split 80/20 with a fixed seed; the split itself is not timed as a stage.
+    X = dataset[FEATURE_COLS]
+    y = dataset["label"]
+    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
+    # X_val / y_val are not used below: only fit() on the training split is measured (t2 -> t3).
+    model = lgb.LGBMClassifier(
+        n_estimators=300, learning_rate=0.05, num_leaves=31,
+        min_child_samples=20, subsample=0.8, colsample_bytree=0.8,
+        class_weight="balanced", random_state=42, verbose=-1,
+    )
+    t2 = time.perf_counter()  # started after the model is constructed, so construction is excluded from the fit timing
+    model.fit(X_train, y_train)
+    t3 = time.perf_counter()
+    print(f"[결과] LightGBM 학습: {t3 - t2:.1f}초")
+    print(f"[결과] 전체: {t3 - t0:.1f}초")  # total spans t0..t3, so it also covers the imports, the split and model construction
+    print(f"\n[비율] 데이터셋 생성: {(t1-t0)/(t3-t0)*100:.0f}% / LightGBM 학습: {(t3-t2)/(t3-t0)*100:.0f}%")  # both shares are relative to the total, so they need not sum to 100%
+
+
+if __name__ == "__main__":  # script entry point: parse --data and profile that file
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data", default="data/xrpusdt_1m.parquet")  # parquet path handed to profile(); the default is used when the flag is omitted
+    args = parser.parse_args()
+    profile(args.data)