diff --git a/.gitignore b/.gitignore index 6beb290..cb4e3a3 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ data/*.parquet .DS_Store .cursor/ -.worktrees/ \ No newline at end of file +.worktrees/ +.venv diff --git a/.venv b/.venv deleted file mode 120000 index 872124e..0000000 --- a/.venv +++ /dev/null @@ -1 +0,0 @@ -/Users/gihyeon/github/cointrader/.venv \ No newline at end of file diff --git a/data/dogeusdt/combined_15m.parquet b/data/dogeusdt/combined_15m.parquet new file mode 100644 index 0000000..3440811 Binary files /dev/null and b/data/dogeusdt/combined_15m.parquet differ diff --git a/data/trxusdt/combined_15m.parquet b/data/trxusdt/combined_15m.parquet new file mode 100644 index 0000000..9b1f6de Binary files /dev/null and b/data/trxusdt/combined_15m.parquet differ diff --git a/data/xrpusdt/combined_15m.parquet b/data/xrpusdt/combined_15m.parquet new file mode 100644 index 0000000..4221ded Binary files /dev/null and b/data/xrpusdt/combined_15m.parquet differ diff --git a/models/dogeusdt/lgbm_filter.pkl b/models/dogeusdt/lgbm_filter.pkl new file mode 100644 index 0000000..97dc77f Binary files /dev/null and b/models/dogeusdt/lgbm_filter.pkl differ diff --git a/models/dogeusdt/training_log.json b/models/dogeusdt/training_log.json new file mode 100644 index 0000000..7ba1e43 --- /dev/null +++ b/models/dogeusdt/training_log.json @@ -0,0 +1,27 @@ +[ + { + "date": "2026-03-05T23:54:51.517734", + "backend": "lgbm", + "auc": 0.9565, + "best_threshold": 0.3318, + "best_precision": 0.548, + "best_recall": 0.489, + "samples": 3330, + "features": 26, + "time_weight_decay": 2.0, + "model_path": "models/dogeusdt/lgbm_filter.pkl", + "tuned_params_path": null, + "lgbm_params": { + "n_estimators": 434, + "learning_rate": 0.123659, + "max_depth": 6, + "num_leaves": 14, + "min_child_samples": 10, + "subsample": 0.929062, + "colsample_bytree": 0.94633, + "reg_alpha": 0.573971, + "reg_lambda": 0.000157 + }, + "weight_scale": 1.783105 + } +] \ No newline at end of file diff --git a/models/trxusdt/lgbm_filter.pkl b/models/trxusdt/lgbm_filter.pkl new file mode 100644 index 0000000..c457fa1 Binary files /dev/null and b/models/trxusdt/lgbm_filter.pkl differ diff --git a/models/trxusdt/training_log.json b/models/trxusdt/training_log.json new file mode 100644 index 0000000..4d52670 --- /dev/null +++ b/models/trxusdt/training_log.json @@ -0,0 +1,27 @@ +[ + { + "date": "2026-03-05T23:54:05.625978", + "backend": "lgbm", + "auc": 0.947, + "best_threshold": 0.2822, + "best_precision": 0.446, + "best_recall": 0.763, + "samples": 2940, + "features": 26, + "time_weight_decay": 2.0, + "model_path": "models/trxusdt/lgbm_filter.pkl", + "tuned_params_path": null, + "lgbm_params": { + "n_estimators": 434, + "learning_rate": 0.123659, + "max_depth": 6, + "num_leaves": 14, + "min_child_samples": 10, + "subsample": 0.929062, + "colsample_bytree": 0.94633, + "reg_alpha": 0.573971, + "reg_lambda": 0.000157 + }, + "weight_scale": 1.783105 + } +] \ No newline at end of file diff --git a/models/xrpusdt/lgbm_filter.pkl b/models/xrpusdt/lgbm_filter.pkl new file mode 100644 index 0000000..c048776 Binary files /dev/null and b/models/xrpusdt/lgbm_filter.pkl differ diff --git a/models/xrpusdt/training_log.json b/models/xrpusdt/training_log.json new file mode 100644 index 0000000..2ce89c8 --- /dev/null +++ b/models/xrpusdt/training_log.json @@ -0,0 +1,27 @@ +[ + { + "date": "2026-03-05T23:53:20.451588", + "backend": "lgbm", + "auc": 0.9428, + "best_threshold": 0.8486, + "best_precision": 0.583, + "best_recall": 0.171, + "samples": 3222, + "features": 26, + "time_weight_decay": 2.0, + "model_path": "models/xrpusdt/lgbm_filter.pkl", + "tuned_params_path": null, + "lgbm_params": { + "n_estimators": 434, + "learning_rate": 0.123659, + "max_depth": 6, + "num_leaves": 14, + "min_child_samples": 10, + "subsample": 0.929062, + "colsample_bytree": 0.94633, + "reg_alpha": 0.573971, + "reg_lambda": 0.000157 + }, + "weight_scale": 1.783105 + } +] \ No newline at end of file diff --git a/scripts/fetch_history.py b/scripts/fetch_history.py index e35ca35..a24e012 100644 --- a/scripts/fetch_history.py +++ b/scripts/fetch_history.py @@ -331,6 +331,10 @@ def main(): "--no-upsert", action="store_true", help="기존 parquet을 Upsert하지 않고 새로 덮어씀 (기본: Upsert 활성화)", ) + parser.add_argument( + "--corr-cache-dir", default=None, + help="상관 심볼(BTC/ETH) 캐시 디렉토리. 첫 수집 시 저장, 이후 재사용", + ) args = parser.parse_args() # --symbol 모드: 단일 거래 심볼 + 상관관계 심볼 자동 추가, 출력 경로 자동 결정 @@ -360,8 +364,43 @@ def main(): df.to_parquet(args.output) print(f"{'Upsert' if not args.no_upsert else '저장'} 완료: {args.output} ({len(df):,}행, {len(df.columns)}컬럼)") else: - # 멀티 심볼: 단일 클라이언트로 순차 수집 후 타임스탬프 기준 inner join 병합 - dfs = asyncio.run(fetch_klines_all(args.symbols, args.interval, args.days)) + # 멀티 심볼: 상관 심볼 캐시 활용 + corr_cache_dir = args.corr_cache_dir + cached_symbols = {} + symbols_to_fetch = list(args.symbols) + + if corr_cache_dir: + os.makedirs(corr_cache_dir, exist_ok=True) + remaining = [] + for sym in args.symbols: + cache_file = os.path.join(corr_cache_dir, f"{sym.lower()}_{args.interval}.parquet") + if os.path.exists(cache_file): + print(f" [{sym}] 캐시 사용: {cache_file}") + cached_symbols[sym] = pd.read_parquet(cache_file) + else: + remaining.append(sym) + symbols_to_fetch = remaining + + if symbols_to_fetch: + dfs = asyncio.run(fetch_klines_all(symbols_to_fetch, args.interval, args.days)) + else: + dfs = {} + + # 캐시에 저장 (상관 심볼만) + if corr_cache_dir: + from src.config import Config + try: + corr_list = Config().correlation_symbols + except Exception: + corr_list = ["BTCUSDT", "ETHUSDT"] + for sym, df in dfs.items(): + if sym in corr_list: + cache_file = os.path.join(corr_cache_dir, f"{sym.lower()}_{args.interval}.parquet") + df.to_parquet(cache_file) + print(f" [{sym}] 캐시 저장: {cache_file}") + + # 캐시 + 새로 수집한 데이터 합치기 + dfs.update(cached_symbols) primary = args.symbols[0] merged = dfs[primary].copy() @@ -377,7 +416,7 @@ def main(): print(f"\n[OI/펀딩비] {primary} 수집 중...") merged = asyncio.run(_fetch_oi_and_funding(primary, args.days, merged)) - output = args.output.replace("xrpusdt", "combined") + output = args.output if not args.no_upsert: merged = upsert_parquet(output, merged) merged.to_parquet(output) diff --git a/scripts/train_and_deploy.sh b/scripts/train_and_deploy.sh index d1ae75d..443cb2c 100755 --- a/scripts/train_and_deploy.sh +++ b/scripts/train_and_deploy.sh @@ -68,6 +68,7 @@ else fi DECAY="${TIME_WEIGHT_DECAY:-2.0}" +CORR_CACHE_DIR="data/.corr_cache" echo "" echo "========================================" @@ -106,6 +107,7 @@ for SYM in "${TARGETS[@]}"; do --symbol "$SYM" \ --interval 15m \ --days "$FETCH_DAYS" \ + --corr-cache-dir "$CORR_CACHE_DIR" \ $UPSERT_FLAG # === [1.5/3] OI 파생 피처 A/B 비교 === @@ -154,6 +156,9 @@ for SYM in "${TARGETS[@]}"; do echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" done +# 상관 심볼 캐시 정리 +rm -rf "$CORR_CACHE_DIR" + echo "" echo "=== 전체 파이프라인 완료: $(date '+%Y-%m-%d %H:%M:%S %Z') ===" echo ""