feat: 전략 리서치 스크립트 및 테스트 일괄 추가
- FR/OI 백테스트, LS ratio 백테스트 스크립트
- 펀딩/OI 분석, 거래 LS 분석 스크립트
- evaluate_oos 테스트 추가

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
364
scripts/fr_oi_backtest.py
Normal file
364
scripts/fr_oi_backtest.py
Normal file
@@ -0,0 +1,364 @@
|
||||
"""
|
||||
FR × OI change-rate backtest — Phase 1: 12 combinations.

Signal: FR × OI change (1h) = funding_rate × oi_pct_change_4
- SHORT: feature >= threshold (long-squeeze precursor)
- LONG: feature <= threshold (short-squeeze precursor)
- Holding period: 1h (4 candles) / 4h (16 candles)

Usage: python scripts/fr_oi_backtest.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Binance futures REST base URL; endpoint paths are appended to it.
BASE = "https://fapi.binance.com"
# Symbol passed to the open-interest and funding-rate fetchers.
SYMBOL = "XRPUSDT"
# Directory holding the local kline parquet that main() reads.
DATA_DIR = Path("data/xrpusdt")
# Per-side fee as a fraction of notional; run_backtest charges it twice
# (once on entry, once on exit).
FEE_RATE = 0.0004
|
||||
|
||||
|
||||
async def fetch_oi_history(session, symbol, start_ms, end_ms):
    """Download 15-minute open-interest history, paging until ``end_ms``.

    Requests ``/futures/data/openInterestHist`` 500 rows at a time, moving
    the start of the window to just past the last returned timestamp.

    Args:
        session: Open HTTP session exposing an aiohttp-style async ``get``.
        symbol: Futures symbol to query.
        start_ms: Window start in epoch milliseconds.
        end_ms: Window end in epoch milliseconds.

    Returns:
        DataFrame with ``timestamp`` (UTC datetime) and ``oi_value`` (float,
        taken from ``sumOpenInterestValue``), de-duplicated and sorted by
        timestamp. A column-less empty DataFrame when nothing was fetched.
    """
    rows = []
    cursor = start_ms
    calls = 0
    while cursor < end_ms:
        params = {
            "symbol": symbol,
            "period": "15m",
            "startTime": cursor,
            "endTime": end_ms,
            "limit": 500,
        }
        async with session.get(f"{BASE}/futures/data/openInterestHist", params=params) as resp:
            data = await resp.json()

        if not isinstance(data, list):
            # A non-list payload is not page data (API error bodies are JSON
            # objects). Keep the best-effort behaviour of stopping here, but
            # say so instead of silently returning a truncated history.
            print(f"  [경고] openInterestHist 응답 오류 — 수집 중단: {data}")
            break
        if not data:
            break

        rows.extend(data)
        last_ts = int(data[-1]["timestamp"])
        if last_ts <= cursor:
            # No forward progress; stop rather than loop forever.
            break
        cursor = last_ts + 1

        # Throttle: short pause per page, longer pause every 50 requests.
        calls += 1
        await asyncio.sleep(5 if calls % 50 == 0 else 0.1)

    if not rows:
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    df["timestamp"] = pd.to_datetime(df["timestamp"].astype(int), unit="ms", utc=True)
    df["oi_value"] = df["sumOpenInterestValue"].astype(float)
    return df[["timestamp", "oi_value"]].drop_duplicates("timestamp").sort_values("timestamp")
|
||||
|
||||
|
||||
async def fetch_funding_rate(session, symbol, start_ms, end_ms):
    """Download funding-rate history, paging until ``end_ms``.

    Requests ``/fapi/v1/fundingRate`` 1000 rows at a time, moving the start
    of the window to just past the last returned ``fundingTime``.

    Args:
        session: Open HTTP session exposing an aiohttp-style async ``get``.
        symbol: Futures symbol to query.
        start_ms: Window start in epoch milliseconds.
        end_ms: Window end in epoch milliseconds.

    Returns:
        DataFrame with ``timestamp`` (UTC datetime) and ``funding_rate``
        (float), de-duplicated and sorted by timestamp. A column-less empty
        DataFrame when nothing was fetched.
    """
    rows = []
    cursor = start_ms
    while cursor < end_ms:
        params = {"symbol": symbol, "startTime": cursor, "endTime": end_ms, "limit": 1000}
        async with session.get(f"{BASE}/fapi/v1/fundingRate", params=params) as resp:
            data = await resp.json()

        if not isinstance(data, list):
            # A non-list payload is not page data (API error bodies are JSON
            # objects). Keep the best-effort behaviour of stopping here, but
            # say so instead of silently returning a truncated history.
            print(f"  [경고] fundingRate 응답 오류 — 수집 중단: {data}")
            break
        if not data:
            break

        rows.extend(data)
        last_ts = int(data[-1]["fundingTime"])
        if last_ts <= cursor:
            # No forward progress; stop rather than loop forever.
            break
        cursor = last_ts + 1
        await asyncio.sleep(0.1)

    if not rows:
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    df["timestamp"] = pd.to_datetime(df["fundingTime"].astype(int), unit="ms", utc=True)
    df["funding_rate"] = df["fundingRate"].astype(float)
    return df[["timestamp", "funding_rate"]].drop_duplicates("timestamp").sort_values("timestamp")
|
||||
|
||||
|
||||
def run_backtest(df, feature_col, percentile, direction, hold_bars):
    """Back-test one threshold rule with non-overlapping, fixed-length holds.

    A bar triggers when its feature value is on the signal side of the
    ``percentile`` quantile of ``feature_col`` (``>=`` for SHORT, ``<=`` for
    LONG). The position opens at the next bar's open, is held for
    ``hold_bars`` bars and closes at the last held bar's close. Scanning
    resumes on the bar after the exit, so trades never overlap.

    NOTE: the threshold is the quantile of the whole input frame, so the
    results are in-sample.

    Args:
        df: Frame with ``timestamp``, ``open``, ``close`` and ``feature_col``
            columns; rows are addressed by position.
        feature_col: Name of the signal column. NaN rows never trigger.
        percentile: Quantile (0-100) of ``feature_col`` used as threshold.
        direction: ``"SHORT"`` or ``"LONG"``; other values produce no trades.
        hold_bars: Number of bars each position is held.

    Returns:
        ``None`` when no trade was taken; otherwise a dict with ``trades``,
        ``wins``, ``losses``, ``win_rate`` (percent), ``pf`` (profit factor,
        ``inf`` when there are wins and no losses), ``total_pnl_bps``,
        ``avg_pnl_bps``, ``max_dd_bps`` (<= 0), ``threshold`` and
        ``df_trades`` (one row per trade).
    """
    threshold = df[feature_col].quantile(percentile / 100)

    # Pull the columns out once: df.iloc[i][col] builds a whole row Series on
    # every access, which dominates the runtime of the scan below.
    feature = df[feature_col].to_numpy()
    opens = df["open"].to_numpy()
    closes = df["close"].to_numpy()
    timestamps = df["timestamp"]

    is_long = direction == "LONG"
    is_short = direction == "SHORT"
    round_trip_fee = FEE_RATE * 2  # fee charged on entry and on exit
    # Same bound as before: it keeps exit_idx strictly below the last row, so
    # no separate out-of-range check is needed inside the loop.
    last_signal = len(df) - hold_bars - 1

    trades = []
    i = 0
    while i < last_signal:
        val = feature[i]
        triggered = not pd.isna(val) and (
            (is_short and val >= threshold) or (is_long and val <= threshold)
        )
        if not triggered:
            i += 1
            continue

        entry_idx = i + 1
        exit_idx = i + hold_bars
        entry_price = opens[entry_idx]
        exit_price = closes[exit_idx]

        if is_long:
            gross_return = exit_price / entry_price - 1
        else:
            # Short PnL on a linear (USDT-margined) contract is measured
            # against the entry price: (entry - exit) / entry. The previous
            # entry / exit - 1 overstated gains and understated losses.
            gross_return = 1 - exit_price / entry_price
        net_return = gross_return - round_trip_fee

        trades.append({
            "entry_time": timestamps.iloc[entry_idx],
            "exit_time": timestamps.iloc[exit_idx],
            "entry_price": entry_price,
            "exit_price": exit_price,
            "feature_val": val,
            "gross_return_bps": gross_return * 10000,
            "net_return_bps": net_return * 10000,
        })
        i = exit_idx + 1  # resume scanning after the position is closed

    if not trades:
        return None

    tdf = pd.DataFrame(trades)
    net_bps = tdf["net_return_bps"]
    wins = net_bps[net_bps > 0]
    losses = net_bps[net_bps <= 0]

    gross_profit = wins.sum()
    gross_loss = abs(losses.sum())
    if gross_loss > 0:
        pf = gross_profit / gross_loss
    elif gross_profit > 0:
        pf = float("inf")
    else:
        pf = 0

    cum_pnl = net_bps.cumsum()
    # Floor the running peak at the starting equity (0) so that a losing
    # streak at the very beginning counts as drawdown.
    running_peak = cum_pnl.cummax().clip(lower=0)
    max_dd = (cum_pnl - running_peak).min()

    return {
        "trades": len(tdf),
        "wins": len(wins),
        "losses": len(losses),
        "win_rate": len(wins) / len(tdf) * 100,
        "pf": pf,
        "total_pnl_bps": net_bps.sum(),
        "avg_pnl_bps": net_bps.mean(),
        "max_dd_bps": max_dd,
        "threshold": threshold,
        "df_trades": tdf,
    }
|
||||
|
||||
|
||||
def confidence(n):
    """Grade a back-test result by its number of trades.

    Args:
        n: Trade count.

    Returns:
        ``(emoji, label)``: red / "폐기" (discard) below 20 trades,
        yellow / "참고" (reference only) for 20-49, green / "검토" (review)
        for 50 or more.
    """
    if n < 20:
        return "🔴", "폐기"
    if n < 50:
        return "🟡", "참고"
    return "🟢", "검토"
|
||||
|
||||
|
||||
async def main():
    """Run the Phase 1 FR × OI backtest end to end and print the report.

    Steps: load the local 15m kline parquet, download 29 days of open
    interest and funding rates, merge them onto the klines, build the
    ``fr_x_oi_1h`` feature, back-test the 12 hold/direction/percentile
    combinations, then print the result table, the symmetry check, the
    hold-time comparison, the verdict and a breakdown of the best
    combination. Returns early (after printing a message) when either
    download comes back empty.
    """
    banner = "=" * 80
    print(banner)
    print(" FR × OI 변화율 백테스트 — Phase 1: 12개 조합")
    print(banner)

    # --- Data collection ---
    print("\n[1] 데이터 수집")
    df_kline = pd.read_parquet(DATA_DIR / "combined_15m.parquet")

    end_dt = datetime.now(timezone.utc)
    oi_start_dt = end_dt - timedelta(days=29)
    oi_start_ms = int(oi_start_dt.replace(microsecond=0, second=0).timestamp()) * 1000
    fr_start_ms = oi_start_ms
    end_ms = int(end_dt.replace(microsecond=0, second=0).timestamp()) * 1000

    async with aiohttp.ClientSession() as session:
        print(" OI 수집...")
        oi_df = await fetch_oi_history(session, SYMBOL, oi_start_ms, end_ms)
        print(f" OI: {len(oi_df)} rows")
        print(" FR 수집...")
        fr_df = await fetch_funding_rate(session, SYMBOL, fr_start_ms, end_ms)
        print(f" FR: {len(fr_df)} rows")

    if oi_df.empty or fr_df.empty:
        # The fetchers return a column-less frame when nothing came back;
        # merge_asof below would fail on the missing "timestamp" column.
        print("\n OI 또는 FR 데이터를 가져오지 못해 백테스트를 중단합니다.")
        return

    # --- Merge ---
    print("\n[2] 데이터 병합")
    # NOTE(review): assumes the parquet is indexed by a tz-aware datetime
    # index named "timestamp" — confirm against the file that produces it.
    df = df_kline.loc[oi_start_dt:].copy().reset_index()
    print(f" Kline (29일): {len(df)} rows")

    # Attach the closest OI sample within 20 minutes of each bar.
    df = pd.merge_asof(df.sort_values("timestamp"), oi_df.sort_values("timestamp"),
                       on="timestamp", direction="nearest", tolerance=pd.Timedelta(minutes=20))
    df["oi_pct_change_4"] = df["oi_value"].pct_change(4)

    # Attach the most recent funding rate at or before each bar (forward fill).
    df = pd.merge_asof(df.sort_values("timestamp"),
                       fr_df.rename(columns={"funding_rate": "fr_api"}).sort_values("timestamp"),
                       on="timestamp", direction="backward")

    # Core feature: funding rate × 1h OI change.
    df["fr_x_oi_1h"] = df["fr_api"] * df["oi_pct_change_4"]

    valid = df.dropna(subset=["fr_x_oi_1h"])
    print(f" 유효 데이터: {len(valid)} rows")
    print(f" fr_x_oi_1h: mean={valid['fr_x_oi_1h'].mean():.8f}, std={valid['fr_x_oi_1h'].std():.8f}")

    for p in [25, 50, 75]:
        v = valid["fr_x_oi_1h"].quantile(p / 100)
        print(f" P{p}: {v:.8f}")

    # --- Back-test the 12 combinations ---
    print("\n[3] 12개 조합 백테스트")
    print(banner)

    combos = []
    for hold_label, hold_bars in [("1h", 4), ("4h", 16)]:
        for direction in ["SHORT", "LONG"]:
            for pct in [75, 50, 25]:
                desc_dir = "롱스퀴즈" if direction == "SHORT" else "숏스퀴즈"
                combos.append({
                    "hold_label": hold_label,
                    "hold_bars": hold_bars,
                    "direction": direction,
                    "percentile": pct,
                    "desc": f"{direction} {hold_label} P{pct} ({desc_dir})",
                })

    # Back-test on the full merged frame rather than the NaN-dropped one:
    # dropping rows would make "next bar" and hold_bars span time gaps.
    # run_backtest skips NaN feature rows itself and quantile() ignores NaN,
    # so thresholds are unchanged.
    bars = df.reset_index(drop=True)

    results = []
    for c in combos:
        r = run_backtest(bars, "fr_x_oi_1h", c["percentile"], c["direction"], c["hold_bars"])
        if r:
            r.update(c)
        else:
            r = {**c, "trades": 0, "wins": 0, "losses": 0, "win_rate": 0,
                 "pf": 0, "total_pnl_bps": 0, "avg_pnl_bps": 0, "max_dd_bps": 0, "threshold": 0}
        results.append(r)

    def best_by_pf(hold_label, direction, min_trades):
        """Return the highest-PF result for a hold/direction pair, or None."""
        candidates = [
            r for r in results
            if r["hold_label"] == hold_label
            and r["direction"] == direction
            and r["trades"] >= min_trades
        ]
        return max(candidates, key=lambda x: x["pf"]) if candidates else None

    # --- Result table ---
    print(f"\n{'ID':>3} {'조합':<28} {'거래수':>6} {'승률':>7} {'PF':>7} {'PnL(bps)':>10} {'MaxDD':>10} {'신뢰도'}")
    print("-" * 90)

    for i, r in enumerate(results, 1):
        emoji, label = confidence(r["trades"])
        pf_str = f"{r['pf']:.2f}" if r["pf"] != float("inf") else "INF"
        print(f"{i:>3} {r['desc']:<28} {r['trades']:>6} {r['win_rate']:>6.1f}% {pf_str:>7} "
              f"{r['total_pnl_bps']:>+10.1f} {r['max_dd_bps']:>10.1f} {emoji} {label}")

    # --- Symmetry check: does the signal work in both directions? ---
    print("\n" + banner)
    print(" [대칭성 검증]")
    print(banner)

    for hold_label in ["1h", "4h"]:
        best_short = best_by_pf(hold_label, "SHORT", 1)
        best_long = best_by_pf(hold_label, "LONG", 1)

        print(f"\n [{hold_label} 보유]")
        if best_short:
            print(f" Best SHORT: {best_short['desc']} — PF={best_short['pf']:.2f}, {best_short['trades']}건")
        if best_long:
            print(f" Best LONG: {best_long['desc']} — PF={best_long['pf']:.2f}, {best_long['trades']}건")

        if best_short and best_long:
            s_pf = best_short["pf"]
            l_pf = best_long["pf"]
            if s_pf > 1.5 and l_pf > 1.5:
                print(" → Case 1: 양방향 생존 ✓ Phase 2 후보")
            elif (s_pf > 1.5 and l_pf < 0.5) or (l_pf > 1.5 and s_pf < 0.5):
                print(" → Case 2: 한쪽만 성공 ✗ 시장 베타/우연")
            elif s_pf > 1.5 or l_pf > 1.5:
                print(" → Case 3: 부분적 edge ~ 낮은 신뢰도")
            elif s_pf > 1.0 and l_pf > 1.0:
                print(" → 양쪽 PF > 1.0이나 < 1.5 — 약한 edge")
            else:
                print(" → 양쪽 모두 약함")

    # --- Hold-time comparison ---
    print("\n" + banner)
    print(" [보유시간 비교]")
    print(banner)

    for direction in ["SHORT", "LONG"]:
        best_1h = best_by_pf("1h", direction, 1)
        best_4h = best_by_pf("4h", direction, 1)

        print(f"\n [{direction}]")
        if best_1h:
            print(f" 1h Best: PF={best_1h['pf']:.2f} ({best_1h['desc']}, {best_1h['trades']}건)")
        if best_4h:
            print(f" 4h Best: PF={best_4h['pf']:.2f} ({best_4h['desc']}, {best_4h['trades']}건)")
        if best_1h and best_4h:
            if best_4h["pf"] > best_1h["pf"]:
                print(" → 4h가 더 강함 (상관분석 r=-0.1734과 일치)")
            else:
                print(" → 1h가 더 강함 (주의: 상관분석은 4h 기준)")

    # --- Verdict ---
    print("\n" + banner)
    print(" [최종 판정]")
    print(banner)

    # Phase 2 candidates: only combinations with at least 20 trades count.
    phase2 = []
    for hold_label in ["4h", "1h"]:
        best_s = best_by_pf(hold_label, "SHORT", 20)
        best_l = best_by_pf(hold_label, "LONG", 20)

        if best_s and best_l:
            if best_s["pf"] > 1.5 and best_l["pf"] > 1.5:
                phase2.append(("Case1", hold_label, best_s, best_l))
            elif best_s["pf"] > 1.5 or best_l["pf"] > 1.5:
                phase2.append(("Case3", hold_label, best_s, best_l))

    if phase2:
        print("\n 🟢 Phase 2 후보 발견!")
        for case, hl, bs, bl in phase2:
            print(f" [{case}] {hl}: SHORT PF={bs['pf']:.2f}({bs['trades']}건), "
                  f"LONG PF={bl['pf']:.2f}({bl['trades']}건)")
        print("\n → Phase 2 (Bot Simulation) 진행 권장")
        print(" → 단, 29일 OI 데이터 + 448행 제한 감안")
    else:
        all_pf = [(r["desc"], r["pf"], r["trades"]) for r in results if r["trades"] > 0]
        all_pf.sort(key=lambda x: x[1], reverse=True)
        best = all_pf[0] if all_pf else ("N/A", 0, 0)

        above_1 = [r for r in results if r["pf"] > 1.0 and r["trades"] >= 20]
        if above_1:
            print(f"\n 🟡 PF > 1.0 조합 존재 ({len(above_1)}개), 단 < 1.5")
            for r in sorted(above_1, key=lambda x: x["pf"], reverse=True):
                emoji, _ = confidence(r["trades"])
                print(f" {r['desc']}: PF={r['pf']:.2f}, {r['trades']}건 {emoji}")
            print("\n → 약한 edge. 4월 데이터 축적 후 재검증 권장.")
        else:
            print("\n 🔴 PF > 1.0 조합 없음 (20건 이상)")
            print(f" Best: {best[0]} (PF={best[1]:.2f}, {best[2]}건)")
            print("\n → FR × OI 시그널도 비용 후 edge 없음")

    # --- Breakdown of the best combination (more than 10 trades) ---
    valid_results = [r for r in results if r["trades"] > 10 and "df_trades" in r]
    if valid_results:
        best_r = max(valid_results, key=lambda x: x["pf"])
        print(f"\n[참고] Best 조합 상세: {best_r['desc']}")
        print("-" * 60)
        tdf = best_r["df_trades"]
        print(f" 기간: {tdf['entry_time'].min()} ~ {tdf['exit_time'].max()}")
        print(f" 평균 피처값: {tdf['feature_val'].mean():.8f}")
        winners = tdf[tdf["net_return_bps"] > 0]
        losers = tdf[tdf["net_return_bps"] <= 0]
        if len(winners) > 0:
            print(f" 수익 거래 평균: {winners['net_return_bps'].mean():.1f} bps ({len(winners)}건)")
        if len(losers) > 0:
            print(f" 손실 거래 평균: {losers['net_return_bps'].mean():.1f} bps ({len(losers)}건)")

    print("\n" + banner)
    print(" 분석 완료.")
    print(banner)
|
||||
|
||||
|
||||
# Script entry point: run the async pipeline when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user