feat: implement BTC/ETH correlation features for improved model accuracy

- Added a new design document outlining the integration of BTC/ETH candle data as additional features in the XRP ML filter, enhancing prediction accuracy.
- Introduced `MultiSymbolStream` for combined WebSocket data retrieval of XRP, BTC, and ETH.
- Expanded feature set from 13 to 21 by including 8 new BTC/ETH-related features.
- Updated various scripts and modules to support the new feature set and data handling.
- Enhanced training and deployment scripts to accommodate the new dataset structure.

This commit lays the groundwork for improved model performance by leveraging XRP's correlation with BTC and ETH.
This commit is contained in:
21in7
2026-03-01 19:30:17 +09:00
parent c4062c39d3
commit d1af736bfc
15 changed files with 1448 additions and 68 deletions

View File

@@ -35,6 +35,19 @@ def sample_df():
})
def test_bot_uses_multi_symbol_stream(config):
    """TradingBot must expose a MultiSymbolStream as its ``stream`` attribute."""
    from src.data_stream import MultiSymbolStream

    # The exchange client class is patched out while the bot is constructed.
    with patch("src.bot.BinanceFuturesClient"):
        trading_bot = TradingBot(config)
        stream = trading_bot.stream
        assert isinstance(stream, MultiSymbolStream)
def test_bot_stream_has_btc_eth_buffers(config):
    """The bot's stream must hold buffers keyed by ``btcusdt`` and ``ethusdt``."""
    # The exchange client class is patched out while the bot is constructed.
    with patch("src.bot.BinanceFuturesClient"):
        trading_bot = TradingBot(config)
        buffers = trading_bot.stream.buffers
        for symbol_key in ("btcusdt", "ethusdt"):
            assert symbol_key in buffers
@pytest.mark.asyncio
async def test_bot_processes_signal(config, sample_df):
with patch("src.bot.BinanceFuturesClient") as MockExchange:

View File

@@ -2,6 +2,43 @@ import pytest
import asyncio
from unittest.mock import AsyncMock, patch, MagicMock
from src.data_stream import KlineStream
from src.data_stream import MultiSymbolStream
def test_multi_symbol_stream_has_three_buffers():
    """A stream built for three upper-case symbols has a lower-case buffer key for each."""
    requested = ["XRPUSDT", "BTCUSDT", "ETHUSDT"]
    stream = MultiSymbolStream(symbols=requested, interval="1m")
    for buffer_key in ("xrpusdt", "btcusdt", "ethusdt"):
        assert buffer_key in stream.buffers
def test_multi_symbol_stream_get_dataframe_returns_none_when_empty():
    """With no candles buffered, get_dataframe must return None."""
    requested = ["XRPUSDT", "BTCUSDT", "ETHUSDT"]
    stream = MultiSymbolStream(symbols=requested, interval="1m")
    frame = stream.get_dataframe("XRPUSDT")
    assert frame is None
def test_multi_symbol_stream_get_dataframe_returns_df_when_full():
    """get_dataframe must return one row per buffered candle.

    Fifty closed candles with distinct timestamps are appended directly to the
    XRP buffer of a stream created with ``buffer_size=200``; the result must be
    non-None and contain exactly 50 rows.
    """
    stream = MultiSymbolStream(
        symbols=["XRPUSDT", "BTCUSDT", "ETHUSDT"],
        interval="1m",
        buffer_size=200,
    )
    candle = {
        "timestamp": 1000, "open": 1.0, "high": 1.1,
        "low": 0.9, "close": 1.05, "volume": 100.0, "is_closed": True,
    }
    for i in range(50):
        # Build a fresh dict per candle so buffered entries never alias.
        stream.buffers["xrpusdt"].append({**candle, "timestamp": 1000 + i})
    df = stream.get_dataframe("XRPUSDT")
    assert df is not None
    assert len(df) == 50
@pytest.mark.asyncio

View File

@@ -29,12 +29,16 @@ def test_returns_dataframe(sample_df):
def test_has_required_columns(sample_df):
    """The 13 base feature columns plus the label column must all be present.

    Only the base features are checked because no BTC/ETH frames are passed
    to the dataset builder here.
    """
    BASE_FEATURE_COLS = [
        "rsi", "macd_hist", "bb_pct", "ema_align",
        "stoch_k", "stoch_d", "atr_pct", "vol_ratio",
        "ret_1", "ret_3", "ret_5", "signal_strength", "side",
    ]
    result = generate_dataset_vectorized(sample_df)
    # NOTE(review): an empty result passes vacuously — confirm that is intended.
    if len(result) > 0:
        assert "label" in result.columns
        for col in BASE_FEATURE_COLS:
            assert col in result.columns, f"컬럼 없음: {col}"
@@ -45,6 +49,30 @@ def test_label_is_binary(sample_df):
assert set(result["label"].unique()).issubset({0, 1})
def test_generate_dataset_vectorized_with_btc_eth_has_21_feature_cols():
    """With BTC/ETH frames supplied, the result holds every FEATURE_COLS column plus label."""
    import pandas as pd
    import numpy as np
    from src.dataset_builder import generate_dataset_vectorized
    from src.ml_features import FEATURE_COLS

    np.random.seed(42)
    n = 500
    # Draw order matters for reproducibility: returns first, then volume.
    step_returns = np.random.randn(n) * 0.001
    closes = np.cumprod(1 + step_returns)
    volume = np.random.rand(n) * 1000 + 500

    xrp_df = pd.DataFrame({
        "open": closes * 0.999,
        "high": closes * 1.005,
        "low": closes * 0.995,
        "close": closes,
        "volume": volume,
    })
    # BTC and ETH frames are the XRP frame scaled by a constant factor.
    btc_df = xrp_df * 50000
    eth_df = xrp_df * 3000

    result = generate_dataset_vectorized(xrp_df, btc_df=btc_df, eth_df=eth_df)
    if not result.empty:
        result_cols = set(result.columns)
        assert set(FEATURE_COLS).issubset(result_cols)
        assert len(result.columns) == len(FEATURE_COLS) + 1  # +1 for label
def test_matches_original_generate_dataset(sample_df):
"""벡터화 버전과 기존 버전의 샘플 수가 유사해야 한다.

View File

@@ -4,6 +4,56 @@ import pytest
from src.ml_features import build_features, FEATURE_COLS
def _make_df(n=10, base_price=1.0):
    """Build a dummy candle DataFrame with indicator columns for tests.

    Closes rise linearly from ``base_price`` in steps of 0.1% of
    ``base_price``; every other column is either a fixed multiple of the
    close or a constant.

    Args:
        n: Number of rows to generate.
        base_price: Close price of the first row.

    Returns:
        A ``pandas.DataFrame`` with ``n`` rows.
    """
    closes = []
    for step in range(n):
        closes.append(base_price * (1 + step * 0.001))

    def scaled(factor):
        # Column derived from the close series by a fixed multiplier.
        return [price * factor for price in closes]

    def constant(value):
        # Column holding the same value in every row.
        return [value] * n

    columns = {
        "close": closes,
        "high": scaled(1.01),
        "low": scaled(0.99),
        "volume": constant(1000.0),
        "rsi": constant(50.0),
        "macd": constant(0.0),
        "macd_signal": constant(0.0),
        "macd_hist": constant(0.0),
        "bb_upper": scaled(1.02),
        "bb_lower": scaled(0.98),
        "ema9": closes,
        "ema21": closes,
        "ema50": closes,
        "atr": constant(0.01),
        "stoch_k": constant(50.0),
        "stoch_d": constant(50.0),
        "vol_ma20": constant(1000.0),
    }
    return pd.DataFrame(columns)
def test_build_features_with_btc_eth_has_21_features():
    """Passing BTC and ETH frames must yield 21 features."""
    xrp = _make_df(10, base_price=1.0)
    correlated = {
        "btc_df": _make_df(10, base_price=50000.0),
        "eth_df": _make_df(10, base_price=3000.0),
    }
    feats = build_features(xrp, "LONG", **correlated)
    assert len(feats) == 21
def test_build_features_without_btc_eth_has_13_features():
    """Without BTC/ETH frames, build_features must yield 13 features."""
    feats = build_features(_make_df(10, base_price=1.0), "LONG")
    assert len(feats) == 13
def test_build_features_btc_ret_1_correct():
    """``btc_ret_1`` must equal the fractional change between the last two BTC closes."""
    xrp = _make_df(10, base_price=1.0)
    btc = _make_df(10, base_price=50000.0)
    eth = _make_df(10, base_price=3000.0)
    feats = build_features(xrp, "LONG", btc_df=btc, eth_df=eth)

    prev_close = btc["close"].iloc[-2]
    last_close = btc["close"].iloc[-1]
    expected = (last_close - prev_close) / prev_close
    assert abs(feats["btc_ret_1"] - expected) < 1e-6
def test_build_features_rs_zero_when_btc_ret_zero():
    """``xrp_btc_rs`` must be exactly 0.0 when every BTC close is identical."""
    xrp = _make_df(10, base_price=1.0)
    eth = _make_df(10, base_price=3000.0)
    flat_btc = _make_df(10, base_price=50000.0)
    flat_btc["close"] = 50000.0  # every candle has the same close

    feats = build_features(xrp, "LONG", btc_df=flat_btc, eth_df=eth)
    assert feats["xrp_btc_rs"] == 0.0
def test_feature_cols_has_21_items():
    """FEATURE_COLS must list exactly 21 feature names."""
    # FEATURE_COLS is already imported at module level, so no local re-import.
    assert len(FEATURE_COLS) == 21
def make_df(n=100):
"""테스트용 최소 DataFrame 생성"""
np.random.seed(42)
@@ -27,13 +77,19 @@ def test_build_features_returns_series():
assert isinstance(features, pd.Series)
# The 13 base feature names checked when build_features is called
# without BTC/ETH frames.
BASE_FEATURE_COLS = [
    "rsi",
    "macd_hist",
    "bb_pct",
    "ema_align",
    "stoch_k",
    "stoch_d",
    "atr_pct",
    "vol_ratio",
    "ret_1",
    "ret_3",
    "ret_5",
    "signal_strength",
    "side",
]
def test_build_features_has_all_cols():
    """Every base feature must appear in the index of the built features.

    build_features is called without BTC/ETH frames, so only the names in
    BASE_FEATURE_COLS are checked.
    """
    from src.indicators import Indicators

    df = make_df(100)
    ind = Indicators(df)
    df_ind = ind.calculate_all()
    features = build_features(df_ind, signal="LONG")
    for col in BASE_FEATURE_COLS:
        assert col in features.index, f"피처 누락: {col}"