feat: add Discord notifications to the Jenkins pipeline and automatic model hot-reload to MLFilter

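- Added a `Notify Build Start` stage to the Jenkins pipeline and Discord webhook calls to the `post` `success`/`failure` blocks, so Discord is notified when a build starts, succeeds, or fails, improving communication during the CI/CD process.
- Implemented model hot-reload functionality in the `MLFilter` class: `check_and_reload()` compares the model files' mtimes and reloads the model when a change is detected, and `TradingBot.process_candle()` calls it on every candle, enhancing responsiveness to model updates.
- Updated the deployment script (`scripts/deploy_model.sh`) to drop the `docker exec` reload command and instead print clearer messages about automatic model reloading and whether the `cointrader` container is running, improving user experience and debugging capabilities.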
2026-03-01 21:46:36 +09:00
parent d9238afaf9
commit c6428af64e
4 changed files with 102 additions and 12 deletions
--- a/28
+++ b/28
@@ -7,9 +7,24 @@ pipeline {
        IMAGE_TAG     = "${env.BUILD_NUMBER}"
        FULL_IMAGE    = "${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}"
        LATEST_IMAGE  = "${REGISTRY}/${IMAGE_NAME}:latest"
+        
+        // 젠킨스 자격 증명에 저장해둔 디스코드 웹훅 주소를 불러옵니다.
+        DISCORD_WEBHOOK = credentials('discord-webhook')
    }

    stages {
+        // 빌드가 시작되자마자 알림을 보냅니다.
+        stage('Notify Build Start') {
+            steps {
+                sh """
+                curl -H "Content-Type: application/json" \
+                     -X POST \
+                     -d '{"content": "🚀 **[빌드 시작]** `cointrader` (Build #${env.BUILD_NUMBER}) 배포 파이프라인 가동"}' \
+                     ${DISCORD_WEBHOOK}
+                """
+            }
+        }
+
        stage('Git Clone from Gitea') {
            steps {
                git branch: 'main',
@@ -55,12 +70,25 @@ pipeline {
        }
    }

+    // 파이프라인 결과에 따른 디스코드 알림
    post {
        success {
            echo "Build #${env.BUILD_NUMBER} 성공: ${FULL_IMAGE} → 운영 LXC(10.1.10.24) 배포 완료"
+            sh """
+            curl -H "Content-Type: application/json" \
+                 -X POST \
+                 -d '{"content": "✅ **[배포 성공]** `cointrader` (Build #${env.BUILD_NUMBER}) 운영 서버(10.1.10.24) 배포 완료!\\n- 📦 이미지: `${FULL_IMAGE}`"}' \
+                 ${DISCORD_WEBHOOK}
+            """
        }
        failure {
            echo "Build #${env.BUILD_NUMBER} 실패"
+            sh """
+            curl -H "Content-Type: application/json" \
+                 -X POST \
+                 -d '{"content": "❌ **[배포 실패]** `cointrader` (Build #${env.BUILD_NUMBER}) 파이프라인 에러 발생. 젠킨스 로그를 확인해 주세요!"}' \
+                 ${DISCORD_WEBHOOK}
+            """
        }
    }
 }
--- a/scripts/deploy_model.sh
+++ b/scripts/deploy_model.sh
@@ -16,7 +16,6 @@ LOCAL_LOG="models/training_log.json"
 # ── 백엔드별 파일 목록 설정 ──────────────────────────────────────────────────
 # mlx: ONNX 파일만 전송 (Linux 서버는 onnxruntime으로 추론)
 # lgbm: pkl 파일 전송
-RELOAD_CMD="from src.ml_filter import MLFilter; f=MLFilter(); f.reload_model(); print('리로드 완료')"
 if [ "$BACKEND" = "mlx" ]; then
  LOCAL_FILES=("models/mlx_filter.weights.onnx")
 else
@@ -68,11 +67,12 @@ fi
 echo "=== 전송 완료 ==="
 echo ""

-# ── 핫리로드 ─────────────────────────────────────────────────────────────────
-echo "=== 핫리로드 시도 ==="
+# ── 핫리로드 안내 ────────────────────────────────────────────────────────────
+# 봇이 캔들마다 모델 파일 mtime을 감지해 자동 리로드한다.
+# 컨테이너가 실행 중이면 다음 캔들(최대 1분) 안에 자동 적용된다.
+echo "=== 모델 전송 완료 — 봇이 다음 캔들에서 자동 리로드합니다 ==="
 if ssh "${LXC_HOST}" "docker inspect -f '{{.State.Running}}' cointrader 2>/dev/null | grep -q true"; then
-  ssh "${LXC_HOST}" "docker exec cointrader python -c \"${RELOAD_CMD}\""
-  echo "=== 핫리로드 완료 ==="
+  echo "  컨테이너 실행 중: 다음 캔들 마감 시 자동 핫리로드 예정"
 else
-  echo "  cointrader 컨테이너가 실행 중이 아닙니다. 건너뜁니다."
+  echo "  cointrader 컨테이너가 실행 중이 아닙니다."
 fi
--- a/src/bot.py
+++ b/src/bot.py
@@ -50,6 +50,8 @@ class TradingBot:
            logger.info("기존 포지션 없음 - 신규 진입 대기")

    async def process_candle(self, df, btc_df=None, eth_df=None):
+        self.ml_filter.check_and_reload()
+
        if not self.risk.is_trading_allowed():
            logger.warning("리스크 한도 초과 - 거래 중단")
            return
--- a/src/ml_filter.py
+++ b/src/ml_filter.py
@@ -10,12 +10,22 @@ ONNX_MODEL_PATH = Path("models/mlx_filter.weights.onnx")
 LGBM_MODEL_PATH = Path("models/lgbm_filter.pkl")


+def _mtime(path: Path) -> float:
+    """파일이 없으면 0.0 반환."""
+    try:
+        return path.stat().st_mtime
+    except FileNotFoundError:
+        return 0.0
+
+
 class MLFilter:
    """
    ML 필터. ONNX(MLX 신경망) 우선 로드, 없으면 LightGBM으로 폴백한다.
    둘 다 없으면 항상 진입을 허용한다.

    우선순위: ONNX > LightGBM > 폴백(항상 허용)
+
+    check_and_reload()를 주기적으로 호출하면 모델 파일 변경 시 자동 리로드된다.
    """

    def __init__(
@@ -29,6 +39,8 @@ class MLFilter:
        self._threshold = threshold
        self._onnx_session = None
        self._lgbm_model = None
+        self._loaded_onnx_mtime: float = 0.0
+        self._loaded_lgbm_mtime: float = 0.0
        self._try_load()

    def _try_load(self):
@@ -41,7 +53,12 @@ class MLFilter:
                    providers=["CPUExecutionProvider"],
                )
                self._lgbm_model = None
-                logger.info(f"ML 필터 ONNX 모델 로드 완료: {self._onnx_path}")
+                self._loaded_onnx_mtime = _mtime(self._onnx_path)
+                self._loaded_lgbm_mtime = 0.0
+                logger.info(
+                    f"ML 필터 로드: ONNX ({self._onnx_path}) "
+                    f"| 임계값={self._threshold}"
+                )
                return
            except Exception as e:
                logger.warning(f"ONNX 모델 로드 실패: {e}")
@@ -51,14 +68,51 @@ class MLFilter:
        if self._lgbm_path.exists():
            try:
                self._lgbm_model = joblib.load(self._lgbm_path)
-                logger.info(f"ML 필터 LightGBM 모델 로드 완료: {self._lgbm_path}")
+                self._loaded_lgbm_mtime = _mtime(self._lgbm_path)
+                self._loaded_onnx_mtime = 0.0
+                logger.info(
+                    f"ML 필터 로드: LightGBM ({self._lgbm_path}) "
+                    f"| 임계값={self._threshold}"
+                )
            except Exception as e:
                logger.warning(f"LightGBM 모델 로드 실패: {e}")
                self._lgbm_model = None
+        else:
+            logger.warning("ML 필터: 모델 파일 없음 → 모든 신호 허용 (폴백)")

    def is_model_loaded(self) -> bool:
        return self._onnx_session is not None or self._lgbm_model is not None

+    @property
+    def active_backend(self) -> str:
+        if self._onnx_session is not None:
+            return "ONNX"
+        if self._lgbm_model is not None:
+            return "LightGBM"
+        return "폴백(없음)"
+
+    def check_and_reload(self) -> bool:
+        """
+        모델 파일의 mtime을 확인해 변경됐으면 리로드한다.
+        실제로 리로드가 일어났으면 True 반환.
+        """
+        onnx_changed = _mtime(self._onnx_path) != self._loaded_onnx_mtime
+        lgbm_changed = _mtime(self._lgbm_path) != self._loaded_lgbm_mtime
+
+        if onnx_changed or lgbm_changed:
+            changed_files = []
+            if onnx_changed:
+                changed_files.append(str(self._onnx_path))
+            if lgbm_changed:
+                changed_files.append(str(self._lgbm_path))
+            logger.info(f"ML 필터: 모델 파일 변경 감지 → 리로드 ({', '.join(changed_files)})")
+            self._onnx_session = None
+            self._lgbm_model = None
+            self._try_load()
+            logger.info(f"ML 필터 핫리로드 완료: 백엔드={self.active_backend}")
+            return True
+        return False
+
    def should_enter(self, features: pd.Series) -> bool:
        """
        확률 >= threshold 이면 True (진입 허용).
@@ -74,15 +128,21 @@ class MLFilter:
            else:
                X = features.to_frame().T
                proba = float(self._lgbm_model.predict_proba(X)[0][1])
-            logger.debug(f"ML 필터 확률: {proba:.3f} (임계값: {self._threshold})")
+            logger.debug(
+                f"ML 필터 [{self.active_backend}] 확률: {proba:.3f} "
+                f"(임계값: {self._threshold})"
+            )
            return bool(proba >= self._threshold)
        except Exception as e:
            logger.warning(f"ML 필터 예측 오류 (폴백 허용): {e}")
            return True

    def reload_model(self):
-        """재학습 후 모델을 핫 리로드한다."""
+        """외부에서 강제 리로드할 때 사용 (하위 호환)."""
+        prev_backend = self.active_backend
        self._onnx_session = None
        self._lgbm_model = None
        self._try_load()
-        logger.info("ML 필터 모델 리로드 완료")
+        logger.info(
+            f"ML 필터 강제 리로드 완료: {prev_backend} → {self.active_backend}"
+        )