feat: enhance model training and deployment scripts with time-weighted sampling

- Updated `train_model.py` and `train_mlx_model.py` to accept a time-weight decay parameter for sample weighting during training.
- Modified dataset generation to produce time-decayed sample weights, so that recent samples count more during training.
- Adjusted deployment scripts to support new backend options and improved error handling for model file transfers.
- Added new entries to the training log for better tracking of model performance metrics over time.
- Added ONNX model export to the MLX filter for compatibility with Linux servers.
2026-03-01 21:25:06 +09:00
parent 301457ce57
commit db144750a3
10 changed files with 324 additions and 97 deletions
--- a/src/mlx_filter.py
+++ b/src/mlx_filter.py
@@ -1,6 +1,7 @@
 """
 Apple MLX 기반 경량 신경망 필터.
 M4의 통합 GPU를 자동으로 활용한다.
+학습 후 ONNX로 export해 Linux 서버에서 onnxruntime으로 추론한다.
 """
 import numpy as np
 import pandas as pd
@@ -12,6 +13,83 @@ from pathlib import Path
 from src.ml_features import FEATURE_COLS


+def _export_onnx(
+    weights_npz: Path,
+    meta_npz: Path,
+    onnx_path: Path,
+) -> None:
+    """Convert saved MLX weights (.npz) into an ONNX graph and write it to disk.
+
+    Graph: normalize -> fc1 -> ReLU -> fc2 -> ReLU -> fc3 -> sigmoid -> 1-D output.
+    No dropout node is emitted (dropout is inactive at inference time).
+
+    Args:
+        weights_npz: .npz holding fc1/fc2/fc3 ``weight`` and ``bias`` arrays.
+        meta_npz: .npz holding ``mean``, ``std`` and ``input_dim``.
+        onnx_path: Destination file; missing parent directories are created.
+
+    Raises:
+        ImportError: If the ``onnx`` package is not installed.
+        KeyError: If an expected array is missing from either .npz file.
+    """
+    # Local import: the caller catches ImportError when onnx is not installed.
+    import onnx
+    from onnx import helper, TensorProto, numpy_helper
+
+    # np.load on .npz returns an open archive; the with-block closes it promptly.
+    with np.load(meta_npz) as meta:
+        mean = meta["mean"].astype(np.float32)
+        std = meta["std"].astype(np.float32)
+        input_dim = int(meta["input_dim"])
+
+    # Key pattern: fc1.weight, fc1.bias, ... -> initializer names fc1_w, fc1_b, ...
+    with np.load(weights_npz) as w:
+        params = {
+            f"{layer}_{tag}": w[f"{layer}.{kind}"].astype(np.float32)
+            for layer in ("fc1", "fc2", "fc3")
+            for kind, tag in (("weight", "w"), ("bias", "b"))
+        }
+
+    initializers = [
+        numpy_helper.from_array(mean, name="mean"),
+        numpy_helper.from_array(std, name="std"),
+        # Target shape for the final Reshape: collapse (N, 1) to (N,).
+        numpy_helper.from_array(np.array([-1], dtype=np.int64), name="flat_shape"),
+    ]
+    initializers += [numpy_helper.from_array(a, name=k) for k, a in params.items()]
+
+    nodes = [
+        # Normalization: (x - mean) / std
+        helper.make_node("Sub", ["X", "mean"], ["x_sub"]),
+        helper.make_node("Div", ["x_sub", "std"], ["x_norm"]),
+        # Linear layers compute x @ W.T + b, hence transB=1 on every Gemm.
+        helper.make_node("Gemm", ["x_norm", "fc1_w", "fc1_b"], ["fc1_out"], transB=1),
+        helper.make_node("Relu", ["fc1_out"], ["relu1"]),
+        helper.make_node("Gemm", ["relu1", "fc2_w", "fc2_b"], ["fc2_out"], transB=1),
+        helper.make_node("Relu", ["fc2_out"], ["relu2"]),
+        helper.make_node("Gemm", ["relu2", "fc3_w", "fc3_b"], ["logits"], transB=1),
+        helper.make_node("Sigmoid", ["logits"], ["proba_2d"]),
+        # Flatten always yields a 2-D tensor ((1, N) for axis=0), which would not
+        # match the declared 1-D output; Reshape to [-1] gives the intended (N,).
+        helper.make_node("Reshape", ["proba_2d", "flat_shape"], ["proba"]),
+    ]
+
+    graph = helper.make_graph(
+        nodes,
+        "mlx_filter",
+        inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, [None, input_dim])],
+        outputs=[helper.make_tensor_value_info("proba", TensorProto.FLOAT, [None])],
+        initializer=initializers,
+    )
+    model_proto = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)])
+    model_proto.ir_version = 8
+    onnx.checker.check_model(model_proto)
+    # parents=True: the target directory may be nested and not exist yet.
+    onnx_path.parent.mkdir(parents=True, exist_ok=True)
+    onnx.save(model_proto, str(onnx_path))
+    print(f"  ONNX export 완료: {onnx_path}")
+
+
 class _Net(nn.Module):
    """3층 MLP 이진 분류기."""

@@ -53,7 +131,12 @@ class MLXFilter:
        self._std: np.ndarray | None = None
        self._trained = False

-    def fit(self, X: pd.DataFrame, y: pd.Series) -> "MLXFilter":
+    def fit(
+        self,
+        X: pd.DataFrame,
+        y: pd.Series,
+        sample_weight: np.ndarray | None = None,
+    ) -> "MLXFilter":
        X_np = X[FEATURE_COLS].values.astype(np.float32)
        y_np = y.values.astype(np.float32)

@@ -61,11 +144,20 @@ class MLXFilter:
        self._std = X_np.std(axis=0) + 1e-8
        X_np = (X_np - self._mean) / self._std

+        w_np = sample_weight.astype(np.float32) if sample_weight is not None else None
+
        optimizer = optim.Adam(learning_rate=self.lr)

-        def loss_fn(model: _Net, x: mx.array, y: mx.array) -> mx.array:
+        def loss_fn(
+            model: _Net, x: mx.array, y: mx.array, w: mx.array | None
+        ) -> mx.array:
            logits = model(x)
-            return nn.losses.binary_cross_entropy(logits, y, with_logits=True)
+            per_sample = nn.losses.binary_cross_entropy(
+                logits, y, with_logits=True, reduction="none"
+            )
+            if w is not None:
+                return (per_sample * w).sum() / w.sum()
+            return per_sample.mean()

        loss_and_grad = nn.value_and_grad(self._model, loss_fn)

@@ -78,7 +170,8 @@ class MLXFilter:
                batch_idx = idx[start : start + self.batch_size]
                x_batch = mx.array(X_np[batch_idx])
                y_batch = mx.array(y_np[batch_idx])
-                loss, grads = loss_and_grad(self._model, x_batch, y_batch)
+                w_batch = mx.array(w_np[batch_idx]) if w_np is not None else None
+                loss, grads = loss_and_grad(self._model, x_batch, y_batch, w_batch)
                optimizer.update(self._model, grads)
                mx.eval(self._model.parameters(), optimizer.state)
                epoch_loss += loss.item()
@@ -114,6 +207,12 @@ class MLXFilter:
            input_dim=np.array(self.input_dim),
            hidden_dim=np.array(self.hidden_dim),
        )
+        # ONNX export: Linux 서버에서 onnxruntime으로 추론하기 위해 변환
+        try:
+            onnx_path = path.with_suffix(".onnx")
+            _export_onnx(weights_path, meta_path, onnx_path)
+        except ImportError:
+            print("  [경고] onnx 패키지 없음 → ONNX export 생략 (pip install onnx)")

    @classmethod
    def load(cls, path: str | Path) -> "MLXFilter":