Add ai_planner.py - AI 행동 계획 모듈

2026-03-25 10:24:02 +09:00
parent 8858d52b1c
commit 7f7034bba2
1 changed files with 202 additions and 0 deletions
--- a/ai_planner.py
+++ b/ai_planner.py
@@ -0,0 +1,202 @@
+"""
+ai_planner.py — fully autonomous agent version
+
+Key changes:
+  - The AI plans several actions at once (project-sized units)
+  - The result of each action (success/failure/reason) is fed back to the AI
+  - The AI analyzes why an action failed and retries with a different approach
+  - Supports the full action set: inserters, power lines, pipes, assembler recipes, etc.
+"""
+import json
+import os
+import urllib.request
+import urllib.error
+
+
+# Chat-completions endpoint that AIPlanner._call_glm POSTs its requests to.
+GLM_API_URL = "https://api.z.ai/api/coding/paas/v4/chat/completions"
+# Model identifier sent in the "model" field of every request payload.
+GLM_MODEL   = "GLM-4.7"
+
+
+# System prompt sent with every request. It is runtime text read by the model
+# (written in Korean on purpose): it defines the agent's role, the JSON-only
+# response schema, and the catalogue of actions with their parameter shapes.
+# NOTE: the automation science pack recipe is iron-gear-wheel + copper-plate;
+# keep the domain facts below in sync with the game, the model plans from them.
+SYSTEM_PROMPT = """당신은 팩토리오 게임을 완전 자율적으로 플레이하는 AI 에이전트입니다.
+게임 상태와 이전 행동 결과를 분석해서 스스로 판단하고 계획을 세웁니다.
+
+## 당신의 역할
+- 게임 상태를 보고 지금 가장 중요한 일이 무엇인지 스스로 판단
+- 그 일을 완료하기 위한 행동 시퀀스 계획 (최대 8개)
+- 이전 행동이 실패했다면 왜 실패했는지 분석하고 다른 방법 시도
+- 단기 행동과 장기 목표를 항상 연결해서 생각
+
+## 팩토리오 자동화 핵심 지식
+- 채굴기 출력 → inserter → 벨트 → inserter → 제련소/조립기 순서
+- 제련소/보일러/조립기는 연료 또는 전력 필요
+- 전력: offshore-pump → pipe → boiler → steam-engine → small-electric-pole
+- 자동화 연구팩: iron-gear-wheel + copper-plate → assembling-machine으로 생산
+- 건물 배치 전 반드시 인벤토리에 해당 아이템 존재 확인
+
+## 응답 형식 — JSON만 반환, 다른 텍스트 절대 금지
+{
+  "thinking": "현재 상태 분석 및 판단 과정 (자유롭게 서술)",
+  "current_goal": "지금 달성하려는 목표",
+  "actions": [
+    {"action": "행동유형", "params": {...}, "reason": "이 행동이 필요한 이유"},
+    최대 8개
+  ],
+  "after_this": "이 시퀀스 완료 후 다음 계획"
+}
+
+## 전체 action 목록
+
+### 이동/채굴
+- "move"               → {"x": int, "y": int}
+- "mine_resource"      → {"ore": "iron-ore", "count": int}
+
+### 제작
+- "craft_item"         → {"item": str, "count": int}
+  가능한 item: stone-furnace, burner-mining-drill, transport-belt,
+               burner-inserter, inserter, small-electric-pole,
+               medium-electric-pole, pipe, offshore-pump, boiler,
+               steam-engine, assembling-machine-1, iron-gear-wheel, lab
+
+### 건물 배치
+- "place_entity"       → {"name": "burner-mining-drill", "x": int, "y": int, "direction": "north|south|east|west"}
+  배치 가능: burner-mining-drill, electric-mining-drill, stone-furnace,
+             burner-inserter, inserter, fast-inserter, transport-belt,
+             underground-belt, splitter, small-electric-pole,
+             medium-electric-pole, pipe, pipe-to-ground, offshore-pump,
+             boiler, steam-engine, assembling-machine-1, lab, chest
+
+### 벨트 라인 (자동 경로)
+- "place_belt_line"    → {"from_x": int, "from_y": int, "to_x": int, "to_y": int}
+
+### 연료/아이템 삽입
+- "insert_to_entity"   → {"x": int, "y": int, "item": "coal", "count": int}
+
+### 조립기 레시피 설정
+- "set_recipe"         → {"x": int, "y": int, "recipe": "automation-science-pack"}
+
+### 연구
+- "start_research"     → {"tech": "automation"}
+
+### 대기
+- "wait"               → {"seconds": int}
+"""
+
+
+class AIPlanner:
+    """Plan Factorio actions by querying the GLM chat-completions API.
+
+    Each call to ``decide`` sends the current game state, the most recent
+    action feedback and the long-term goal to the model, and returns the
+    action sequence the model proposes.  Callers report the outcome of every
+    executed action through ``record_feedback`` so the next plan can react
+    to failures.
+    """
+
+    # Maximum number of feedback entries kept in memory.
+    FEEDBACK_LOG_LIMIT = 15
+    # Number of most recent feedback entries included in each prompt.
+    FEEDBACK_PROMPT_LIMIT = 8
+
+    def __init__(self):
+        """Read the API key from the environment and reset planner state.
+
+        Raises:
+            ValueError: If ``ZAI_API_KEY`` is unset or blank.
+        """
+        # Strip so stray whitespace/newlines from the environment never end
+        # up inside the Authorization header.
+        self.api_key = os.environ.get("ZAI_API_KEY", "").strip()
+        if not self.api_key:
+            raise ValueError("ZAI_API_KEY 환경변수를 설정하세요.")
+
+        self.step = 0
+        self.feedback_log: list[dict] = []
+        self.long_term_goal = (
+            "완전 자동화 달성: "
+            "철/구리 채굴→제련 자동화 → 전력 구축 → automation 연구 → "
+            "빨간 과학팩 자동 생산 → lab에서 연구"
+        )
+
+    def decide(self, state_summary: str) -> list[dict]:
+        """Ask the model for the next action sequence.
+
+        Args:
+            state_summary: Text description of the current game state; it is
+                embedded verbatim in the prompt.
+
+        Returns:
+            The plan's ``actions`` as a list of dicts.  A missing, null or
+            non-list ``actions`` field yields an empty list, and entries that
+            are not JSON objects are dropped.
+
+        Raises:
+            ConnectionError: If the API request fails.
+            ValueError: If the model output cannot be parsed into a JSON object.
+        """
+        self.step += 1
+        feedback_text = self._format_feedback()
+
+        user_message = (
+            f"## 스텝 {self.step}\n\n"
+            f"### 현재 게임 상태\n{state_summary}\n\n"
+            f"{feedback_text}"
+            f"### 장기 목표\n{self.long_term_goal}\n\n"
+            "현재 상태를 분석하고, 장기 목표를 향해 지금 해야 할 행동 시퀀스를 계획하세요.\n"
+            "반드시 JSON만 반환하세요."
+        )
+
+        print("\n[GLM] 생각 중...")
+        raw = self._call_glm(user_message)
+        plan = self._parse_json(raw)
+
+        thinking = plan.get("thinking", "")
+        if thinking:
+            print(f"\n🧠 AI 판단:\n{thinking}\n")
+
+        print(f"🎯 현재 목표: {plan.get('current_goal', '')}")
+        print(f"📋 완료 후:   {plan.get('after_this', '')}")
+
+        # The model is not guaranteed to honour the schema: "actions" may be
+        # null, a scalar, or contain non-object items.  record_feedback calls
+        # action.get(...), so only dict entries are safe to hand back.
+        actions = plan.get("actions")
+        if not isinstance(actions, list):
+            actions = []
+        actions = [item for item in actions if isinstance(item, dict)]
+        print(f"⚡ {len(actions)}개 행동 계획됨")
+        return actions
+
+    def record_feedback(self, action: dict, success: bool, message: str = ""):
+        """Record the outcome of an executed action for use in later prompts.
+
+        Args:
+            action: The action dict that was executed; its ``action`` and
+                ``params`` keys are stored.
+            success: Whether the action succeeded.
+            message: Optional detail, e.g. the failure reason.
+        """
+        self.feedback_log.append({
+            "action":  action.get("action", ""),
+            "params":  action.get("params", {}),
+            "success": success,
+            "message": message,
+        })
+        # Keep only the newest entries; a no-op while the log is within the limit.
+        del self.feedback_log[:-self.FEEDBACK_LOG_LIMIT]
+
+    def _format_feedback(self) -> str:
+        """Render the most recent feedback entries as a prompt section.
+
+        Returns:
+            An empty string when there is no feedback, otherwise a heading
+            followed by one line per entry and a trailing blank line.
+        """
+        if not self.feedback_log:
+            return ""
+        lines = ["### 이전 행동 결과 (성공/실패)\n"]
+        for fb in self.feedback_log[-self.FEEDBACK_PROMPT_LIMIT:]:
+            status = "✅" if fb["success"] else "❌"
+            msg = f" — {fb['message']}" if fb["message"] else ""
+            lines.append(
+                f"  {status} {fb['action']} "
+                f"{json.dumps(fb['params'], ensure_ascii=False)}{msg}"
+            )
+        return "\n".join(lines) + "\n\n"
+
+    def _call_glm(self, user_message: str) -> str:
+        """POST one chat-completion request and return the reply text.
+
+        Args:
+            user_message: Content of the user turn; the system turn is always
+                ``SYSTEM_PROMPT``.
+
+        Returns:
+            The stripped ``content`` of the first choice, or an empty string
+            when that field is null or not a string.
+
+        Raises:
+            ConnectionError: On an HTTP error status, an unreachable host or
+                a timeout.
+        """
+        payload = json.dumps({
+            "model":       GLM_MODEL,
+            "messages":    [
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user",   "content": user_message},
+            ],
+            "temperature": 0.3,
+            "max_tokens":  1200,
+        }).encode("utf-8")
+
+        req = urllib.request.Request(
+            GLM_API_URL,
+            data=payload,
+            headers={
+                "Content-Type":  "application/json",
+                "Authorization": f"Bearer {self.api_key}",
+            },
+            method="POST",
+        )
+        try:
+            with urllib.request.urlopen(req, timeout=90) as resp:
+                data = json.loads(resp.read().decode("utf-8"))
+        except urllib.error.HTTPError as e:
+            # errors="replace": a non-UTF-8 error body must not hide the
+            # HTTP status behind a UnicodeDecodeError.
+            body = e.read().decode("utf-8", errors="replace")
+            raise ConnectionError(f"GLM API 오류 {e.code}: {body}") from e
+        except OSError as e:
+            # URLError (DNS failure, refused connection) and socket timeouts
+            # are OSError subclasses; report them the same way as HTTP errors.
+            raise ConnectionError(f"GLM API 연결 실패: {e}") from e
+
+        content = data["choices"][0]["message"]["content"]
+        # A null / non-string content is passed on as "" so the caller's JSON
+        # parsing reports it as a ValueError instead of crashing on .strip().
+        return content.strip() if isinstance(content, str) else ""
+
+    def _parse_json(self, raw: str) -> dict:
+        """Extract the JSON object from a raw model reply.
+
+        Drops everything up to the last ``</think>`` tag, removes Markdown
+        code-fence lines when the text starts with a fence, and falls back to
+        the outermost ``{...}`` span if the text as a whole is not valid JSON.
+
+        Args:
+            raw: Reply text returned by the model.
+
+        Returns:
+            The decoded JSON object.
+
+        Raises:
+            ValueError: If no JSON object can be decoded from ``raw``.
+        """
+        text = raw.strip()
+        # Key on the closing tag so replies that omit the opening <think>
+        # are handled as well.
+        if "</think>" in text:
+            text = text.split("</think>")[-1].strip()
+        if text.startswith("```"):
+            text = "\n".join(
+                line for line in text.splitlines()
+                if not line.strip().startswith("```")
+            ).strip()
+
+        failure = f"JSON 파싱 실패:\n{raw[:400]}"
+        try:
+            plan = json.loads(text)
+        except json.JSONDecodeError as first_err:
+            start, end = text.find("{"), text.rfind("}") + 1
+            if start == -1 or end <= start:
+                raise ValueError(failure) from first_err
+            try:
+                plan = json.loads(text[start:end])
+            except json.JSONDecodeError as second_err:
+                raise ValueError(failure) from second_err
+        # Valid JSON that is not an object (list, string, number) cannot be a plan.
+        if not isinstance(plan, dict):
+            raise ValueError(failure)
+        return plan
+
+    def set_goal(self, goal: str):
+        """Replace the long-term goal and discard feedback from the old one."""
+        self.long_term_goal = goal
+        self.feedback_log.clear()
+        print(f"[AI] 새 목표: {goal}")