# factorio-ai-agent/ai_planner.py

"""
ai_planner.py — 순수 AI 플레이 버전

핵심 변경사항 (치트 모드 대비):
  - 이동에 실제 시간이 걸림 → 불필요한 장거리 이동 최소화
  - 채굴은 자원 패치 근처에서만 가능 → 반드시 move 후 mine
  - 제작은 재료가 인벤토리에 있어야 함 → 재료 확보 순서 중요
  - 건설은 건설 거리 내에서만 가능 → 배치 전 move 필수
  - AI가 이 제약을 이해하고 행동 순서를 계획해야 함

JSON 파싱 강화:
  - GLM 응답이 잘리거나 마크다운으로 감싸져도 복구
  - 최대 2회 재시도
"""
import json
import os
import re
import time
import urllib.request
import urllib.error


# Chat-completions endpoint that AIPlanner._call_glm sends its POST requests to.
GLM_API_URL = "https://api.z.ai/api/coding/paas/v4/chat/completions"
# Model identifier placed in the "model" field of each request payload.
GLM_MODEL   = "GLM-4.7"


# System prompt sent as the "system" message of every GLM request.
# It is written in Korean and covers: the no-cheat play constraints, efficient
# action patterns, basic automation knowledge, the required JSON response
# schema, and the list of available actions with their parameters.
# NOTE(review): some recipe lines in this prompt (e.g. burner-inserter and the
# automation science pack line) may not match vanilla Factorio recipes —
# confirm against the game version in use.
SYSTEM_PROMPT = """당신은 팩토리오 게임을 순수하게 플레이하는 AI 에이전트입니다.
치트나 텔레포트 없이, 실제 게임 메커니즘만 사용합니다.
게임 상태와 이전 행동 결과를 분석해서 스스로 판단하고 계획을 세웁니다.

## 핵심 제약 사항 (반드시 준수!)
1. **이동은 실제 걷기** — 먼 거리는 시간이 오래 걸림. 불필요한 왕복 최소화
2. **채굴은 자원 패치 근처에서만 가능** — 반드시 자원 위치로 move한 후 mine_resource
3. **제작은 재료가 있어야 함** — iron-plate 없이 iron-gear-wheel 못 만듬.
   재료 확인 후 craft_item. 재료 부족하면 먼저 채굴→제련
4. **건설은 건설 거리 내에서만 가능** — 배치할 좌표 근처로 move한 후 place_entity
5. **자원은 유한** — 직접 채굴해야 하고, 제련소에 넣어야 plate가 됨

## 효율적인 행동 패턴
- 같은 구역 작업을 묶어서 (이동 최소화)
- move → mine/place/insert 순서로 항상 위치 먼저 확보
- 채굴 → 제련 → 제작 → 건설 흐름 유지
- 한 번에 넉넉히 채굴 (왕복 줄이기)

## 팩토리오 자동화 핵심 지식
- 채굴기(burner-mining-drill)는 광맥 위에 배치해야 작동
- 제련소(stone-furnace)에 ore + 석탄 넣으면 plate 생산
- 채굴기 출력 → inserter → 벨트 → inserter → 제련소/조립기
- 제련소/보일러/채굴기는 석탄 연료 필요
- 전력: offshore-pump → pipe → boiler → steam-engine → small-electric-pole
- 자동화 연구팩: iron-gear-wheel + iron-plate → assembling-machine
- 건물 배치 전 반드시: 1) 인벤토리에 아이템 있는지 2) 가까이 있는지 확인

## 응답 형식 — 반드시 순수 JSON만 반환, 다른 텍스트 절대 금지
{
  "thinking": "현재 상태 분석. 인벤토리/위치/자원 확인 후 판단 (자유롭게 서술)",
  "current_goal": "지금 달성하려는 목표",
  "actions": [
    {"action": "행동유형", "params": {...}, "reason": "이 행동이 필요한 이유"},
    최대 8개
  ],
  "after_this": "이 시퀀스 완료 후 다음 계획"
}

## 전체 action 목록

### 탐색 (★ 자원 없을 때 최우선! 걸으면서 자원 스캔)
- "explore" → {"direction": "east|west|north|south|...", "max_steps": 200, "wanted_ores": ["stone","coal", ...]} (선택)
  ★ 자원이 보이지 않을 때 반드시 explore 사용! move 대신!
  ★ `wanted_ores`가 있으면: 해당 자원이 발견될 때까지 계속 걷고, 다른 자원(예: iron-ore)만 계속 발견되더라도 즉시 멈추지 말 것
  ★ 방향으로 걸으면서 반경 50타일 자원 스캔, 발견 즉시 멈춤
  ★ 장애물 자동 감지. 막히면 다른 방향 시도
  ★ 한 방향 실패 시 다음 방향 (east→north→south→west)

### 이동 (자원 좌표를 알 때만 사용)
- "move" → {"x": int, "y": int}
  주의: 자원/건물의 정확한 좌표를 알 때만 사용. 탐색에는 explore!

### 채굴 (자원 패치 근처에서만 작동)
- "mine_resource" → {"ore": "iron-ore", "count": int}
  채굴 가능: iron-ore, copper-ore, coal, stone
  권장: count는 20~50 단위로 (작으면 비효율, 크면 오래 걸림)

### 제작 (인벤토리에 재료 필요!)
- "craft_item" → {"item": str, "count": int}
  레시피 예시:
    stone-furnace: stone 5개
    burner-mining-drill: iron-gear-wheel 3 + iron-plate 3 + stone-furnace 1
    transport-belt: iron-gear-wheel 1 + iron-plate 1
    burner-inserter: iron-gear-wheel 1 + iron-plate 1
    iron-gear-wheel: iron-plate 2
    pipe: iron-plate 1

### 건물 배치 (건설 거리 내에서만!)
- "place_entity" → {"name": str, "x": int, "y": int, "direction": "north|south|east|west"}
  주의: 1) 인벤토리에 아이템 필요 2) 가까이 있어야 함 (약 10칸 내)

### 벨트 라인 (걸어다니면서 하나씩 배치 — 시간 많이 걸림)
- "place_belt_line" → {"from_x": int, "from_y": int, "to_x": int, "to_y": int}

### 연료/아이템 삽입 (건설 거리 내에서)
- "insert_to_entity" → {"x": int, "y": int, "item": "coal", "count": int}

### 조립기 레시피 설정
- "set_recipe" → {"x": int, "y": int, "recipe": str}

### 연구
- "start_research" → {"tech": "automation"}

### 대기
- "wait" → {"seconds": int}

## 절대 중요: 순수 JSON만 출력하세요. ```json 같은 마크다운 블록, 설명 텍스트, 주석 없이 오직 { } 만."""


class AIPlanner:
    def __init__(self):
        self.api_key = os.environ.get("ZAI_API_KEY", "")
        if not self.api_key:
            raise ValueError("ZAI_API_KEY 환경변수를 설정하세요.")

        self.step = 0
        self.feedback_log: list[dict] = []
        self.long_term_goal = (
            "완전 자동화 달성: "
            "석탄 채굴 → 철 채굴+제련 자동화 → 구리 채굴+제련 → "
            "전력 구축 → automation 연구 → 빨간 과학팩 자동 생산"
        )

    def decide(self, state_summary: str) -> list[dict]:
        """Ask the model for the next action sequence and return it.

        Increments the step counter, builds the user message from the game
        state, the recent feedback and the long-term goal, then calls the model
        up to three times until a usable plan is obtained. A plan is usable
        when it is a JSON object whose "actions" value is a list. If every
        attempt fails, a single default "explore east" action is used instead.

        Args:
            state_summary: Text description of the current game state.

        Returns:
            The planned actions. Entries that are not JSON objects are dropped.

        Raises:
            ConnectionError: Propagated from _call_glm when the API returns an
                HTTP error; only parse/validation errors trigger a retry.
        """
        self.step += 1
        feedback_text = self._format_feedback()

        user_message = (
            f"## 스텝 {self.step}\n\n"
            f"### 현재 게임 상태\n{state_summary}\n\n"
            f"{feedback_text}"
            f"### 장기 목표\n{self.long_term_goal}\n\n"
            "현재 상태를 분석하고, 장기 목표를 향해 지금 해야 할 행동 시퀀스를 계획하세요.\n"
            "⚠️ 순수 플레이입니다. 건설/채굴/삽입 전에 반드시 move로 가까이 이동하세요.\n"
            "⚠️ 제작은 재료가 있어야 합니다. 인벤토리를 확인하세요.\n"
            "반드시 JSON만 반환하세요. 마크다운 블록(```)이나 설명 텍스트 없이 순수 JSON만."
        )

        print("\n[GLM] 생각 중...")

        max_attempts = 3
        plan = None
        for attempt in range(max_attempts):
            try:
                raw = self._call_glm(user_message, attempt=attempt)
                candidate = self._parse_json(raw)
                # Valid JSON is not necessarily a plan: a top-level list,
                # string or number would break the .get() calls below.
                if not isinstance(candidate, dict):
                    raise ValueError("plan is not a JSON object")
                if not isinstance(candidate.get("actions", []), list):
                    raise ValueError("'actions' is not a list")
            except ValueError:
                # json.JSONDecodeError is a subclass of ValueError.
                if attempt < max_attempts - 1:
                    print(
                        f"[경고] JSON 파싱 실패 (시도 {attempt + 1}/{max_attempts}), 재시도..."
                    )
                    continue
                print(f"[오류] JSON 파싱 {max_attempts}회 실패. 기본 탐색 행동 사용.")
            else:
                plan = candidate
                break

        if plan is None:
            # Every attempt failed: fall back to walking east to look for resources.
            plan = {
                "thinking": "API 응답 파싱 실패로 기본 탐색 수행",
                "current_goal": "주변 탐색",
                "actions": [
                    {"action": "explore", "params": {"direction": "east", "max_steps": 200}, "reason": "자원 탐색"},
                ],
                "after_this": "자원 발견 후 채굴 시작"
            }

        thinking = plan.get("thinking", "")
        if thinking:
            print(f"\n[AI] 판단:\n{thinking}\n")

        print(f"[AI] 현재 목표: {plan.get('current_goal', '')}")
        print(f"[AI] 완료 후:   {plan.get('after_this', '')}")

        planned = plan.get("actions", [])
        # Callers (and record_feedback) treat each action as a dict.
        actions = [item for item in planned if isinstance(item, dict)]
        dropped = len(planned) - len(actions)
        if dropped:
            print(f"[경고] 형식이 잘못된 행동 {dropped}개 무시")
        print(f"[AI] {len(actions)}개 행동 계획됨")
        return actions

    def record_feedback(self, action: dict, success: bool, message: str = ""):
        """Store the outcome of one executed action in the feedback history.

        Only the action's "action" and "params" fields are kept (defaulting to
        "" and {} when absent). Once the history exceeds 15 entries, the oldest
        entry is discarded.

        Args:
            action: The action dict that was executed.
            success: Whether the action succeeded.
            message: Optional detail about the result.
        """
        entry = {
            "action": action.get("action", ""),
            "params": action.get("params", {}),
            "success": success,
            "message": message,
        }
        history = self.feedback_log
        history.append(entry)
        if len(history) > 15:
            del history[0]

    def _format_feedback(self) -> str:
        """Render the eight most recent feedback entries as a prompt section.

        Returns:
            An empty string when no feedback has been recorded; otherwise a
            heading followed by one "OK"/"FAIL" line per entry and a trailing
            blank line.
        """
        if not self.feedback_log:
            return ""

        rendered = ["### 이전 행동 결과 (성공/실패)\n"]
        for entry in self.feedback_log[-8:]:
            label = "FAIL" if not entry["success"] else "OK"
            params_json = json.dumps(entry["params"], ensure_ascii=False)
            suffix = f" — {entry['message']}" if entry["message"] else ""
            rendered.append(f"  {label} {entry['action']} {params_json}{suffix}")

        # Each line is newline-terminated, plus one extra newline to close the section.
        return "".join(f"{line}\n" for line in rendered) + "\n"

    def _call_glm(self, user_message: str, attempt: int) -> str:
        """POST one chat-completion request and return the reply text.

        Args:
            user_message: Content of the "user" message.
            attempt: Zero-based attempt index; used only in the timing log line.

        Returns:
            The stripped "content" of the first choice in the response.

        Raises:
            ConnectionError: The API answered with an HTTP error status.
            ValueError: The response body is not valid JSON
                (json.JSONDecodeError) or has no textual
                choices[0].message.content. decide() retries on ValueError.
        """
        # Single source of truth for both the request payload and the log line.
        max_tokens = 2000
        payload = json.dumps({
            "model":       GLM_MODEL,
            "messages":    [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user",   "content": user_message},
            ],
            "temperature": 0.3,
            "max_tokens":  max_tokens,
        }).encode("utf-8")

        req = urllib.request.Request(
            GLM_API_URL,
            data    = payload,
            headers = {
                "Content-Type":  "application/json",
                "Authorization": f"Bearer {self.api_key}",
            },
            method = "POST",
        )

        t_start = time.perf_counter()
        try:
            with urllib.request.urlopen(req, timeout=90) as resp:
                raw_text = resp.read().decode("utf-8")
        except urllib.error.HTTPError as e:
            # The error body may not be valid UTF-8; never let decoding mask the HTTP error.
            body = e.read().decode("utf-8", errors="replace")
            raise ConnectionError(f"GLM API 오류 {e.code}: {body}") from e
        t_read_done = time.perf_counter()

        data = json.loads(raw_text)
        try:
            content = data["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as e:
            raise ValueError(f"GLM 응답 형식 오류: {raw_text[:300]}") from e
        if not isinstance(content, str):
            # e.g. "content": null — there is nothing to parse.
            raise ValueError(f"GLM 응답 형식 오류 (content 없음): {raw_text[:300]}")
        content = content.strip()
        t_json_done = time.perf_counter()

        dt_total = t_json_done - t_start
        dt_read = t_read_done - t_start
        dt_json = t_json_done - t_read_done

        print(
            "[GLM] 타이밍 | "
            f"attempt {attempt+1}/3 | "
            f"total {dt_total:.2f}s | "
            f"http_read {dt_read:.2f}s | "
            f"json_parse {dt_json:.2f}s | "
            f"prompt_chars {len(user_message)} | "
            f"system_chars {len(SYSTEM_PROMPT)} | "
            f"max_tokens {max_tokens} | "
            f"resp_chars {len(raw_text)}"
        )
        return content

    def _parse_json(self, raw: str) -> dict:
        """Extract the plan object from a raw model reply.

        Recovery steps, in order:
          1. When a ``<think>`` tag is present, keep only the text after the
             last ``</think>``.
          2. When the reply starts with a Markdown fence, drop the fence lines.
          3. Parse the whole text as JSON.
          4. Otherwise scan from the first ``{`` to its matching close and
             parse that span.
          5. If the span is unbalanced or still invalid, pass it through
             ``_repair_truncated_json`` and parse the result.

        Args:
            raw: Reply text as returned by the model.

        Returns:
            The parsed JSON object.

        Raises:
            ValueError: No JSON object could be recovered, or the reply is
                valid JSON whose top level is not an object.
        """
        text = raw.strip()
        if "<think>" in text:
            text = text.split("</think>")[-1].strip()
        if text.startswith("```"):
            text = "\n".join(
                line for line in text.splitlines()
                if not line.strip().startswith("```")
            ).strip()

        try:
            whole = json.loads(text)
        except json.JSONDecodeError:
            pass
        else:
            # json.loads also accepts lists, strings and numbers; callers need an object.
            if not isinstance(whole, dict):
                raise ValueError("JSON 파싱 실패 (최상위가 객체가 아님):\n" + raw[:300])
            return whole

        start = text.find("{")
        if start == -1:
            raise ValueError("JSON 파싱 실패 ('{' 없음):\n" + raw[:300])

        # Walk the text from the first '{', tracking nesting outside of string
        # literals, until both depths return to zero.
        brace_depth = 0
        bracket_depth = 0
        in_string = False
        escape = False
        end = start
        for i in range(start, len(text)):
            c = text[i]
            if escape:
                # The character after a backslash is consumed as-is.
                escape = False
                continue
            if c == '\\' and in_string:
                escape = True
                continue
            if c == '"':
                in_string = not in_string
                continue
            if in_string:
                continue
            if c == '{':
                brace_depth += 1
            elif c == '}':
                brace_depth -= 1
            elif c == '[':
                bracket_depth += 1
            elif c == ']':
                bracket_depth -= 1

            if brace_depth == 0 and bracket_depth == 0:
                # Presumed end of the top-level JSON object.
                if i > start:
                    end = i + 1
                    break

        if brace_depth != 0 or bracket_depth != 0:
            # The object never closed: the reply was probably cut off.
            partial = self._repair_truncated_json(text[start:])
            try:
                return json.loads(partial)
            except json.JSONDecodeError as err:
                raise ValueError(
                    f"JSON 파싱 실패 (잘린 응답 복구 불가):\n{raw[:400]}"
                ) from err

        candidate = text[start:end]
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            # Delimiters balance but the span is still invalid (e.g. a cut-off
            # array element or trailing property); try the repair path.
            repaired = self._repair_truncated_json(candidate)
            try:
                return json.loads(repaired)
            except json.JSONDecodeError as err:
                raise ValueError(f"JSON 파싱 실패:\n{raw[:400]}") from err

    def _repair_truncated_json(self, text: str) -> str:
        if '"actions"' not in text:
            return '{"thinking":"응답 잘림","current_goal":"탐색","actions":[],"after_this":"재시도"}'
        last_complete = -1
        for m in re.finditer(r'"reason"\s*:\s*"[^"]*"\s*\}', text):
            last_complete = m.end()
        if last_complete > 0:
            result = text[:last_complete]
            open_brackets = result.count('[') - result.count(']')
            open_braces = result.count('{') - result.count('}')
            # JSON이 '...,' 로 끝나는 경우를 방지
            if result.rstrip().endswith(","):
                result = result.rstrip()[:-1]
            result += ']' * max(0, open_brackets)
            if '"after_this"' not in result and open_braces > 0:
                result += ',"after_this":"계속 진행"'
            result += '}' * max(0, open_braces)
            return result
        return '{"thinking":"응답 잘림","current_goal":"탐색","actions":[],"after_this":"재시도"}'

    def set_goal(self, goal: str):
        """Switch to a new long-term goal and discard all recorded feedback.

        Args:
            goal: Text of the new long-term goal.
        """
        # Feedback recorded under the previous goal is emptied in place.
        self.feedback_log.clear()
        self.long_term_goal = goal
        print("[AI] 새 목표: {}".format(goal))