[2026-06-01] Fix duplicate bot startup guards

This commit is contained in:
2026-06-01 18:54:52 +09:00
parent 57a0f686e1
commit dd789cfbda
6 changed files with 393 additions and 114 deletions
+78 -41
View File
@@ -1,44 +1,83 @@
"""
watchdog.py — bot liveness monitor + automatic restart
Runs every 5 minutes (Task Scheduler) / active only during market hours (09:00~15:10)
"""
import asyncio, os, subprocess, sys
import asyncio
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
PROJECT = r'C:\Users\whdwo\OneDrive\바탕 화면\stockbot_v3'
PID_FILE = os.path.join(PROJECT, 'logs', 'bot.pid')
PROJECT = Path(__file__).resolve().parents[1]
PID_FILE = PROJECT / "logs" / "bot.pid"
os.chdir(PROJECT)
sys.path.insert(0, '.')
sys.path.insert(0, str(PROJECT))
from app.main import load_env
load_env()
def is_process_alive(pid: int) -> bool:
r = subprocess.run(
['tasklist', '/FI', f'PID eq {pid}', '/NH'],
capture_output=True, text=True,
def _is_process_alive(pid: int) -> bool:
    """Return True when ``tasklist`` reports a process with the given PID.

    ``tasklist`` is run with a ``PID eq <pid>`` filter and ``/NH`` (no
    header row); the process is treated as alive when the PID's digits
    appear anywhere in the captured output.

    Args:
        pid: Process ID to look up.

    Returns:
        True if the PID appears in the ``tasklist`` output, else False.
    """
    result = subprocess.run(
        ["tasklist", "/FI", f"PID eq {pid}", "/NH"],
        capture_output=True,
        text=True,
    )
    # Single return on the captured result; the stale line that read the
    # undefined name ``r`` is gone.
    return str(pid) in result.stdout
def get_pid() -> int | None:
# PowerShell pipeline that prints the PID of every python* process whose
# command line mentions app/main.py (either path separator).
# The backslash variant is a raw string on purpose: ``-like`` treats a
# backslash as an ordinary character, so the pattern must contain exactly
# one backslash to match a command line such as ``python app\main.py``.
BOT_PID_QUERY = (
    "Get-CimInstance Win32_Process | "
    "Where-Object { $_.Name -like 'python*' -and "
    "($_.CommandLine -like '*app/main.py*' "
    r"-or $_.CommandLine -like '*app\main.py*') } | "
    "Select-Object -ExpandProperty ProcessId"
)


def _find_bot_pids() -> list[int]:
    """Return the PIDs of running bot processes found via PowerShell.

    Runs ``BOT_PID_QUERY`` through ``powershell -NoProfile -Command`` and
    collects every output line that consists solely of digits.

    Returns:
        The PIDs in the order PowerShell printed them; an empty list when
        no line of the output is numeric.
    """
    result = subprocess.run(
        ["powershell", "-NoProfile", "-Command", BOT_PID_QUERY],
        capture_output=True,
        text=True,
    )
    # Keep only purely numeric lines; blank lines and any other text in the
    # output are ignored.
    stripped_lines = (line.strip() for line in result.stdout.splitlines())
    return [int(line) for line in stripped_lines if line.isdigit()]
def _get_pid() -> int | None:
    """Return the PID stored in ``PID_FILE``, or None when it is unusable.

    Returns:
        The integer parsed from the file's stripped UTF-8 contents, or
        None if the file cannot be read or does not contain an integer.
    """
    try:
        # read_text opens and closes the file itself, so no handle is leaked.
        return int(PID_FILE.read_text(encoding="utf-8").strip())
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or non-integer contents.
        return None
def restart_bot() -> int:
proc = subprocess.Popen(
[sys.executable, 'app/main.py'],
creationflags=subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP,
stdout=open('logs/bot_stderr.log', 'a', encoding='utf-8'),
stderr=subprocess.STDOUT,
close_fds=True,
)
with open(PID_FILE, 'w') as f:
f.write(str(proc.pid))
def _write_pid(pid: int) -> None:
    """Record ``pid`` in ``PID_FILE``, creating its directory if needed.

    Args:
        pid: Process ID to store, written as decimal text in UTF-8.
    """
    pid_dir = PID_FILE.parent
    pid_dir.mkdir(exist_ok=True)
    PID_FILE.write_text(f"{pid}", encoding="utf-8")
def _restart_bot() -> int:
    """Start ``app/main.py`` as a new process and record its PID.

    The child runs with ``PROJECT`` as its working directory, unbuffered
    output (``-u`` plus ``PYTHONUNBUFFERED=1``), and stdout/stderr appended
    to ``logs/bot_stderr.log``. The new PID is written via ``_write_pid``.

    Returns:
        The PID of the spawned process.
    """
    env = os.environ.copy()
    env["PYTHONUNBUFFERED"] = "1"

    # The detach flags are applied only when the subprocess module exposes
    # them; otherwise the child is started with no creation flags.
    creationflags = 0
    if hasattr(subprocess, "DETACHED_PROCESS"):
        creationflags = subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP

    log_path = PROJECT / "logs" / "bot_stderr.log"
    # Create the log directory before opening the file: _write_pid only
    # creates it after the spawn, so a missing logs/ would otherwise abort
    # the restart with FileNotFoundError.
    log_path.parent.mkdir(exist_ok=True)
    with open(log_path, "a", encoding="utf-8") as log:
        proc = subprocess.Popen(
            [sys.executable, "-u", "app/main.py"],
            cwd=PROJECT,
            creationflags=creationflags,
            stdout=log,
            stderr=subprocess.STDOUT,
            close_fds=True,
            env=env,
        )
    _write_pid(proc.pid)
    return proc.pid
@@ -46,35 +85,33 @@ async def main():
now = datetime.now()
now_str = now.strftime("%H:%M")
# Skip the check outside market hours
if not ("09:00" <= now_str <= "15:10"):
print(f"[{now_str}] 장 외 시간 — 워치독 종료")
print(f"[{now_str}] outside trading window - watchdog skipped")
return
from app.monitor.notifier import send
pid = get_pid()
pid = _get_pid()
if pid is not None and _is_process_alive(pid):
print(f"[{now_str}] bot running PID={pid}")
return
if pid is None:
msg = f"[경고] 봇 PID 파일 없음 — 봇이 실행되지 않은 상태입니다 ({now_str})"
live_pids = _find_bot_pids()
if live_pids:
recovered_pid = live_pids[0]
_write_pid(recovered_pid)
msg = f"[복구] bot.pid corrected to running bot PID={recovered_pid} ({now_str})"
print(msg)
await send(msg)
new_pid = restart_bot()
await send(f"[복구] 봇 자동 재시작 완료 PID={new_pid}")
return
if is_process_alive(pid):
print(f"[{now_str}] 봇 정상 실행 중 PID={pid}")
return
# The bot process is dead
msg = f"[긴급] 봇 프로세스 종료 감지 (PID={pid}) — 자동 재시작 시도"
msg = f"[긴급] bot process not found (pid={pid}) - restarting"
print(msg)
await send(msg)
new_pid = restart_bot()
await send(f"[복구] 봇 자동 재시작 완료 PID={new_pid} ({now_str})")
print(f"봇 재시작 완료 PID={new_pid}")
new_pid = _restart_bot()
await send(f"[복구] bot restarted PID={new_pid} ({now_str})")
print(f"bot restarted PID={new_pid}")
if __name__ == "__main__":