[2026-06-01] Fix duplicate bot startup guards

This commit is contained in:
2026-06-01 18:54:52 +09:00
parent 57a0f686e1
commit dd789cfbda
6 changed files with 393 additions and 114 deletions
+78 -41
View File
@@ -1,44 +1,83 @@
"""
watchdog.py — 봇 생존 감시 + 자동 재시작
5분마다 실행 (작업 스케줄러) / 장중(09:00~15:10)에만 작동
"""
import asyncio, os, subprocess, sys
import asyncio
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
PROJECT = r'C:\Users\whdwo\OneDrive\바탕 화면\stockbot_v3'
PID_FILE = os.path.join(PROJECT, 'logs', 'bot.pid')
PROJECT = Path(__file__).resolve().parents[1]
PID_FILE = PROJECT / "logs" / "bot.pid"
os.chdir(PROJECT)
sys.path.insert(0, '.')
sys.path.insert(0, str(PROJECT))
from app.main import load_env
load_env()
def is_process_alive(pid: int) -> bool:
r = subprocess.run(
['tasklist', '/FI', f'PID eq {pid}', '/NH'],
capture_output=True, text=True,
def _is_process_alive(pid: int) -> bool:
    """Return True when ``tasklist`` lists a process with the given PID.

    Runs ``tasklist /FI "PID eq <pid>" /NH`` and inspects its captured
    standard output.

    Args:
        pid: Process ID to look up.

    Returns:
        True if the PID string occurs in tasklist's stdout, otherwise False.
    """
    result = subprocess.run(
        ["tasklist", "/FI", f"PID eq {pid}", "/NH"],
        capture_output=True,
        text=True,
    )
    # The /FI filter already restricts the listing to this PID, so a plain
    # substring test on the output is enough.
    return str(pid) in result.stdout
def get_pid() -> int | None:
def _find_bot_pids() -> list[int]:
    """Return the PIDs of running Python processes launched on app/main.py.

    Queries ``Win32_Process`` through PowerShell and keeps processes whose
    name starts with ``python`` and whose command line mentions the bot
    entry point, written with either path separator.

    Returns:
        Process IDs parsed from PowerShell's stdout, in output order. The
        list is empty when no line of the output is purely numeric.
    """
    # -like performs wildcard matching, not regex matching: a backslash is an
    # ordinary character, so a doubled backslash only matches a command line
    # that literally contains two of them. Match the single-backslash form
    # too; the doubled form stays so anything matched before still matches.
    command = (
        "Get-CimInstance Win32_Process | "
        "Where-Object { $_.Name -like 'python*' -and "
        "($_.CommandLine -like '*app/main.py*' "
        "-or $_.CommandLine -like '*app\\main.py*' "
        "-or $_.CommandLine -like '*app\\\\main.py*') } | "
        "Select-Object -ExpandProperty ProcessId"
    )
    result = subprocess.run(
        ["powershell", "-NoProfile", "-Command", command],
        capture_output=True,
        text=True,
    )
    # Ignore blank lines and any non-numeric noise in the output.
    return [
        int(line)
        for line in map(str.strip, result.stdout.splitlines())
        if line.isdigit()
    ]
def _get_pid() -> int | None:
    """Read the bot PID stored in ``PID_FILE``.

    Returns:
        The PID as an int, or None when the file cannot be read or its
        content is not an integer.
    """
    try:
        # read_text opens and closes the file itself, so no handle is leaked.
        return int(PID_FILE.read_text(encoding="utf-8").strip())
    except (OSError, ValueError):
        # OSError: missing or unreadable file.
        # ValueError: undecodable bytes or non-numeric content.
        return None
def restart_bot() -> int:
    """Launch app/main.py as a detached process and record its PID.

    The child's stdout and stderr are appended to ``logs/bot_stderr.log``.
    Both that path and ``app/main.py`` are relative, so the current working
    directory must be the project root when this is called.

    Returns:
        PID of the newly started process.
    """
    creationflags = 0
    # The detach flags are only defined by subprocess on Windows.
    if hasattr(subprocess, "DETACHED_PROCESS"):
        creationflags = subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP
    # The child receives its own copy of the log handle, so the parent's
    # handle can be closed as soon as Popen returns.
    with open('logs/bot_stderr.log', 'a', encoding='utf-8') as log:
        proc = subprocess.Popen(
            [sys.executable, 'app/main.py'],
            creationflags=creationflags,
            stdout=log,
            stderr=subprocess.STDOUT,
            close_fds=True,
        )
    with open(PID_FILE, 'w', encoding='utf-8') as f:
        f.write(str(proc.pid))
    # Callers report the new PID, so it must be returned.
    return proc.pid
def _write_pid(pid: int) -> None:
    """Store *pid* in ``PID_FILE``, creating its directory first if absent.

    Args:
        pid: Process ID to record.
    """
    pid_dir = PID_FILE.parent
    pid_dir.mkdir(exist_ok=True)
    PID_FILE.write_text(f"{pid}", encoding="utf-8")
def _restart_bot() -> int:
    """Start app/main.py as a detached, unbuffered process and record its PID.

    The child runs with ``PROJECT`` as its working directory, and its stdout
    and stderr are appended to ``logs/bot_stderr.log`` under ``PROJECT``.

    Returns:
        PID of the newly started process.
    """
    log_path = PROJECT / "logs" / "bot_stderr.log"
    # Make sure logs/ exists before opening the log file; otherwise a fresh
    # checkout without that directory fails with FileNotFoundError.
    log_path.parent.mkdir(exist_ok=True)

    env = os.environ.copy()
    env["PYTHONUNBUFFERED"] = "1"

    creationflags = 0
    # The detach flags are only defined by subprocess on Windows.
    if hasattr(subprocess, "DETACHED_PROCESS"):
        creationflags = subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP

    # The child receives its own copy of the log handle, so the parent's
    # handle is closed right after the launch.
    with open(log_path, "a", encoding="utf-8") as log:
        proc = subprocess.Popen(
            [sys.executable, "-u", "app/main.py"],
            cwd=PROJECT,
            creationflags=creationflags,
            stdout=log,
            stderr=subprocess.STDOUT,
            close_fds=True,
            env=env,
        )
    _write_pid(proc.pid)
    return proc.pid
@@ -46,35 +85,33 @@ async def main():
now = datetime.now()
now_str = now.strftime("%H:%M")
# 장 외 시간은 체크 안 함
if not ("09:00" <= now_str <= "15:10"):
print(f"[{now_str}] 장 외 시간 — 워치독 종료")
print(f"[{now_str}] outside trading window - watchdog skipped")
return
from app.monitor.notifier import send
pid = get_pid()
pid = _get_pid()
if pid is not None and _is_process_alive(pid):
print(f"[{now_str}] bot running PID={pid}")
return
if pid is None:
msg = f"[경고] 봇 PID 파일 없음 — 봇이 실행되지 않은 상태입니다 ({now_str})"
live_pids = _find_bot_pids()
if live_pids:
recovered_pid = live_pids[0]
_write_pid(recovered_pid)
msg = f"[복구] bot.pid corrected to running bot PID={recovered_pid} ({now_str})"
print(msg)
await send(msg)
new_pid = restart_bot()
await send(f"[복구] 봇 자동 재시작 완료 PID={new_pid}")
return
if is_process_alive(pid):
print(f"[{now_str}] 봇 정상 실행 중 PID={pid}")
return
# 봇이 죽어있음
msg = f"[긴급] 봇 프로세스 종료 감지 (PID={pid}) — 자동 재시작 시도"
msg = f"[긴급] bot process not found (pid={pid}) - restarting"
print(msg)
await send(msg)
new_pid = restart_bot()
await send(f"[복구] 봇 자동 재시작 완료 PID={new_pid} ({now_str})")
print(f"봇 재시작 완료 PID={new_pid}")
new_pid = _restart_bot()
await send(f"[복구] bot restarted PID={new_pid} ({now_str})")
print(f"bot restarted PID={new_pid}")
if __name__ == "__main__":
+1 -2
View File
@@ -8,7 +8,6 @@ $env:PYTHONIOENCODING = "utf-8"
$PROJECT = Split-Path -Parent $PSScriptRoot
$LOG = "$PROJECT\logs\bot_start.log"
. "$PROJECT\scripts\stockbot_env.ps1"
$CLAUDE = Resolve-StockBotClaude
$PYTHON = Resolve-StockBotPython -Project $PROJECT
$utf8 = New-Object System.Text.UTF8Encoding $false
@@ -25,7 +24,7 @@ if ($LASTEXITCODE -ne 0) {
$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
[System.IO.File]::AppendAllText($LOG, "[$timestamp] /start-bot 실행`n", $utf8)
& $CLAUDE -p "/start-bot" --dangerously-skip-permissions 2>&1 |
& $PYTHON "scripts\start_bot.py" 2>&1 |
ForEach-Object { [System.IO.File]::AppendAllText($LOG, "$_`n", $utf8) }
$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
+1 -1
View File
@@ -37,7 +37,7 @@ $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
[System.IO.File]::AppendAllText($LOG, "[$timestamp] /start-bot 시작`n", $utf8)
& $CLAUDE -p "/start-bot" --dangerously-skip-permissions 2>&1 |
& $PYTHON "scripts\start_bot.py" 2>&1 |
ForEach-Object { [System.IO.File]::AppendAllText($LOG, "$_`n", $utf8) }
$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
+113
View File
@@ -0,0 +1,113 @@
import asyncio
import os
import subprocess
import sys
import time
from pathlib import Path
# Directory one level above the folder that contains this script.
PROJECT = Path(__file__).resolve().parents[1]
# File in which the PID of the started bot process is recorded.
PID_FILE = PROJECT / "logs" / "bot.pid"
# Log file that receives the bot's combined stdout and stderr.
LOG_FILE = PROJECT / "logs" / "bot_stderr.log"
def _taskkill(pid: int) -> None:
    """Force-terminate process *pid* with ``taskkill /PID <pid> /F``.

    The command's output is captured and discarded, and its exit status is
    not checked.

    Args:
        pid: Process ID to terminate.
    """
    argv = ["taskkill", "/PID", f"{pid}", "/F"]
    subprocess.run(argv, capture_output=True, text=True)
def _find_bot_pids() -> list[int]:
    """Return the PIDs of running Python processes launched on app/main.py.

    Queries ``Win32_Process`` through PowerShell and keeps processes whose
    name starts with ``python`` and whose command line mentions the bot
    entry point, written with either path separator.

    Returns:
        Process IDs parsed from PowerShell's stdout, in output order. The
        list is empty when no line of the output is purely numeric.
    """
    # -like performs wildcard matching, not regex matching: a backslash is an
    # ordinary character, so a doubled backslash only matches a command line
    # that literally contains two of them. Match the single-backslash form
    # too; the doubled form stays so anything matched before still matches.
    command = (
        "Get-CimInstance Win32_Process | "
        "Where-Object { $_.Name -like 'python*' -and "
        "($_.CommandLine -like '*app/main.py*' "
        "-or $_.CommandLine -like '*app\\main.py*' "
        "-or $_.CommandLine -like '*app\\\\main.py*') } | "
        "Select-Object -ExpandProperty ProcessId"
    )
    result = subprocess.run(
        ["powershell", "-NoProfile", "-Command", command],
        capture_output=True,
        text=True,
    )
    # Ignore blank lines and any non-numeric noise in the output.
    return [
        int(line)
        for line in map(str.strip, result.stdout.splitlines())
        if line.isdigit()
    ]
def _kill_existing_bots() -> None:
    """Terminate every bot process that can be found before a fresh start.

    First the PID recorded in ``PID_FILE`` is killed and the file removed,
    then every PID reported by ``_find_bot_pids`` that was not already
    handled. A line is printed for each step, and a final notice is printed
    when nothing was killed.
    """
    stopped: set[int] = set()

    if PID_FILE.exists():
        try:
            recorded = int(PID_FILE.read_text(encoding="utf-8").strip())
            _taskkill(recorded)
            stopped.add(recorded)
            print(f"PID file process stopped: {recorded}")
        except Exception as exc:
            # Best effort: an unreadable or malformed PID file must not
            # block the start-up.
            print(f"PID file stop skipped: {exc}")
        PID_FILE.unlink(missing_ok=True)

    for candidate in _find_bot_pids():
        if candidate not in stopped:
            _taskkill(candidate)
            stopped.add(candidate)
            print(f"Existing bot process stopped: {candidate}")

    if stopped:
        return
    print("No existing bot process found")
def _start_bot() -> int:
    """Launch app/main.py as a detached, unbuffered process.

    The child runs with ``PROJECT`` as its working directory and appends its
    stdout and stderr to ``LOG_FILE``. After a two-second grace period the
    child is polled; its PID is written to ``PID_FILE`` only if it is still
    running.

    Returns:
        PID of the started bot process.

    Raises:
        RuntimeError: If the process has already exited when polled.
    """
    (PROJECT / "logs").mkdir(exist_ok=True)

    child_env = {**os.environ, "PYTHONUNBUFFERED": "1"}

    # The detach flags are only defined by subprocess on Windows.
    detached = getattr(subprocess, "DETACHED_PROCESS", None)
    if detached is None:
        flags = 0
    else:
        flags = detached | subprocess.CREATE_NEW_PROCESS_GROUP

    with open(LOG_FILE, "a", encoding="utf-8") as sink:
        bot = subprocess.Popen(
            [sys.executable, "-u", "app/main.py"],
            cwd=PROJECT,
            creationflags=flags,
            stdout=sink,
            stderr=subprocess.STDOUT,
            close_fds=True,
            env=child_env,
        )

    # Give the process a moment so an immediate crash is noticed here.
    time.sleep(2)
    exit_code = bot.poll()
    if exit_code is not None:
        raise RuntimeError(f"bot process exited during startup: returncode={exit_code}")

    PID_FILE.write_text(str(bot.pid), encoding="utf-8")
    return bot.pid
async def _notify_start() -> None:
    """Send the bot start message through the project's notifier.

    The message is prefixed with the paper-trading label when ``KIS_MOCK``
    equals ``"true"`` (also the default when unset) and with the
    live-trading label otherwise; it also reports the ``DRY_RUN`` value.
    """
    # Make the project package importable before pulling in its modules.
    sys.path.insert(0, str(PROJECT))
    from app.main import load_env

    # Load the environment before the notifier module is imported.
    load_env()
    from app.monitor.notifier import send

    mock_flag = os.environ.get("KIS_MOCK", "true")
    dry_flag = os.environ.get("DRY_RUN", "true")
    if mock_flag == "true":
        label = "[모의투자]"
    else:
        label = "[실거래]"
    await send(f"{label} 자동매매 봇 시작 (DRY_RUN={dry_flag})")
def main() -> int:
    """Replace any running bot with a fresh instance and announce the start.

    Changes into ``PROJECT``, kills existing bot processes, starts a new
    one, and then runs the start notification.

    Returns:
        0 after the bot has been started and the notification has run.
    """
    os.chdir(PROJECT)
    _kill_existing_bots()

    started_pid = _start_bot()
    print(f"Bot started PID={started_pid}")

    asyncio.run(_notify_start())
    print("Discord start notification sent")
    return 0
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())