Files
xian_algorithm_new/app/services/qgis/qgis_pool.py
T
2026-06-24 11:22:20 +08:00

325 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
QGIS Worker 进程池管理器 — 主机侧运行。
管理 N 个长驻 Docker Worker 进程,提供任务提交和结果回收。
架构:
主机 FastAPI ←→ qgis_pool (本模块) ←→ docker exec -i ←→ qgis_worker.py (容器内)
用法:
from app.services.qgis.qgis_pool import qgis_pool
# 提交任务(阻塞等待完成)
results, summary = qgis_pool.submit_job(config, models, progress_callback)
# 关闭池
qgis_pool.shutdown()
"""
import json
import subprocess
import threading
import time
from typing import Callable, Dict, List, Optional, Tuple
from app.config.paths import get_logger
logger = get_logger("qgis.pool")
class _WorkerHandle:
"""单个 Worker 进程的句柄"""
def __init__(self, worker_id: int, proc: subprocess.Popen):
self.worker_id = worker_id
self.proc = proc
self.busy = False
self.lock = threading.Lock()
self._reader_thread: Optional[threading.Thread] = None
def is_alive(self) -> bool:
return self.proc.poll() is None
def mark_busy(self):
with self.lock:
self.busy = True
def mark_free(self):
with self.lock:
self.busy = False
def is_free(self) -> bool:
with self.lock:
return not self.busy and self.is_alive()
class QgisPool:
"""QGIS Worker 进程池"""
def __init__(self, pool_size: int = None):
"""
Args:
pool_size: 池中 Worker 数量,默认从配置读取
"""
if pool_size is None:
try:
from config import settings
pool_size = getattr(settings, "QGIS_POOL_SIZE", 2)
except Exception:
pool_size = 2
self._pool_size = pool_size
self._workers: List[_WorkerHandle] = []
self._lock = threading.Lock()
self._started = False
def _ensure_started(self):
"""懒启动:首次使用时启动 Worker 进程"""
if self._started:
return
with self._lock:
if self._started:
return
self._start_workers()
self._started = True
def _start_workers(self):
"""启动所有 Worker 进程"""
from app.services.qgis.qgis_env import (
get_docker_container,
get_docker_project_dir,
get_container_python_path,
)
container = get_docker_container()
project_dir = get_docker_project_dir()
python_path = get_container_python_path()
worker_script = f"{project_dir}/app/services/qgis/qgis_worker.py"
# Qt 平台设置
try:
from config import settings
qt_platform = getattr(settings, "QGIS_DOCKER_QT_PLATFORM", "offscreen")
except Exception:
qt_platform = "offscreen"
for i in range(self._pool_size):
cmd = [
"docker", "exec", "-i",
"-w", project_dir,
"-e", f"QT_QPA_PLATFORM={qt_platform}",
container,
python_path, worker_script,
]
logger.info(f"[Pool] 启动 Worker-{i}: {' '.join(cmd)}")
proc = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
encoding="utf-8",
errors="replace",
)
worker = _WorkerHandle(i, proc)
self._workers.append(worker)
# 后台线程消费 stderr,防止管道满阻塞
t = threading.Thread(
target=self._drain_stderr, args=(worker,), daemon=True
)
t.start()
logger.info(f"[Pool] 已启动 {self._pool_size} 个 Worker")
@staticmethod
def _drain_stderr(worker: _WorkerHandle):
"""持续读取 Worker stderr 防止管道缓冲区满"""
try:
for line in worker.proc.stderr:
line = line.strip()
if line:
logger.debug(f"[Worker-{worker.worker_id}] stderr: {line}")
except Exception:
pass
def _find_free_worker(self, timeout: float = 30) -> Optional[_WorkerHandle]:
"""找到一个空闲 Worker,超时返回 None"""
deadline = time.time() + timeout
while time.time() < deadline:
with self._lock:
for w in self._workers:
if w.is_free():
return w
time.sleep(0.1)
return None
def submit_job(
self,
config: dict,
models: list,
progress_callback: Callable[[dict], None] = None,
) -> Tuple[list, dict]:
"""
提交一个批次任务,阻塞等待完成。
Args:
config: QGIS 配置字典
models: 模型参数列表
progress_callback: 每张图完成时的回调(接收 PROGRESS dict
Returns:
(results_list, summary_dict)
"""
self._ensure_started()
worker = self._find_free_worker(timeout=60)
if worker is None:
raise RuntimeError("[Pool] 无可用 Worker(等待 60s 超时)")
worker.mark_busy()
try:
return self._run_job(worker, config, models, progress_callback)
finally:
# 检查 Worker 是否还活着,死了就重启
if not worker.is_alive():
logger.error(f"[Pool] Worker-{worker.worker_id} 已死亡,重启...")
self._restart_worker(worker)
worker.mark_free()
def _run_job(
self,
worker: _WorkerHandle,
config: dict,
models: list,
progress_callback: Callable[[dict], None] = None,
) -> Tuple[list, dict]:
"""向指定 Worker 发送任务并读取结果"""
job = {"config": config, "models": models}
job_json = json.dumps(job, ensure_ascii=False) + "\n"
# 写入 stdin
try:
worker.proc.stdin.write(job_json)
worker.proc.stdin.flush()
except (BrokenPipeError, OSError) as e:
raise RuntimeError(f"[Pool] 写入 Worker-{worker.worker_id} stdin 失败: {e}")
# 读取输出(直到 JOB_DONE 或 FATAL
results = []
summary = {}
while True:
line = worker.proc.stdout.readline()
if not line:
# stdout 关闭,Worker 可能已死亡
logger.error(f"[Pool] Worker-{worker.worker_id} stdout 关闭")
summary = {"error": "Worker stdout 关闭"}
break
line = line.strip()
if not line:
continue
if line.startswith("PROGRESS:"):
try:
r = json.loads(line[len("PROGRESS:"):])
results.append(r)
if progress_callback:
progress_callback(r)
except json.JSONDecodeError:
logger.warning(f"[Pool] Worker-{worker.worker_id} PROGRESS 解析失败: {line[:200]}")
elif line.startswith("JOB_DONE:"):
try:
summary = json.loads(line[len("JOB_DONE:"):])
except json.JSONDecodeError:
summary = {"error": f"JOB_DONE 解析失败: {line[:200]}"}
break
elif line.startswith("FATAL:"):
try:
summary = json.loads(line[len("FATAL:"):])
except json.JSONDecodeError:
summary = {"error": f"FATAL: {line[:200]}"}
logger.error(f"[Pool] Worker-{worker.worker_id} FATAL: {summary}")
break
else:
logger.debug(f"[Pool] Worker-{worker.worker_id} 未知输出: {line[:200]}")
return results, summary
def _restart_worker(self, dead_worker: _WorkerHandle):
"""重启一个死亡的 Worker"""
from app.services.qgis.qgis_env import (
get_docker_container,
get_docker_project_dir,
get_container_python_path,
)
container = get_docker_container()
project_dir = get_docker_project_dir()
python_path = get_container_python_path()
worker_script = f"{project_dir}/app/services/qgis/qgis_worker.py"
try:
from config import settings
qt_platform = getattr(settings, "QGIS_DOCKER_QT_PLATFORM", "offscreen")
except Exception:
qt_platform = "offscreen"
cmd = [
"docker", "exec", "-i",
"-w", project_dir,
"-e", f"QT_QPA_PLATFORM={qt_platform}",
container,
python_path, worker_script,
]
proc = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
encoding="utf-8",
errors="replace",
)
dead_worker.proc = proc
dead_worker.busy = False
# 重启 stderr 消费线程
t = threading.Thread(
target=self._drain_stderr, args=(dead_worker,), daemon=True
)
t.start()
logger.info(f"[Pool] Worker-{dead_worker.worker_id} 已重启")
def shutdown(self):
"""关闭所有 Worker"""
with self._lock:
for w in self._workers:
if w.proc.poll() is None:
try:
w.proc.stdin.close()
except Exception:
pass
try:
w.proc.terminate()
w.proc.wait(timeout=5)
except Exception:
try:
w.proc.kill()
except Exception:
pass
self._workers.clear()
self._started = False
logger.info("[Pool] 已关闭")
# 全局单例
qgis_pool = QgisPool()