""" QGIS Worker 进程池管理器 — 主机侧运行。 管理 N 个长驻 Docker Worker 进程,提供任务提交和结果回收。 架构: 主机 FastAPI ←→ qgis_pool (本模块) ←→ docker exec -i ←→ qgis_worker.py (容器内) 用法: from app.services.qgis.qgis_pool import qgis_pool # 提交任务(阻塞等待完成) results, summary = qgis_pool.submit_job(config, models, progress_callback) # 关闭池 qgis_pool.shutdown() """ import json import subprocess import threading import time from typing import Callable, Dict, List, Optional, Tuple from app.config.paths import get_logger logger = get_logger("qgis.pool") class _WorkerHandle: """单个 Worker 进程的句柄""" def __init__(self, worker_id: int, proc: subprocess.Popen): self.worker_id = worker_id self.proc = proc self.busy = False self.lock = threading.Lock() self._reader_thread: Optional[threading.Thread] = None def is_alive(self) -> bool: return self.proc.poll() is None def mark_busy(self): with self.lock: self.busy = True def mark_free(self): with self.lock: self.busy = False def is_free(self) -> bool: with self.lock: return not self.busy and self.is_alive() class QgisPool: """QGIS Worker 进程池""" def __init__(self, pool_size: int = None): """ Args: pool_size: 池中 Worker 数量,默认从配置读取 """ if pool_size is None: try: from config import settings pool_size = getattr(settings, "QGIS_POOL_SIZE", 2) except Exception: pool_size = 2 self._pool_size = pool_size self._workers: List[_WorkerHandle] = [] self._lock = threading.Lock() self._started = False def _ensure_started(self): """懒启动:首次使用时启动 Worker 进程""" if self._started: return with self._lock: if self._started: return self._start_workers() self._started = True def _start_workers(self): """启动所有 Worker 进程""" from app.services.qgis.qgis_env import ( get_docker_container, get_docker_project_dir, get_container_python_path, ) container = get_docker_container() project_dir = get_docker_project_dir() python_path = get_container_python_path() worker_script = f"{project_dir}/app/services/qgis/qgis_worker.py" # Qt 平台设置 try: from config import settings qt_platform = getattr(settings, "QGIS_DOCKER_QT_PLATFORM", "offscreen") except Exception: qt_platform = "offscreen" for i in range(self._pool_size): cmd = [ "docker", "exec", "-i", "-w", project_dir, "-e", f"QT_QPA_PLATFORM={qt_platform}", container, python_path, worker_script, ] logger.info(f"[Pool] 启动 Worker-{i}: {' '.join(cmd)}") proc = subprocess.Popen( cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding="utf-8", errors="replace", ) worker = _WorkerHandle(i, proc) self._workers.append(worker) # 后台线程消费 stderr,防止管道满阻塞 t = threading.Thread( target=self._drain_stderr, args=(worker,), daemon=True ) t.start() logger.info(f"[Pool] 已启动 {self._pool_size} 个 Worker") @staticmethod def _drain_stderr(worker: _WorkerHandle): """持续读取 Worker stderr 防止管道缓冲区满""" try: for line in worker.proc.stderr: line = line.strip() if line: logger.debug(f"[Worker-{worker.worker_id}] stderr: {line}") except Exception: pass def _find_free_worker(self, timeout: float = 30) -> Optional[_WorkerHandle]: """找到一个空闲 Worker,超时返回 None""" deadline = time.time() + timeout while time.time() < deadline: with self._lock: for w in self._workers: if w.is_free(): return w time.sleep(0.1) return None def submit_job( self, config: dict, models: list, progress_callback: Callable[[dict], None] = None, ) -> Tuple[list, dict]: """ 提交一个批次任务,阻塞等待完成。 Args: config: QGIS 配置字典 models: 模型参数列表 progress_callback: 每张图完成时的回调(接收 PROGRESS dict) Returns: (results_list, summary_dict) """ self._ensure_started() worker = self._find_free_worker(timeout=60) if worker is None: raise RuntimeError("[Pool] 无可用 Worker(等待 60s 超时)") worker.mark_busy() try: return self._run_job(worker, config, models, progress_callback) finally: # 检查 Worker 是否还活着,死了就重启 if not worker.is_alive(): logger.error(f"[Pool] Worker-{worker.worker_id} 已死亡,重启...") self._restart_worker(worker) worker.mark_free() def _run_job( self, worker: _WorkerHandle, config: dict, models: list, progress_callback: Callable[[dict], None] = None, ) -> Tuple[list, dict]: """向指定 Worker 发送任务并读取结果""" job = {"config": config, "models": models} job_json = json.dumps(job, ensure_ascii=False) + "\n" # 写入 stdin try: worker.proc.stdin.write(job_json) worker.proc.stdin.flush() except (BrokenPipeError, OSError) as e: raise RuntimeError(f"[Pool] 写入 Worker-{worker.worker_id} stdin 失败: {e}") # 读取输出(直到 JOB_DONE 或 FATAL) results = [] summary = {} while True: line = worker.proc.stdout.readline() if not line: # stdout 关闭,Worker 可能已死亡 logger.error(f"[Pool] Worker-{worker.worker_id} stdout 关闭") summary = {"error": "Worker stdout 关闭"} break line = line.strip() if not line: continue if line.startswith("PROGRESS:"): try: r = json.loads(line[len("PROGRESS:"):]) results.append(r) if progress_callback: progress_callback(r) except json.JSONDecodeError: logger.warning(f"[Pool] Worker-{worker.worker_id} PROGRESS 解析失败: {line[:200]}") elif line.startswith("JOB_DONE:"): try: summary = json.loads(line[len("JOB_DONE:"):]) except json.JSONDecodeError: summary = {"error": f"JOB_DONE 解析失败: {line[:200]}"} break elif line.startswith("FATAL:"): try: summary = json.loads(line[len("FATAL:"):]) except json.JSONDecodeError: summary = {"error": f"FATAL: {line[:200]}"} logger.error(f"[Pool] Worker-{worker.worker_id} FATAL: {summary}") break else: logger.debug(f"[Pool] Worker-{worker.worker_id} 未知输出: {line[:200]}") return results, summary def _restart_worker(self, dead_worker: _WorkerHandle): """重启一个死亡的 Worker""" from app.services.qgis.qgis_env import ( get_docker_container, get_docker_project_dir, get_container_python_path, ) container = get_docker_container() project_dir = get_docker_project_dir() python_path = get_container_python_path() worker_script = f"{project_dir}/app/services/qgis/qgis_worker.py" try: from config import settings qt_platform = getattr(settings, "QGIS_DOCKER_QT_PLATFORM", "offscreen") except Exception: qt_platform = "offscreen" cmd = [ "docker", "exec", "-i", "-w", project_dir, "-e", f"QT_QPA_PLATFORM={qt_platform}", container, python_path, worker_script, ] proc = subprocess.Popen( cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding="utf-8", errors="replace", ) dead_worker.proc = proc dead_worker.busy = False # 重启 stderr 消费线程 t = threading.Thread( target=self._drain_stderr, args=(dead_worker,), daemon=True ) t.start() logger.info(f"[Pool] Worker-{dead_worker.worker_id} 已重启") def shutdown(self): """关闭所有 Worker""" with self._lock: for w in self._workers: if w.proc.poll() is None: try: w.proc.stdin.close() except Exception: pass try: w.proc.terminate() w.proc.wait(timeout=5) except Exception: try: w.proc.kill() except Exception: pass self._workers.clear() self._started = False logger.info("[Pool] 已关闭") # 全局单例 qgis_pool = QgisPool()