2026-06-24 11:22:20 +08:00
|
|
|
|
"""
|
|
|
|
|
|
QGIS Worker 进程池管理器 — 主机侧运行。
|
|
|
|
|
|
|
|
|
|
|
|
管理 N 个长驻 Docker Worker 进程,提供任务提交和结果回收。
|
|
|
|
|
|
|
|
|
|
|
|
架构:
|
|
|
|
|
|
主机 FastAPI ←→ qgis_pool (本模块) ←→ docker exec -i ←→ qgis_worker.py (容器内)
|
|
|
|
|
|
|
|
|
|
|
|
用法:
|
|
|
|
|
|
from app.services.qgis.qgis_pool import qgis_pool
|
|
|
|
|
|
|
|
|
|
|
|
# 提交任务(阻塞等待完成)
|
|
|
|
|
|
results, summary = qgis_pool.submit_job(config, models, progress_callback)
|
|
|
|
|
|
|
|
|
|
|
|
# 关闭池
|
|
|
|
|
|
qgis_pool.shutdown()
|
|
|
|
|
|
"""
|
|
|
|
|
|
import json
|
|
|
|
|
|
import subprocess
|
|
|
|
|
|
import threading
|
|
|
|
|
|
import time
|
|
|
|
|
|
from typing import Callable, Dict, List, Optional, Tuple
|
|
|
|
|
|
|
|
|
|
|
|
from app.config.paths import get_logger
|
|
|
|
|
|
|
|
|
|
|
|
logger = get_logger("qgis.pool")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class _WorkerHandle:
|
|
|
|
|
|
"""单个 Worker 进程的句柄"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, worker_id: int, proc: subprocess.Popen):
|
|
|
|
|
|
self.worker_id = worker_id
|
|
|
|
|
|
self.proc = proc
|
|
|
|
|
|
self.busy = False
|
|
|
|
|
|
self.lock = threading.Lock()
|
|
|
|
|
|
self._reader_thread: Optional[threading.Thread] = None
|
|
|
|
|
|
|
|
|
|
|
|
def is_alive(self) -> bool:
|
|
|
|
|
|
return self.proc.poll() is None
|
|
|
|
|
|
|
|
|
|
|
|
def mark_busy(self):
|
|
|
|
|
|
with self.lock:
|
|
|
|
|
|
self.busy = True
|
|
|
|
|
|
|
|
|
|
|
|
def mark_free(self):
|
|
|
|
|
|
with self.lock:
|
|
|
|
|
|
self.busy = False
|
|
|
|
|
|
|
|
|
|
|
|
def is_free(self) -> bool:
|
|
|
|
|
|
with self.lock:
|
|
|
|
|
|
return not self.busy and self.is_alive()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class QgisPool:
|
|
|
|
|
|
"""QGIS Worker 进程池"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, pool_size: int = None):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Args:
|
|
|
|
|
|
pool_size: 池中 Worker 数量,默认从配置读取
|
|
|
|
|
|
"""
|
|
|
|
|
|
if pool_size is None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from config import settings
|
|
|
|
|
|
pool_size = getattr(settings, "QGIS_POOL_SIZE", 2)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pool_size = 2
|
|
|
|
|
|
|
|
|
|
|
|
self._pool_size = pool_size
|
|
|
|
|
|
self._workers: List[_WorkerHandle] = []
|
|
|
|
|
|
self._lock = threading.Lock()
|
|
|
|
|
|
self._started = False
|
|
|
|
|
|
|
|
|
|
|
|
def _ensure_started(self):
|
|
|
|
|
|
"""懒启动:首次使用时启动 Worker 进程"""
|
|
|
|
|
|
if self._started:
|
|
|
|
|
|
return
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
if self._started:
|
|
|
|
|
|
return
|
|
|
|
|
|
self._start_workers()
|
|
|
|
|
|
self._started = True
|
|
|
|
|
|
|
|
|
|
|
|
def _start_workers(self):
|
|
|
|
|
|
"""启动所有 Worker 进程"""
|
|
|
|
|
|
from app.services.qgis.qgis_env import (
|
|
|
|
|
|
get_docker_container,
|
|
|
|
|
|
get_docker_project_dir,
|
|
|
|
|
|
get_container_python_path,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
container = get_docker_container()
|
|
|
|
|
|
project_dir = get_docker_project_dir()
|
|
|
|
|
|
python_path = get_container_python_path()
|
|
|
|
|
|
worker_script = f"{project_dir}/app/services/qgis/qgis_worker.py"
|
|
|
|
|
|
|
|
|
|
|
|
# Qt 平台设置
|
|
|
|
|
|
try:
|
|
|
|
|
|
from config import settings
|
|
|
|
|
|
qt_platform = getattr(settings, "QGIS_DOCKER_QT_PLATFORM", "offscreen")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
qt_platform = "offscreen"
|
|
|
|
|
|
|
|
|
|
|
|
for i in range(self._pool_size):
|
|
|
|
|
|
cmd = [
|
|
|
|
|
|
"docker", "exec", "-i",
|
|
|
|
|
|
"-w", project_dir,
|
|
|
|
|
|
"-e", f"QT_QPA_PLATFORM={qt_platform}",
|
|
|
|
|
|
container,
|
|
|
|
|
|
python_path, worker_script,
|
|
|
|
|
|
]
|
|
|
|
|
|
logger.info(f"[Pool] 启动 Worker-{i}: {' '.join(cmd)}")
|
|
|
|
|
|
|
|
|
|
|
|
proc = subprocess.Popen(
|
|
|
|
|
|
cmd,
|
|
|
|
|
|
stdin=subprocess.PIPE,
|
|
|
|
|
|
stdout=subprocess.PIPE,
|
|
|
|
|
|
stderr=subprocess.PIPE,
|
|
|
|
|
|
text=True,
|
|
|
|
|
|
encoding="utf-8",
|
|
|
|
|
|
errors="replace",
|
|
|
|
|
|
)
|
|
|
|
|
|
worker = _WorkerHandle(i, proc)
|
|
|
|
|
|
self._workers.append(worker)
|
|
|
|
|
|
|
|
|
|
|
|
# 后台线程消费 stderr,防止管道满阻塞
|
|
|
|
|
|
t = threading.Thread(
|
|
|
|
|
|
target=self._drain_stderr, args=(worker,), daemon=True
|
|
|
|
|
|
)
|
|
|
|
|
|
t.start()
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"[Pool] 已启动 {self._pool_size} 个 Worker")
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _drain_stderr(worker: _WorkerHandle):
|
|
|
|
|
|
"""持续读取 Worker stderr 防止管道缓冲区满"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
for line in worker.proc.stderr:
|
|
|
|
|
|
line = line.strip()
|
|
|
|
|
|
if line:
|
2026-06-24 14:16:23 +08:00
|
|
|
|
logger.info(f"[Worker-{worker.worker_id}] {line}")
|
2026-06-24 11:22:20 +08:00
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
def _find_free_worker(self, timeout: float = 30) -> Optional[_WorkerHandle]:
|
|
|
|
|
|
"""找到一个空闲 Worker,超时返回 None"""
|
|
|
|
|
|
deadline = time.time() + timeout
|
|
|
|
|
|
while time.time() < deadline:
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
for w in self._workers:
|
|
|
|
|
|
if w.is_free():
|
|
|
|
|
|
return w
|
|
|
|
|
|
time.sleep(0.1)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def submit_job(
|
|
|
|
|
|
self,
|
|
|
|
|
|
config: dict,
|
|
|
|
|
|
models: list,
|
|
|
|
|
|
progress_callback: Callable[[dict], None] = None,
|
|
|
|
|
|
) -> Tuple[list, dict]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
提交一个批次任务,阻塞等待完成。
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
config: QGIS 配置字典
|
|
|
|
|
|
models: 模型参数列表
|
|
|
|
|
|
progress_callback: 每张图完成时的回调(接收 PROGRESS dict)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
(results_list, summary_dict)
|
|
|
|
|
|
"""
|
|
|
|
|
|
self._ensure_started()
|
|
|
|
|
|
|
|
|
|
|
|
worker = self._find_free_worker(timeout=60)
|
|
|
|
|
|
if worker is None:
|
|
|
|
|
|
raise RuntimeError("[Pool] 无可用 Worker(等待 60s 超时)")
|
|
|
|
|
|
|
|
|
|
|
|
worker.mark_busy()
|
|
|
|
|
|
try:
|
|
|
|
|
|
return self._run_job(worker, config, models, progress_callback)
|
|
|
|
|
|
finally:
|
|
|
|
|
|
# 检查 Worker 是否还活着,死了就重启
|
|
|
|
|
|
if not worker.is_alive():
|
|
|
|
|
|
logger.error(f"[Pool] Worker-{worker.worker_id} 已死亡,重启...")
|
|
|
|
|
|
self._restart_worker(worker)
|
|
|
|
|
|
worker.mark_free()
|
|
|
|
|
|
|
|
|
|
|
|
def _run_job(
|
|
|
|
|
|
self,
|
|
|
|
|
|
worker: _WorkerHandle,
|
|
|
|
|
|
config: dict,
|
|
|
|
|
|
models: list,
|
|
|
|
|
|
progress_callback: Callable[[dict], None] = None,
|
|
|
|
|
|
) -> Tuple[list, dict]:
|
|
|
|
|
|
"""向指定 Worker 发送任务并读取结果"""
|
|
|
|
|
|
job = {"config": config, "models": models}
|
|
|
|
|
|
job_json = json.dumps(job, ensure_ascii=False) + "\n"
|
|
|
|
|
|
|
|
|
|
|
|
# 写入 stdin
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker.proc.stdin.write(job_json)
|
|
|
|
|
|
worker.proc.stdin.flush()
|
|
|
|
|
|
except (BrokenPipeError, OSError) as e:
|
|
|
|
|
|
raise RuntimeError(f"[Pool] 写入 Worker-{worker.worker_id} stdin 失败: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
# 读取输出(直到 JOB_DONE 或 FATAL)
|
|
|
|
|
|
results = []
|
|
|
|
|
|
summary = {}
|
|
|
|
|
|
|
|
|
|
|
|
while True:
|
|
|
|
|
|
line = worker.proc.stdout.readline()
|
|
|
|
|
|
if not line:
|
|
|
|
|
|
# stdout 关闭,Worker 可能已死亡
|
|
|
|
|
|
logger.error(f"[Pool] Worker-{worker.worker_id} stdout 关闭")
|
|
|
|
|
|
summary = {"error": "Worker stdout 关闭"}
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
line = line.strip()
|
|
|
|
|
|
if not line:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if line.startswith("PROGRESS:"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
r = json.loads(line[len("PROGRESS:"):])
|
|
|
|
|
|
results.append(r)
|
|
|
|
|
|
if progress_callback:
|
|
|
|
|
|
progress_callback(r)
|
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
|
logger.warning(f"[Pool] Worker-{worker.worker_id} PROGRESS 解析失败: {line[:200]}")
|
|
|
|
|
|
|
|
|
|
|
|
elif line.startswith("JOB_DONE:"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
summary = json.loads(line[len("JOB_DONE:"):])
|
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
|
summary = {"error": f"JOB_DONE 解析失败: {line[:200]}"}
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
elif line.startswith("FATAL:"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
summary = json.loads(line[len("FATAL:"):])
|
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
|
summary = {"error": f"FATAL: {line[:200]}"}
|
|
|
|
|
|
logger.error(f"[Pool] Worker-{worker.worker_id} FATAL: {summary}")
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
logger.debug(f"[Pool] Worker-{worker.worker_id} 未知输出: {line[:200]}")
|
|
|
|
|
|
|
|
|
|
|
|
return results, summary
|
|
|
|
|
|
|
|
|
|
|
|
def _restart_worker(self, dead_worker: _WorkerHandle):
|
|
|
|
|
|
"""重启一个死亡的 Worker"""
|
|
|
|
|
|
from app.services.qgis.qgis_env import (
|
|
|
|
|
|
get_docker_container,
|
|
|
|
|
|
get_docker_project_dir,
|
|
|
|
|
|
get_container_python_path,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
container = get_docker_container()
|
|
|
|
|
|
project_dir = get_docker_project_dir()
|
|
|
|
|
|
python_path = get_container_python_path()
|
|
|
|
|
|
worker_script = f"{project_dir}/app/services/qgis/qgis_worker.py"
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from config import settings
|
|
|
|
|
|
qt_platform = getattr(settings, "QGIS_DOCKER_QT_PLATFORM", "offscreen")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
qt_platform = "offscreen"
|
|
|
|
|
|
|
|
|
|
|
|
cmd = [
|
|
|
|
|
|
"docker", "exec", "-i",
|
|
|
|
|
|
"-w", project_dir,
|
|
|
|
|
|
"-e", f"QT_QPA_PLATFORM={qt_platform}",
|
|
|
|
|
|
container,
|
|
|
|
|
|
python_path, worker_script,
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
proc = subprocess.Popen(
|
|
|
|
|
|
cmd,
|
|
|
|
|
|
stdin=subprocess.PIPE,
|
|
|
|
|
|
stdout=subprocess.PIPE,
|
|
|
|
|
|
stderr=subprocess.PIPE,
|
|
|
|
|
|
text=True,
|
|
|
|
|
|
encoding="utf-8",
|
|
|
|
|
|
errors="replace",
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
dead_worker.proc = proc
|
|
|
|
|
|
dead_worker.busy = False
|
|
|
|
|
|
|
|
|
|
|
|
# 重启 stderr 消费线程
|
|
|
|
|
|
t = threading.Thread(
|
|
|
|
|
|
target=self._drain_stderr, args=(dead_worker,), daemon=True
|
|
|
|
|
|
)
|
|
|
|
|
|
t.start()
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"[Pool] Worker-{dead_worker.worker_id} 已重启")
|
|
|
|
|
|
|
|
|
|
|
|
def shutdown(self):
|
|
|
|
|
|
"""关闭所有 Worker"""
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
for w in self._workers:
|
|
|
|
|
|
if w.proc.poll() is None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
w.proc.stdin.close()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
w.proc.terminate()
|
|
|
|
|
|
w.proc.wait(timeout=5)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
try:
|
|
|
|
|
|
w.proc.kill()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
self._workers.clear()
|
|
|
|
|
|
self._started = False
|
|
|
|
|
|
logger.info("[Pool] 已关闭")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 全局单例
|
|
|
|
|
|
qgis_pool = QgisPool()
|