Files
xian_algorithm_new/app/config/dbn/discretization.yaml
T
wzy-warehouse 118dbd18cf 重构DBN模型
2026-06-12 09:45:35 +08:00

220 lines
8.0 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# 离散化规则配置
# 定义所有连续因子的分箱规则
# 包含暴雨灾害链和地震灾害链的全部因子
#
# 2026-06-11: 基于1365个样本(796隐患点+569风险点)的实际数据分布
# 连续因子采用分位数分箱(等频分箱),分类因子基于实际编码映射
# ============================================
# 暴雨触发层离散化规则(保持气象标准不变)
# ============================================
# Rainfall-intensity trigger: eight bin edges delimit seven ordered states.
rain_intensity:
  description: '降雨强度等级'
  unit: 'mm/h'
  # Edges follow the China Meteorological Administration rainfall grades
  # (per the original author's note). 99999 is presumably a sentinel for
  # "no upper limit" - confirm with the loader.
  bins: [0, 0.2, 5, 15, 30, 70, 140, 99999]
  labels: [no_rain, light, moderate, heavy, storm, downpour, extreme]
# Rainfall-duration trigger: four bin edges delimit three ordered states.
duration:
  description: '持续时间'
  unit: 'h'
  # The lowest edge is 0 (it used to be 1) so that sub-hour events still land
  # in `short`, matching the other rainfall triggers whose bins start at 0.
  # 99999 is presumably a sentinel for "no upper limit" - confirm with the
  # loader.
  bins: [0, 3, 12, 99999]
  labels: [short, medium, long]
# Accumulated-rainfall trigger: six bin edges delimit five ordered states.
accum_rain:
  description: '累计降雨量'
  unit: 'mm'
  # Edges follow the China Meteorological Administration precipitation grades
  # (per the original author's note). 99999 is presumably an open-ended top
  # edge - confirm with the loader.
  bins: [0, 10, 25, 50, 100, 99999]
  labels: [trace, light, moderate, heavy, extreme]
# ============================================
# 地震触发层离散化规则
# ============================================
# Earthquake-magnitude trigger: seven bin edges delimit six ordered states.
magnitude:
  description: '地震震级'
  unit: 'Richter'
  # Thresholds are based on Keefer (1984), magnitude thresholds for
  # earthquake-triggered geohazards (per the original author's note).
  bins: [0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0]
  labels: [minor, light, moderate, strong, major, great]
# Epicentral-distance trigger: five bin edges delimit four ordered states.
epicenter_distance:
  description: '震中距'
  unit: 'km'
  # Reach of earthquake-induced geohazards after Keefer (1984), per the
  # original author's note. 99999 is presumably an open-ended top edge.
  bins: [0, 30, 100, 300, 99999]
  labels: [very_near, near, moderate, far]
# Seismic-intensity trigger: six bin edges delimit five ordered states.
seismic_intensity:
  description: '地震烈度'
  unit: '中国烈度表'
  # The original author's note cites GB 18306-2015 (Seismic ground motion
  # parameters zonation map of China) as the source of these edges.
  # NOTE(review): the last bin spans 12 to 99; whether any reading can reach
  # `extreme` depends on how the consumer closes its intervals - confirm.
  bins: [0, 5, 7, 9, 12, 99]
  labels: [minor, light, moderate, severe, extreme]
# ============================================
# 环境层离散化规则(暴雨/地震共享)
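# Quantile bins reportedly derived from 1201 samples.
# NOTE(review): the file header cites 1365 samples (796 + 569) - confirm which count applies here.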
# ============================================
# Elevation factor: quantile (equal-frequency) bins, five ordered states.
elevation:
  description: '高程'
  unit: 'm'
  # Sample data span [354, 1926], mean 789.36 +/- 325.64.
  # Quantile cut points: [354, 482, 637, 817, 1074, 1926].
  # NOTE(review): the outer edges equal the observed min/max, so values beyond
  # them presumably fall outside every bin unless the consumer clips - confirm.
  bins: [354, 482, 637, 817, 1074, 1926]
  labels: [very_low, low, medium, high, very_high]
# Slope factor: quantile (equal-frequency) bins, five ordered states.
slope:
  description: '坡度'
  unit: '度'
  # Sample data span [0.0, 47.0], mean 10.46 +/- 9.12.
  # Quantile cut points: [0.0, 2.0, 6.0, 11.0, 17.0, 47.0].
  bins: [0.0, 2.0, 6.0, 11.0, 17.0, 47.0]
  labels: [very_low, low, medium, high, very_high]
# Slope-aspect factor: quantile bins over the compass angle, five states.
aspect:
  description: '坡向'
  unit: '度'
  # Sample data span [0.86, 359.12], mean 173.47 +/- 104.96, skewness 0.126.
  # Quantile cut points: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12].
  # NOTE(review): the labels read like compass sectors, yet the edges are
  # quantiles of a circular quantity (e.g. the first bin, 0.86-57.39 degrees,
  # is named `flat`). Labels are left unchanged because other files may refer
  # to them - confirm the intended meaning.
  bins: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12]
  labels: [flat, north, east, south, west]
# Soil-type factor: categorical, raster code -> state name.
soil_type:
  description: '土壤分类(中国土壤分类系统)'
  unit: '3位数编码'
  # Per the original author's note, the Chinese soil classification system has
  # 25 subgroups, of which 8 occur in this database.
  # Code families: 1xx leached soils, 2xx calcic / isohumic soils,
  # 4xx primitive soils.
  mapping:
    110: brown_soil  # brown earth (leached soil, northern Qinling foothills)
    120: brown_soil  # dark brown earth (leached soil, high-altitude mountains)
    130: yellow_brown  # yellow-brown earth (leached soil, warm-temperate transition zone)
    150: yellow_brown  # yellow-cinnamon soil (leached soil, loess tableland)
    210: cinnamon  # cinnamon soil (calcic soil, main dry-farming soil of the loess tableland)
    240: black_lu  # Heilu soil (isohumic soil, palaeosol remnant)
    410: alluvial  # recent alluvial soil (primitive soil, Weihe alluvial plain)
    420: aeolian  # aeolian sandy soil (primitive soil, wind-deposited sand)
    255: unknown  # GIS background value (1 sample)
  # Cinnamon soil has the largest share (38.8%), hence the fallback state.
  # NOTE(review): `default` is placed beside `mapping`; confirm against the
  # loader that this is the nesting it expects.
  default: cinnamon
# Lithology factor: categorical, class code -> state name.
lithology:
  description: '岩性(中国地质分类)'
  unit: '分类代码'
  mapping:
    1: acid_rock  # acidic intrusive rock (granite etc., SiO2 > 66%)
    3: basic_rock  # basic intrusive rock (gabbro etc., SiO2 45-52%)
    4: basic_rock  # basic volcanic rock (basalt etc., merged into basic rock)
    5: carbonate  # carbonate rock (limestone, dolomite)
    10: metamorphic  # metamorphic rock (gneiss, marble)
    11: mixed_clastic  # mixed clastic sedimentary rock (interbedded sandstone and mudstone)
    13: terrigenous  # terrigenous clastic rock (sandstone, siltstone)
    14: unconsolidated  # unconsolidated deposits (loess, alluvial-proluvial)
    255: unknown  # no data (GIS raster background value)
  # Fallback state, presumably used for codes absent from `mapping`.
  # NOTE(review): `default` is placed beside `mapping`; confirm against the
  # loader that this is the nesting it expects.
  default: unconsolidated
# Land-use factor: categorical, class code -> state name.
landuse:
  description: '土地利用类型'
  unit: '分类代码'
  mapping:
    1: forest  # forest land (GIS raster code 1)
    2: farmland  # farmland (GIS raster code 2)
    3: urban  # urban (GIS raster code 3)
    4: water  # water body (GIS raster code 4)
    5: barren  # bare land (GIS raster code 5)
    8: farmland  # cultivated land (GIS raster code 8, merged into farmland)
  # Fallback state, presumably used for codes absent from `mapping`.
  # NOTE(review): `default` is placed beside `mapping`; confirm against the
  # loader that this is the nesting it expects.
  default: farmland
# Terrain factor: categorical, class code -> state name.
terrain:
  description: '地形分类(中国地形分类体系)'
  unit: '分类代码'
  mapping:
    1: mountain  # faulted piedmont zone (northern Qinling foothills)
    2: plain  # flat plain (Weihe plain)
    3: deep_valley  # high-mountain deep gorge (Qinling hinterland)
    4: hill  # hills (loess tableland)
    5: gentle_hill  # low gentle hills (tableland-edge transition zone)
    6: low_mountain  # low mountains (e.g. Lishan)
    7: flat_plain  # gently sloping plain (alluvial plain)
    255: unknown  # no data (GIS raster background value)
  # Fallback state, presumably used for codes absent from `mapping`.
  # NOTE(review): `default` is placed beside `mapping`; confirm against the
  # loader that this is the nesting it expects.
  default: hill
# Impervious-surface ratio: six bin edges delimit five ordered states.
impervious:
  description: '不透水率'
  # Closing parenthesis restored; the string was left unbalanced.
  unit: '小数比例(0-1)'
  # Sample data span [0.0, 1.0], mean 0.31 +/- 0.46.
  # 68.9% of samples are exactly 0.0 (no paved surface); the non-zero values
  # are right-skewed.
  # Strategy: zero gets its own class (the narrow 0.0-0.01 bin); the remainder
  # is cut into four equal-width bands.
  bins: [0.0, 0.01, 0.25, 0.50, 0.75, 1.0]
  labels: [none, very_low, low, medium, high]
# Vegetation-index factor: quantile bins, five ordered states.
ndvi:
  description: '植被指数'
  unit: 'NDVI值(×1000缩放)'
  # Sample data span [-1.0, 5336.0], mean 2045.95 +/- 689.47.
  # Quantile cut points: [-1.0, 1616.2, 1891.0, 2172.0, 2496.0, 5336.0].
  # NOTE(review): the first inner edge below is 1616.0 rather than the 1616.2
  # quantile; looks like rounding - confirm.
  # NOTE(review): with a x1000 scale the maximum 5336 would mean NDVI > 1; the
  # real scale factor may be x10000 - confirm against the data source.
  bins: [-1.0, 1616.0, 1891.0, 2172.0, 2496.0, 5336.0]
  labels: [very_low, low, medium, high, very_high]
# Soil sand-content factor: quantile bins, five ordered states.
sand_content:
  description: '土壤含沙量'
  unit: '百分比'
  # Sample data span [23.0, 255.0], mean 35.14 +/- 7.75.
  # 255 is an outlier (missing-value code); the normal range is [23, 52].
  # Quantiles of the normal values: [23.0, 31.0, 34.0, 35.0, 38.0, 52.0].
  # NOTE(review): the top edge is 255 instead of 52, so samples carrying the
  # missing-value code presumably land in `very_high` - confirm this is
  # intended.
  bins: [23.0, 31.0, 34.0, 35.0, 38.0, 255.0]
  labels: [very_low, low, medium, high, very_high]
# Soil-pH factor: quantile bins, five ordered states.
ph:
  description: '土壤PH值'
  # Closing parenthesis restored; the string was left unbalanced.
  unit: 'PH值(×10缩放,如71=7.1)'
  # Sample data span [60.0, 255.0], mean 71.82 +/- 6.91.
  # 255 is an outlier (missing-value code). The original note gives the normal
  # range as [59, 81] although the observed minimum is 60.0 - confirm.
  # Quantiles of the normal values: [60.0, 67.0, 71.0, 74.0, 76.0, 81.0].
  # NOTE(review): the top edge is 255 instead of 81, so samples carrying the
  # missing-value code presumably land in `very_high` - confirm this is
  # intended.
  bins: [60.0, 67.0, 71.0, 74.0, 76.0, 255.0]
  labels: [very_low, low, medium, high, very_high]
# Soil-moisture factor: six bin edges delimit five ordered states.
soil_moisture:
  description: '土壤湿度'
  # Closing parenthesis restored; the string was left unbalanced.
  unit: '小数比例(0-1)'
  # Sample data span [-1.0, 0.28], mean 0.15 +/- 0.08; roughly 10% of samples
  # are -1 (missing value).
  # Normal value range: [0.0, 0.28].
  # Quantiles of the normal values: [0.0, 0.12, 0.14, 0.16, 0.19, 0.28].
  # Strategy: -1 is treated as missing / extremely dry and gets the first bin;
  # the remaining range is cut into four bands.
  bins: [-1.0, 0.0, 0.10, 0.14, 0.18, 0.28]
  labels: [very_low, low, medium, high, very_high]
# Organic-carbon factor: quantile bins, five ordered states.
organic_carbon:
  description: '有机碳'
  unit: '百分比'
  # Sample data span [0.0, 65.0], mean 39.03 +/- 19.13.
  # Quantile cut points: [0.0, 34.0, 42.0, 48.0, 53.0, 65.0].
  bins: [0.0, 34.0, 42.0, 48.0, 53.0, 65.0]
  labels: [very_low, low, medium, high, very_high]
# Distance-to-river factor: quantile bins, five ordered states.
dist_to_river:
  description: '距离河道距离'
  unit: '米'
  # Sample data span [12.21, 29968.26], mean 11378.07 +/- 6704.59.
  # Quantile cut points:
  # [12.21, 5409.3, 9522.82, 12667.75, 16952.46, 29968.26].
  bins: [12.21, 5409.3, 9522.82, 12667.75, 16952.46, 29968.26]
  labels: [very_close, close, moderate, far, very_far]
# Distance-to-fault factor: quantile bins, five ordered states.
dist_to_fault:
  description: '距离断裂带距离'
  unit: '米'
  # Sample data span [1.72, 14685.31], mean 3527.70 +/- 3400.55.
  # Quantile cut points:
  # [1.72, 515.98, 1451.71, 3577.36, 6545.45, 14685.31].
  bins: [1.72, 515.98, 1451.71, 3577.36, 6545.45, 14685.31]
  labels: [very_close, close, moderate, far, very_far]
# Water-supply pipe-density factor: five bin edges delimit four states.
pipe_density:
  description: '供水管网密度'
  unit: 'm/m²'
  # Sample data span [0.0, 0.07]; about 83.9% of samples are 0.0.
  # Quantiles of the non-zero values:
  # [0.000438, 0.007136, 0.015399, 0.024523, 0.065431].
  # Strategy: zero gets its own class; the remainder is cut into three bands.
  # The top edge is 0.07 (it used to be 0.065) so that the observed maximum
  # 0.065431 no longer falls outside the last bin.
  bins: [0.0, 0.001, 0.010, 0.025, 0.07]
  labels: [none, low, medium, high]