Files

220 lines
8.0 KiB
YAML
Raw Permalink Normal View History

# 离散化规则配置
# 定义所有连续因子的分箱规则
# 包含暴雨灾害链和地震灾害链的全部因子
2026-06-06 11:10:22 +08:00
#
2026-06-12 09:45:35 +08:00
# 2026-06-11: 基于1365个样本(796隐患点+569风险点)的实际数据分布
# 连续因子采用分位数分箱(等频分箱),分类因子基于实际编码映射
# ============================================
2026-06-06 11:10:22 +08:00
# 暴雨触发层离散化规则(保持气象标准不变)
# ============================================
rain_intensity:
  # Rainfall-intensity grade for the rainstorm trigger layer.
  description: "降雨强度等级"
  unit: "mm/h"
  # Edges follow the China Meteorological Administration rainfall grades.
  # NOTE(review): 5 / 15 / 30 / 70 / 140 coincide with the CMA 12-hour
  # accumulation grades (mm per 12 h), not an hourly rate — confirm that the
  # declared unit "mm/h" matches how this factor is actually computed.
  # The last edge (99999) presumably serves as an open upper bound.
  bins: [0, 0.2, 5, 15, 30, 70, 140, 99999]
  # 8 edges -> 7 intervals, one label per interval.
  labels: [no_rain, light, moderate, heavy, storm, downpour, extreme]
duration:
  # Rainfall duration class for the rainstorm trigger layer.
  description: "持续时间"
  unit: "h"
  # The lowest edge is 0 (previously 1) so that events shorter than one hour
  # still fall into "short" instead of landing outside every bin; this also
  # matches the other trigger-layer factors, whose bins all start at 0.
  # Every duration that was classified before keeps the same label.
  # The last edge (99999) presumably serves as an open upper bound.
  bins: [0, 3, 12, 99999]
  # 4 edges -> 3 intervals.
  labels: [short, medium, long]
accum_rain:
  # Accumulated-rainfall class for the rainstorm trigger layer.
  description: "累计降雨量"
  unit: "mm"
  # Edges follow the China Meteorological Administration precipitation grades.
  # The last edge (99999) presumably serves as an open upper bound.
  bins: [0, 10, 25, 50, 100, 99999]
  # 6 edges -> 5 intervals.
  labels: [trace, light, moderate, heavy, extreme]
# ============================================
# 地震触发层离散化规则
# ============================================
magnitude:
  # Earthquake magnitude class for the earthquake trigger layer.
  description: "地震震级"
  unit: "Richter"
  # Thresholds are based on Keefer (1984), magnitude thresholds for
  # earthquake-triggered geological hazards.
  bins: [0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0]
  # 7 edges -> 6 intervals.
  labels: [minor, light, moderate, strong, major, great]
epicenter_distance:
  # Distance-to-epicenter class for the earthquake trigger layer.
  description: "震中距"
  unit: "km"
  # Influence range of earthquake-induced geological hazards (Keefer 1984).
  # The last edge (99999) presumably serves as an open upper bound.
  bins: [0, 30, 100, 300, 99999]
  # 5 edges -> 4 intervals.
  labels: [very_near, near, moderate, far]
seismic_intensity:
  # Seismic intensity class for the earthquake trigger layer.
  description: "地震烈度"
  unit: "中国烈度表"
  # Source cited by the original author: GB 18306-2015, Seismic ground motion
  # parameters zonation map of China.
  # NOTE(review): the 12-degree intensity scale itself is, as far as I know,
  # defined in GB/T 17742 rather than GB 18306 — confirm the intended citation.
  # NOTE(review): the final interval 12-99 lies at/above the top of a
  # 12-degree scale; whether intensity 12 maps to "severe" or "extreme"
  # depends on which side of each interval the consumer treats as closed.
  bins: [0, 5, 7, 9, 12, 99]
  # 6 edges -> 5 intervals.
  labels: [minor, light, moderate, severe, extreme]
# ============================================
# 环境层离散化规则(暴雨/地震共享)
2026-06-06 11:10:22 +08:00
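# Quantile (equal-frequency) binning. NOTE(review): this banner originally said "1201 samples", which conflicts with the 1365 samples (796 + 569) stated in the file header — confirm which sample set each factor's bins were derived from.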
# ============================================
elevation:
  # Elevation class (environment layer).
  description: "高程"
  unit: "m"
  # Observed data: range [354, 1926], mean 789.36 +/- 325.64.
  # Quantiles: [354, 482, 637, 817, 1074, 1926]; the bin edges are exactly
  # these quantiles, so the outer edges equal the observed min and max.
  bins: [354, 482, 637, 817, 1074, 1926]
  # 6 edges -> 5 intervals.
  labels: [very_low, low, medium, high, very_high]
slope:
  # Slope-angle class (environment layer).
  description: "坡度"
  unit: "度"
  # Observed data: range [0.0, 47.0], mean 10.46 +/- 9.12.
  # Quantiles: [0.0, 2.0, 6.0, 11.0, 17.0, 47.0]; used directly as bin edges.
  bins: [0.0, 2.0, 6.0, 11.0, 17.0, 47.0]
  # 6 edges -> 5 intervals.
  labels: [very_low, low, medium, high, very_high]
aspect:
  # Slope-aspect class (environment layer).
  description: "坡向"
  unit: "度"
  # Observed data: range [0.86, 359.12], mean 173.47 +/- 104.96,
  # skewness 0.126.
  # Quantiles: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12]; used directly
  # as bin edges.
  # NOTE(review): the labels are compass-style names, but the edges are
  # quantiles of the data, so a label does not describe a compass sector
  # (e.g. 0.86-57.39 degrees is labelled "flat"). Confirm this is intended
  # before relying on the label text.
  bins: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12]
  # 6 edges -> 5 intervals.
  labels: [flat, north, east, south, west]
soil_type:
  # Categorical factor: raw soil code -> soil class label.
  description: "土壤分类(中国土壤分类系统)"
  unit: "3位数编码"
  # Per the original note, the Chinese Soil Classification System has 25
  # sub-classes, of which 8 occur in this database.
  # Code scheme: 1xx = leached soils, 2xx = calcic / isohumic soils,
  # 4xx = primitive soils.
  mapping:
    110: brown_soil  # brown earth (leached soil; mountains at the northern foot of the Qinling)
    120: brown_soil  # dark brown earth (leached soil; high-altitude mountains)
    130: yellow_brown  # yellow-brown earth (leached soil; warm-temperate transition zone)
    150: yellow_brown  # yellow-cinnamon soil (leached soil; loess tableland)
    210: cinnamon  # cinnamon soil (calcic soil; main dry-farming soil of the loess tableland)
    240: black_lu  # Heilu (dark loessial) soil (isohumic soil; relict paleosol)
    410: alluvial  # recent alluvial soil (primitive soil; Wei River alluvial plain)
    420: aeolian  # aeolian sandy soil (primitive soil; wind-deposited sandy soil)
    255: unknown  # GIS background value (1 sample)
  # Cinnamon soil has the largest share (38.8%), so it is the fallback class.
  default: cinnamon
lithology:
  # Categorical factor: raw lithology code -> rock class label.
  description: "岩性(中国地质分类)"
  unit: "分类代码"
  mapping:
    1: acid_rock  # acidic intrusive rock (granite etc., SiO2 > 66%)
    3: basic_rock  # basic intrusive rock (gabbro etc., SiO2 45-52%)
    4: basic_rock  # basic volcanic rock (basalt etc.; merged into basic rock)
    5: carbonate  # carbonate rock (limestone, dolomite)
    10: metamorphic  # metamorphic rock (gneiss, marble)
    11: mixed_clastic  # mixed clastic sedimentary rock (interbedded sandstone and mudstone)
    13: terrigenous  # terrigenous clastic rock (sandstone, siltstone)
    14: unconsolidated  # unconsolidated deposits (loess, alluvial-proluvial)
    255: unknown  # no data (GIS raster background value)
  # Fallback class; presumably applied to codes not listed above.
  default: unconsolidated
landuse:
  # Categorical factor: raw land-use raster code -> land-use label.
  description: "土地利用类型"
  unit: "分类代码"
  mapping:
    1: forest  # forest land (GIS raster code 1)
    2: farmland  # farmland (GIS raster code 2)
    3: urban  # urban (GIS raster code 3)
    4: water  # water body (GIS raster code 4)
    5: barren  # bare land (GIS raster code 5)
    8: farmland  # cultivated land (GIS raster code 8; merged into farmland)
  # Fallback class; presumably applied to codes not listed above.
  default: farmland
terrain:
  # Categorical factor: raw terrain code -> terrain label.
  description: "地形分类(中国地形分类体系)"
  unit: "分类代码"
  mapping:
    1: mountain  # faulted piedmont zone (northern foot of the Qinling)
    2: plain  # flat plain (Wei River plain)
    3: deep_valley  # high-mountain deep gorge (Qinling hinterland)
    4: hill  # hills (loess tableland)
    5: gentle_hill  # low, gentle hills (tableland-edge transition zone)
    6: low_mountain  # low mountains (Lishan etc.)
    7: flat_plain  # gently sloping plain (alluvial plain)
    255: unknown  # no data (GIS raster background value)
  # Fallback class; presumably applied to codes not listed above.
  default: hill
impervious:
  # Impervious-surface ratio class (environment layer).
  description: "不透水率"
  # Closing parenthesis restored; the string was previously unbalanced.
  unit: "小数比例(0-1)"
  # Observed data: range [0.0, 1.0], mean 0.31 +/- 0.46.
  # 68.9% of samples are 0.0 (no hardened surface); the non-zero values are
  # right-skewed.
  # Strategy: zero gets its own class via the narrow 0.0-0.01 interval, and
  # the remainder is split into four intervals.
  # NOTE(review): whether an exact 0.0 lands in "none" depends on how the
  # consumer treats the lowest edge — confirm it is inclusive.
  bins: [0.0, 0.01, 0.25, 0.50, 0.75, 1.0]
  # 6 edges -> 5 intervals.
  labels: [none, very_low, low, medium, high]
ndvi:
  # Vegetation-index class (environment layer).
  description: "植被指数"
  unit: "NDVI值(×1000缩放)"
  # Observed data: range [-1.0, 5336.0], mean 2045.95 +/- 689.47.
  # Quantiles: [-1.0, 1616.2, 1891.0, 2172.0, 2496.0, 5336.0]; the second
  # edge is rounded from 1616.2 to 1616.0.
  # NOTE(review): NDVI lies in [-1, 1], so a x1000 scale cannot produce 5336;
  # the data looks like a x10000 scale, with -1.0 possibly a no-data value —
  # confirm the unit string.
  bins: [-1.0, 1616.0, 1891.0, 2172.0, 2496.0, 5336.0]
  # 6 edges -> 5 intervals.
  labels: [very_low, low, medium, high, very_high]
sand_content:
  # Soil sand-content class (environment layer).
  description: "土壤含沙量"
  unit: "百分比"
  # Observed data: range [23.0, 255.0], mean 35.14 +/- 7.75.
  # 255 is an outlier (missing-value code); the normal range is [23, 52].
  # Quantiles of the normal values: [23.0, 31.0, 34.0, 35.0, 38.0, 52.0].
  # NOTE(review): the top edge is 255 rather than 52, so the missing-value
  # code 255 is binned as "very_high" — confirm this is intended rather than
  # treating 255 as missing.
  bins: [23.0, 31.0, 34.0, 35.0, 38.0, 255.0]
  # 6 edges -> 5 intervals.
  labels: [very_low, low, medium, high, very_high]
ph:
  # Soil pH class (environment layer).
  description: "土壤PH值"
  # Closing parenthesis restored; the string was previously unbalanced.
  unit: "PH值(×10缩放,如71=7.1)"
  # Observed data: range [60.0, 255.0], mean 71.82 +/- 6.91.
  # 255 is an outlier (missing-value code). The original note gives the
  # normal range as [59, 81], although the observed minimum and the lowest
  # bin edge are both 60 — NOTE(review): reconcile 59 vs 60.
  # Quantiles of the normal values: [60.0, 67.0, 71.0, 74.0, 76.0, 81.0].
  # NOTE(review): the top edge is 255 rather than 81, so the missing-value
  # code 255 is binned as "very_high" — confirm this is intended.
  bins: [60.0, 67.0, 71.0, 74.0, 76.0, 255.0]
  # 6 edges -> 5 intervals.
  labels: [very_low, low, medium, high, very_high]
soil_moisture:
  # Soil-moisture class (environment layer).
  description: "土壤湿度"
  # Closing parenthesis restored; the string was previously unbalanced.
  unit: "小数比例(0-1)"
  # Observed data: range [-1.0, 0.28], mean 0.15 +/- 0.08; about 10% of the
  # samples are -1 (missing-value code).
  # Normal value range: [0.0, 0.28].
  # Quantiles of the normal values: [0.0, 0.12, 0.14, 0.16, 0.19, 0.28].
  # Strategy: -1 is treated as missing / extremely dry and shares the first
  # interval; the remaining range is split into four intervals.
  # NOTE(review): the inner edges (0.10, 0.14, 0.18) are not the quantiles
  # listed above (0.12, 0.14, 0.16, 0.19) — confirm they were chosen on
  # purpose.
  bins: [-1.0, 0.0, 0.10, 0.14, 0.18, 0.28]
  # 6 edges -> 5 intervals.
  labels: [very_low, low, medium, high, very_high]
organic_carbon:
  # Soil organic-carbon class (environment layer).
  description: "有机碳"
  unit: "百分比"
  # Observed data: range [0.0, 65.0], mean 39.03 +/- 19.13.
  # Quantiles: [0.0, 34.0, 42.0, 48.0, 53.0, 65.0]; used directly as edges.
  # NOTE(review): a mean near 39 is implausible as a literal organic-carbon
  # percentage; the raster is presumably scaled — confirm the unit.
  bins: [0.0, 34.0, 42.0, 48.0, 53.0, 65.0]
  # 6 edges -> 5 intervals.
  labels: [very_low, low, medium, high, very_high]
dist_to_river:
  # Distance-to-river class (environment layer).
  description: "距离河道距离"
  unit: "米"
  # Observed data: range [12.21, 29968.26], mean 11378.07 +/- 6704.59.
  # Quantiles: [12.21, 5409.3, 9522.82, 12667.75, 16952.46, 29968.26]; used
  # directly as bin edges, so the outer edges equal the observed min and max.
  bins: [12.21, 5409.3, 9522.82, 12667.75, 16952.46, 29968.26]
  # 6 edges -> 5 intervals.
  labels: [very_close, close, moderate, far, very_far]
dist_to_fault:
  # Distance-to-fault class (environment layer).
  description: "距离断裂带距离"
  unit: "米"
  # Observed data: range [1.72, 14685.31], mean 3527.70 +/- 3400.55.
  # Quantiles: [1.72, 515.98, 1451.71, 3577.36, 6545.45, 14685.31]; used
  # directly as bin edges, so the outer edges equal the observed min and max.
  bins: [1.72, 515.98, 1451.71, 3577.36, 6545.45, 14685.31]
  # 6 edges -> 5 intervals.
  labels: [very_close, close, moderate, far, very_far]
pipe_density:
  # Water-supply pipe-network density class (environment layer).
  description: "供水管网密度"
  unit: "m/m²"
  # Observed data: range [0.0, 0.07]; about 83.9% of samples are 0.0.
  # Quantiles of the non-zero values:
  # [0.000438, 0.007136, 0.015399, 0.024523, 0.065431].
  # Strategy: zero gets its own class via the narrow 0.0-0.001 interval, and
  # the remainder is split into three intervals.
  # The top edge is 0.07 (previously 0.065): the largest non-zero value
  # listed above, 0.065431, exceeded the old edge and therefore fell outside
  # every bin. Values that were classified before keep the same label.
  bins: [0.0, 0.001, 0.010, 0.025, 0.07]
  # 5 edges -> 4 intervals.
  labels: [none, low, medium, high]