重构DBN模型
This commit is contained in:
@@ -2,8 +2,8 @@
|
||||
# 定义所有连续因子的分箱规则
|
||||
# 包含暴雨灾害链和地震灾害链的全部因子
|
||||
#
|
||||
# 2026-06-06: 基于1201个样本的实际数据分布,采用分位数分箱(等频分箱)
|
||||
# 替代原有等宽分箱,使每个区间样本量更均匀
|
||||
# 2026-06-11: 基于1365个样本(796隐患点+569风险点)的实际数据分布
|
||||
# 连续因子采用分位数分箱(等频分箱),分类因子基于实际编码映射
|
||||
|
||||
# ============================================
|
||||
# 暴雨触发层离散化规则(保持气象标准不变)
|
||||
@@ -62,17 +62,17 @@ seismic_intensity:
|
||||
elevation:
|
||||
description: "高程"
|
||||
unit: "m"
|
||||
# 数据: [356, 1934], 均值764.3±317.89, 偏度0.973
|
||||
# 分位数: [356, 470, 624, 792, 1016, 1934]
|
||||
bins: [356, 470, 624, 792, 1016, 1934]
|
||||
# 数据: [354, 1926], 均值789.36±325.64
|
||||
# 分位数: [354, 482, 637, 817, 1074, 1926]
|
||||
bins: [354, 482, 637, 817, 1074, 1926]
|
||||
labels: [very_low, low, medium, high, very_high]
|
||||
|
||||
slope:
|
||||
description: "坡度"
|
||||
unit: "度"
|
||||
# 数据: [0.11, 47.14], 均值9.42±8.57, 偏度1.433
|
||||
# 分位数: [0.11, 1.81, 5.43, 9.48, 15.13, 47.14]
|
||||
bins: [0.11, 1.81, 5.43, 9.48, 15.13, 47.14]
|
||||
# 数据: [0.0, 47.0], 均值10.46±9.12
|
||||
# 分位数: [0.0, 2.0, 6.0, 11.0, 17.0, 47.0]
|
||||
bins: [0.0, 2.0, 6.0, 11.0, 17.0, 47.0]
|
||||
labels: [very_low, low, medium, high, very_high]
|
||||
|
||||
aspect:
|
||||
@@ -85,13 +85,20 @@ aspect:
|
||||
|
||||
soil_type:
|
||||
description: "土壤分类(中国土壤分类系统)"
|
||||
unit: "分类代码"
|
||||
unit: "3位数编码"
|
||||
# 中国土壤分类系统25个亚类,本数据库出现8种
|
||||
# 编码规则:1xx淋溶土、2xx钙层/均腐土、4xx初育土
|
||||
mapping:
|
||||
0: ultisol # 老成土
|
||||
6: entisol # 初育土
|
||||
11: fluvo_aquic # 潮土
|
||||
18: yellow_brown # 黄棕壤
|
||||
default: entisol
|
||||
110: brown_soil # 棕壤(淋溶土,秦岭北麓山地)
|
||||
120: brown_soil # 暗棕壤(淋溶土,高海拔山地)
|
||||
130: yellow_brown # 黄棕壤(淋溶土,暖温带过渡区)
|
||||
150: yellow_brown # 黄褐土(淋溶土,黄土塬区)
|
||||
210: cinnamon # 褐土(钙层土,黄土塬区主要旱作土壤)
|
||||
240: black_lu # 黑垆土(均腐土,古土壤残余)
|
||||
410: alluvial # 新积土(初育土,渭河冲积平原)
|
||||
420: aeolian # 风沙土(初育土,风积沙质土壤)
|
||||
255: unknown # GIS背景值(1个样本)
|
||||
default: cinnamon # 褐土占比最大(38.8%),作为默认值
|
||||
|
||||
lithology:
|
||||
description: "岩性(中国地质分类)"
|
||||
@@ -105,18 +112,19 @@ lithology:
|
||||
11: mixed_clastic # 混合碎屑沉积岩(砂岩+泥岩互层)
|
||||
13: terrigenous # 陆源碎屑岩(砂岩、粉砂岩)
|
||||
14: unconsolidated # 松散堆积物(黄土、冲洪积)
|
||||
255: unknown # 无数据(GIS栅格背景值)
|
||||
default: unconsolidated
|
||||
|
||||
landuse:
|
||||
description: "土地利用类型"
|
||||
unit: "分类代码"
|
||||
mapping:
|
||||
10: forest # 林地
|
||||
30: farmland # 农田
|
||||
40: urban # 城市
|
||||
50: water # 水域
|
||||
60: barren # 裸地
|
||||
80: farmland # 耕地(合并入农田)
|
||||
1: forest # 林地(GIS栅格编码1)
|
||||
2: farmland # 农田(GIS栅格编码2)
|
||||
3: urban # 城市(GIS栅格编码3)
|
||||
4: water # 水域(GIS栅格编码4)
|
||||
5: barren # 裸地(GIS栅格编码5)
|
||||
8: farmland # 耕地(GIS栅格编码8,合并入农田)
|
||||
default: farmland
|
||||
|
||||
terrain:
|
||||
@@ -130,80 +138,82 @@ terrain:
|
||||
5: gentle_hill # 低缓丘陵(塬边过渡带)
|
||||
6: low_mountain # 低山(骊山等)
|
||||
7: flat_plain # 平缓平原(冲积平原)
|
||||
255: unknown # 无数据(GIS栅格背景值)
|
||||
default: hill
|
||||
|
||||
impervious:
|
||||
description: "不透水率"
|
||||
unit: "百分比"
|
||||
# 数据: [0.0, 97.2], 均值16.40±25.99, 偏度1.787
|
||||
# 26.9%为0.0(无硬化地表),非零值右偏分布
|
||||
# 分箱策略:0单独一类,其余4等分(分位数分箱)
|
||||
# 分位数(非零): [2.0, 9.95, 31.8, 97.2]
|
||||
bins: [0.0, 0.01, 2.0, 10.0, 32.0, 97.2]
|
||||
unit: "小数比例(0-1)"
|
||||
# 数据: [0.0, 1.0], 均值0.31±0.46
|
||||
# 68.9%为0.0(无硬化地表),非零值右偏分布
|
||||
# 分箱策略:0单独一类,其余按等宽4等分(步长约0.25,非分位数分箱)
|
||||
bins: [0.0, 0.01, 0.25, 0.50, 0.75, 1.0]
|
||||
labels: [none, very_low, low, medium, high]
|
||||
|
||||
ndvi:
|
||||
description: "植被指数"
|
||||
unit: "NDVI值"
|
||||
# 数据: [1.25, 38.68], 均值20.67±5.87, 偏度-0.106
|
||||
# 分位数: [1.25, 17.09, 20.3, 22.4, 25.2, 38.68]
|
||||
bins: [1.25, 17.09, 20.3, 22.4, 25.2, 38.68]
|
||||
unit: "NDVI值(×1000缩放)"
|
||||
# 数据: [-1.0, 5336.0], 均值2045.95±689.47
|
||||
# 分位数: [-1.0, 1616.2, 1891.0, 2172.0, 2496.0, 5336.0]
|
||||
bins: [-1.0, 1616.0, 1891.0, 2172.0, 2496.0, 5336.0]
|
||||
labels: [very_low, low, medium, high, very_high]
|
||||
|
||||
sand_content:
|
||||
description: "土壤含沙量"
|
||||
unit: "百分比"
|
||||
# 数据: [23.0, 52.0], 均值34.43±4.29, 偏度0.538
|
||||
# 分位数: [23.0, 31.0, 33.0, 35.0, 37.0, 52.0]
|
||||
bins: [23.0, 31.0, 33.0, 35.0, 37.0, 52.0]
|
||||
# 数据: [23.0, 255.0], 均值35.14±7.75
|
||||
# 255为异常值(缺失值编码),正常范围[23, 52]
|
||||
# 分位数(正常值): [23.0, 31.0, 34.0, 35.0, 38.0, 52.0]
|
||||
bins: [23.0, 31.0, 34.0, 35.0, 38.0, 255.0]
|
||||
labels: [very_low, low, medium, high, very_high]
|
||||
|
||||
ph:
|
||||
description: "土壤PH值"
|
||||
unit: "PH值"
|
||||
# 数据: [59.0, 81.0], 均值71.79±4.14, 偏度-0.398
|
||||
# 分位数: [59.0, 68.0, 72.0, 74.0, 76.0, 81.0]
|
||||
bins: [59.0, 68.0, 72.0, 74.0, 76.0, 81.0]
|
||||
unit: "PH值(×10缩放,如71=7.1)"
|
||||
# 数据: [60.0, 255.0], 均值71.82±6.91
|
||||
# 255为异常值(缺失值编码),正常范围[60, 81]
|
||||
# 分位数(正常值): [60.0, 67.0, 71.0, 74.0, 76.0, 81.0]
|
||||
bins: [60.0, 67.0, 71.0, 74.0, 76.0, 255.0]
|
||||
labels: [very_low, low, medium, high, very_high]
|
||||
|
||||
soil_moisture:
|
||||
description: "土壤湿度"
|
||||
unit: "百分比"
|
||||
# 数据: [0.0, 41.1], 均值32.02±14.92, 偏度-1.676
|
||||
# 约10%为0.0(缺失/极端干燥),其余集中在37-41
|
||||
# 分位数: [0.0, 37.7, 38.6, 38.9, 39.4, 41.1]
|
||||
bins: [0.0, 37.0, 38.5, 39.5, 41.1]
|
||||
labels: [very_low, low, medium, high]
|
||||
unit: "小数比例(0-1)"
|
||||
# 数据: [-1.0, 0.28], 均值0.15±0.08, 约10%为-1(缺失值)
|
||||
# 正常值范围: [0.0, 0.28]
|
||||
# 分位数(正常值): [0.0, 0.12, 0.14, 0.16, 0.19, 0.28]
|
||||
# 分箱策略:-1视为缺失/极端干燥,其余4等分
|
||||
bins: [-1.0, 0.0, 0.10, 0.14, 0.18, 0.28]
|
||||
labels: [very_low, low, medium, high, very_high]
|
||||
|
||||
organic_carbon:
|
||||
description: "有机碳"
|
||||
unit: "百分比"
|
||||
# 数据: [0.0, 73.0], 均值38.36±19.14, 偏度-1.187
|
||||
# 分位数: [0.0, 34.0, 41.0, 47.0, 53.0, 73.0]
|
||||
bins: [0.0, 34.0, 41.0, 47.0, 53.0, 73.0]
|
||||
# 数据: [0.0, 65.0], 均值39.03±19.13
|
||||
# 分位数: [0.0, 34.0, 42.0, 48.0, 53.0, 65.0]
|
||||
bins: [0.0, 34.0, 42.0, 48.0, 53.0, 65.0]
|
||||
labels: [very_low, low, medium, high, very_high]
|
||||
|
||||
dist_to_river:
|
||||
description: "距离河道距离"
|
||||
unit: "米"
|
||||
# 数据: [12.21, 29904.99], 均值11003.92±6582.23, 偏度0.271
|
||||
# 分位数: [12.21, 5165.0, 9003.0, 12424.97, 16431.82, 29904.99]
|
||||
bins: [12.21, 5165.0, 9003.0, 12424.97, 16431.82, 29904.99]
|
||||
# 数据: [12.21, 29968.26], 均值11378.07±6704.59
|
||||
# 分位数: [12.21, 5409.3, 9522.82, 12667.75, 16952.46, 29968.26]
|
||||
bins: [12.21, 5409.3, 9522.82, 12667.75, 16952.46, 29968.26]
|
||||
labels: [very_close, close, moderate, far, very_far]
|
||||
|
||||
dist_to_fault:
|
||||
description: "距离断裂带距离"
|
||||
unit: "米"
|
||||
# 数据: [1.74, 14542.53], 均值3448.52±3406.56, 偏度1.055
|
||||
# 分位数: [1.74, 476.69, 1433.62, 3334.87, 6502.28, 14542.53]
|
||||
bins: [1.74, 476.69, 1433.62, 3334.87, 6502.28, 14542.53]
|
||||
# 数据: [1.72, 14685.31], 均值3527.70±3400.55
|
||||
# 分位数: [1.72, 515.98, 1451.71, 3577.36, 6545.45, 14685.31]
|
||||
bins: [1.72, 515.98, 1451.71, 3577.36, 6545.45, 14685.31]
|
||||
labels: [very_close, close, moderate, far, very_far]
|
||||
|
||||
pipe_density:
|
||||
description: "供水管网密度"
|
||||
unit: "m/m²"
|
||||
# 数据: [0.0, 0.07], 约80%为0.0,90%分位数0.013,95%分位数0.023
|
||||
# 分箱策略:0单独一类,其余3等分(分位数分箱)
|
||||
# 分位数(非零): [0.013, 0.023, 0.065]
|
||||
bins: [0.0, 0.001, 0.013, 0.023, 0.065]
|
||||
# 数据: [0.0, 0.07], 约83.9%为0.0,非零值分位数[0.000438, 0.007136, 0.015399, 0.024523, 0.065431]
|
||||
# 分箱策略:0单独一类,其余3等分
|
||||
bins: [0.0, 0.001, 0.010, 0.025, 0.065]
|
||||
labels: [none, low, medium, high]
|
||||
|
||||
Reference in New Issue
Block a user