Files
xian_algorithm_new/app/config/dbn/discretization.yaml
T
wzy-warehouse 9c3b0575d2 优化参数
2026-06-06 11:10:22 +08:00

209 lines
6.9 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# 离散化规则配置
# 定义所有连续因子的分箱规则
# 包含暴雨灾害链和地震灾害链的全部因子
#
# 2026-06-06: 基于1201个样本的实际数据分布,采用分位数分箱(等频分箱)
# 替代原有等宽分箱,使每个区间样本量更均匀
# ============================================
# 暴雨触发层离散化规则(保持气象标准不变)
# ============================================
rain_intensity:
  description: '降雨强度等级'
  unit: 'mm/h'
  # Rainfall-intensity grades; edges follow the China Meteorological
  # Administration rainfall grade standard.
  # Eight edges delimit seven intervals, one per label; 99999 acts as the
  # open-ended upper sentinel.
  bins: [0, 0.2, 5, 15, 30, 70, 140, 99999]
  labels: [no_rain, light, moderate, heavy, storm, downpour, extreme]
duration:
  description: '持续时间'
  unit: 'h'
  # Event duration in hours, split into three classes.
  # The lowest edge is 0 (previously 1) so that events shorter than one hour
  # are still classified as `short` instead of matching no bin; this mirrors
  # the 0-based lower edge of the other trigger-layer factors.
  # 99999 acts as the open-ended upper sentinel.
  bins: [0, 3, 12, 99999]
  labels: [short, medium, long]
accum_rain:
  description: '累计降雨量'
  unit: 'mm'
  # Accumulated rainfall; edges follow the China Meteorological
  # Administration precipitation grade standard.
  # Six edges delimit five intervals; 99999 is the open-ended upper sentinel.
  bins: [0, 10, 25, 50, 100, 99999]
  labels: [trace, light, moderate, heavy, extreme]
# ============================================
# 地震触发层离散化规则
# ============================================
magnitude:
  description: '地震震级'
  unit: 'Richter'
  # Magnitude thresholds for earthquake-triggered geological hazards,
  # after Keefer (1984). Seven edges delimit six classes.
  bins: [0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0]
  labels: [minor, light, moderate, strong, major, great]
epicenter_distance:
  description: '震中距'
  unit: 'km'
  # Influence range of earthquake-induced geological hazards (Keefer 1984).
  # 99999 is the open-ended upper sentinel for the `far` class.
  bins: [0, 30, 100, 300, 99999]
  labels: [very_near, near, moderate, far]
seismic_intensity:
  description: '地震烈度'
  unit: '中国烈度表'
  # Reference cited by the author: GB 18306-2015, Seismic Ground Motion
  # Parameters Zonation Map of China.
  # NOTE(review): the Chinese intensity scale presumably tops out at XII, so
  # the last interval [12, 99] (`extreme`) may be reachable only by a value of
  # exactly 12 or by out-of-scale input, depending on how the loader treats
  # edge inclusivity -- confirm this is intended.
  bins: [0, 5, 7, 9, 12, 99]
  labels: [minor, light, moderate, severe, extreme]
# ============================================
# 环境层离散化规则(暴雨/地震共享)
# 基于1201个样本的分位数分箱
# ============================================
elevation:
  description: '高程'
  unit: 'm'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [356, 1934], mean 764.3 +/- 317.89, skewness 0.973.
  # Sample quantile edges: [356, 470, 624, 792, 1016, 1934]
  # The outer edges are widened from the sample min/max to 0 and the 99999
  # sentinel so that unseen sites below 356 m or above 1934 m still fall into
  # `very_low` / `very_high` instead of matching no bin. Interior edges, and
  # therefore the labels of all in-sample values, are unchanged.
  bins: [0, 470, 624, 792, 1016, 99999]
  labels: [very_low, low, medium, high, very_high]
slope:
  description: '坡度'
  unit: '度'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [0.11, 47.14], mean 9.42 +/- 8.57, skewness 1.433.
  # Sample quantile edges: [0.11, 1.81, 5.43, 9.48, 15.13, 47.14]
  # The outer edges are widened from the sample min/max to the physical range
  # of a slope angle in degrees (0-90) so that flatter or steeper unseen
  # terrain still receives a label. Interior edges are unchanged.
  bins: [0.0, 1.81, 5.43, 9.48, 15.13, 90.0]
  labels: [very_low, low, medium, high, very_high]
aspect:
  description: '坡向'
  unit: '度'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [0.86, 359.12], mean 173.47 +/- 104.96, skewness 0.126.
  # Sample quantile edges: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12]
  # The outer edges are widened from the sample min/max to the full 0-360
  # degree circle so that aspects in [0, 0.86) or (359.12, 360] still receive
  # a label. Interior edges are unchanged.
  # NOTE(review): the labels are positional names for the five quantile bins,
  # not true compass sectors -- e.g. `flat` covers 0-57.39 degrees and `west`
  # covers 297.83-360. They are kept as-is because other parts of the project
  # presumably reference these state names; confirm before renaming.
  bins: [0.0, 57.39, 135.0, 230.19, 297.83, 360.0]
  labels: [flat, north, east, south, west]
# Categorical factor: raw soil class codes are translated to state names via
# `mapping` rather than being binned.
# NOTE(review): indentation is not visible in this view; `default` is
# presumably the fallback state for codes missing from `mapping` -- confirm
# its nesting level and meaning against the loader.
soil_type:
description: "土壤分类(中国土壤分类系统)"
unit: "分类代码"
mapping:
0: ultisol # Ultisols
6: entisol # Entisols (primitive soils)
11: fluvo_aquic # fluvo-aquic soil
18: yellow_brown # yellow-brown earth
default: entisol
# Categorical factor: raw lithology codes are translated to state names via
# `mapping`; codes 3 and 4 share the `basic_rock` state.
# NOTE(review): indentation is not visible in this view; `default` is
# presumably the fallback state for codes missing from `mapping` -- confirm
# its nesting level and meaning against the loader.
lithology:
description: "岩性(中国地质分类)"
unit: "分类代码"
mapping:
1: acid_rock # acidic intrusive rock (granite etc., SiO₂ > 66%)
3: basic_rock # basic intrusive rock (gabbro etc., SiO₂ 45-52%)
4: basic_rock # basic volcanic rock (basalt etc.; merged into basic rock)
5: carbonate # carbonate rock (limestone, dolomite)
10: metamorphic # metamorphic rock (gneiss, marble)
11: mixed_clastic # mixed clastic sedimentary rock (interbedded sandstone and mudstone)
13: terrigenous # terrigenous clastic rock (sandstone, siltstone)
14: unconsolidated # unconsolidated deposits (loess, alluvial-proluvial)
default: unconsolidated
# Categorical factor: raw land-use codes are translated to state names via
# `mapping`; codes 30 and 80 share the `farmland` state.
# NOTE(review): indentation is not visible in this view; `default` is
# presumably the fallback state for codes missing from `mapping` -- confirm
# its nesting level and meaning against the loader.
landuse:
description: "土地利用类型"
unit: "分类代码"
mapping:
10: forest # forest land
30: farmland # farmland
40: urban # urban area
50: water # water body
60: barren # bare land
80: farmland # cultivated land (merged into farmland)
default: farmland
# Categorical factor: raw terrain class codes are translated to state names
# via `mapping`.
# NOTE(review): indentation is not visible in this view; `default` is
# presumably the fallback state for codes missing from `mapping` -- confirm
# its nesting level and meaning against the loader.
terrain:
description: "地形分类(中国地形分类体系)"
unit: "分类代码"
mapping:
1: mountain # fault-controlled piedmont zone (northern foot of the Qinling Mountains)
2: plain # flat plain (Weihe Plain)
3: deep_valley # high mountains and deep gorges (Qinling interior)
4: hill # hills (loess tableland)
5: gentle_hill # low, gentle hills (tableland-edge transition zone)
6: low_mountain # low mountains (e.g. Lishan)
7: flat_plain # gently sloping plain (alluvial plain)
default: hill
impervious:
  description: '不透水率'
  unit: '百分比'
  # Sample data: range [0.0, 97.2], mean 16.40 +/- 25.99, skewness 1.787.
  # 26.9% of samples are 0.0 (no paved surface); the non-zero values are
  # right-skewed.
  # Binning strategy: zero gets its own class ([0.0, 0.01]); the remaining
  # values are split into four quantile bins.
  # Non-zero sample quantiles: [2.0, 9.95, 31.8, 97.2]
  # The upper edge is widened from the sample maximum 97.2 to 100.0, the
  # physical limit of a percentage, so that values in (97.2, 100] still fall
  # into `high` instead of matching no bin.
  bins: [0.0, 0.01, 2.0, 10.0, 32.0, 100.0]
  labels: [none, very_low, low, medium, high]
ndvi:
  description: '植被指数'
  unit: 'NDVI值'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [1.25, 38.68], mean 20.67 +/- 5.87, skewness -0.106.
  # Sample quantile edges: [1.25, 17.09, 20.3, 22.4, 25.2, 38.68]
  # NOTE(review): the outer edges equal the sample min/max, so values outside
  # [1.25, 38.68] match no bin. The value scale is not evident from this file
  # (it is not the conventional -1..1 NDVI range), so the edges are left
  # untouched -- confirm the scale before widening them.
  bins: [1.25, 17.09, 20.3, 22.4, 25.2, 38.68]
  labels: [very_low, low, medium, high, very_high]
sand_content:
  description: '土壤含沙量'
  unit: '百分比'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [23.0, 52.0], mean 34.43 +/- 4.29, skewness 0.538.
  # Sample quantile edges: [23.0, 31.0, 33.0, 35.0, 37.0, 52.0]
  # The outer edges are widened from the sample min/max to the 0-100 range of
  # a percentage so that unseen soils with less than 23% or more than 52%
  # sand still receive a label. Interior edges are unchanged.
  bins: [0.0, 31.0, 33.0, 35.0, 37.0, 100.0]
  labels: [very_low, low, medium, high, very_high]
ph:
  description: '土壤PH值'
  unit: 'PH值'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [59.0, 81.0], mean 71.79 +/- 4.14, skewness -0.398.
  # Sample quantile edges: [59.0, 68.0, 72.0, 74.0, 76.0, 81.0]
  # NOTE(review): the values look like pH multiplied by 10 (5.9-8.1) -- TODO
  # confirm. The outer edges equal the sample min/max, so inputs outside
  # [59, 81] match no bin.
  bins: [59.0, 68.0, 72.0, 74.0, 76.0, 81.0]
  labels: [very_low, low, medium, high, very_high]
soil_moisture:
  description: '土壤湿度'
  unit: '百分比'
  # Sample data: range [0.0, 41.1], mean 32.02 +/- 14.92, skewness -1.676.
  # About 10% of samples are 0.0 (missing / extremely dry); the rest cluster
  # in the 37-41 range.
  # Sample quantile edges: [0.0, 37.7, 38.6, 38.9, 39.4, 41.1]
  # The configured interior edges deviate from those quantiles and define
  # four classes rather than five.
  # The upper edge is widened from the sample maximum 41.1 to 100.0, the
  # limit of a percentage, so that wetter unseen samples still fall into
  # `high` instead of matching no bin.
  bins: [0.0, 37.0, 38.5, 39.5, 100.0]
  labels: [very_low, low, medium, high]
organic_carbon:
  description: '有机碳'
  unit: '百分比'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [0.0, 73.0], mean 38.36 +/- 19.14, skewness -1.187.
  # Sample quantile edges: [0.0, 34.0, 41.0, 47.0, 53.0, 73.0]
  # NOTE(review): the unit is declared as a percentage, yet values up to 73
  # look high for soil organic carbon -- verify the unit/scale. The upper
  # edge equals the sample maximum, so inputs above 73 match no bin.
  bins: [0.0, 34.0, 41.0, 47.0, 53.0, 73.0]
  labels: [very_low, low, medium, high, very_high]
dist_to_river:
  description: '距离河道距离'
  unit: '米'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [12.21, 29904.99], mean 11003.92 +/- 6582.23,
  # skewness 0.271.
  # Sample quantile edges: [12.21, 5165.0, 9003.0, 12424.97, 16431.82, 29904.99]
  # The outer edges are widened from the sample min/max to 0 and the 99999
  # sentinel so that points closer than 12.21 m to a river (including on
  # it) or farther than 29904.99 m still receive a label. Interior edges are
  # unchanged.
  bins: [0.0, 5165.0, 9003.0, 12424.97, 16431.82, 99999]
  labels: [very_close, close, moderate, far, very_far]
dist_to_fault:
  description: '距离断裂带距离'
  unit: '米'
  # Quantile (equal-frequency) bins derived from the sample data.
  # Sample data: range [1.74, 14542.53], mean 3448.52 +/- 3406.56,
  # skewness 1.055.
  # Sample quantile edges: [1.74, 476.69, 1433.62, 3334.87, 6502.28, 14542.53]
  # The outer edges are widened from the sample min/max to 0 and the 99999
  # sentinel so that points closer than 1.74 m to a fault or farther than
  # 14542.53 m still receive a label. Interior edges are unchanged.
  bins: [0.0, 476.69, 1433.62, 3334.87, 6502.28, 99999]
  labels: [very_close, close, moderate, far, very_far]
pipe_density:
  description: '供水管网密度'
  unit: 'm/m²'
  # Sample data: range [0.0, 0.07]; about 80% of samples are 0.0, the 90th
  # percentile is 0.013 and the 95th percentile is 0.023.
  # Sample quantile edges: [0.0, 0.013, 0.023, 0.065]
  # The upper edge is widened from 0.065 to the 99999 sentinel: the recorded
  # sample maximum (0.07) already lies above 0.065, so the densest samples
  # would otherwise match no bin. Interior edges are unchanged.
  # NOTE(review): the first interval [0.0, 0.013] is labelled `none` although
  # it also contains small non-zero densities -- confirm this is intended.
  bins: [0.0, 0.013, 0.023, 99999]
  labels: [none, low, medium, high]