优化参数

This commit is contained in:
wzy-warehouse
2026-06-06 11:10:22 +08:00
parent eddbdaca1f
commit 9c3b0575d2
5 changed files with 212 additions and 196 deletions
+82 -68
View File
@@ -1,14 +1,18 @@
# 离散化规则配置
# 定义所有连续因子的分箱规则
# 包含暴雨灾害链和地震灾害链的全部因子
#
# 2026-06-06: 基于1201个样本的实际数据分布,采用分位数分箱(等频分箱)
# 替代原有等宽分箱,使每个区间样本量更均匀
# ============================================
# 暴雨触发层离散化规则
# 暴雨触发层离散化规则(保持气象标准不变)
# ============================================
rain_intensity:
description: "降雨强度等级"
unit: "mm/h"
# 中国气象局降雨等级标准
bins: [0, 0.2, 5, 15, 30, 70, 140, 99999]
labels: [no_rain, light, moderate, heavy, storm, downpour, extreme]
@@ -21,6 +25,7 @@ duration:
accum_rain:
description: "累计降雨量"
unit: "mm"
# 中国气象局降水等级标准
bins: [0, 10, 25, 50, 100, 99999]
labels: [trace, light, moderate, heavy, extreme]
@@ -32,17 +37,13 @@ magnitude:
description: "地震震级"
unit: "Richter"
# 基于Keefer (1984) 地震触发地质灾害的震级阈值
# M<4.0: 无显著地质灾害; M4.0-4.9: 轻微; M5.0-5.9: 中等
# M6.0-6.9: 显著; M7.0-7.9: 严重; M≥8.0: 灾难性
bins: [0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0]
labels: [minor, light, moderate, strong, major, great]
epicenter_distance:
description: "震中距"
unit: "km"
# 地震地质灾害影响范围(Keefer 1984)
# M5.0: ~10km; M6.0: ~30km; M7.0: ~100km; M8.0: ~300km
# 分级取各震级影响范围的中位值
# 地震地质灾害影响范围(Keefer 1984)
bins: [0, 30, 100, 300, 99999]
labels: [very_near, near, moderate, far]
@@ -50,37 +51,41 @@ seismic_intensity:
description: "地震烈度"
unit: "中国烈度表"
# GB 18306-2015 中国地震动参数区划图
# I-V: 无显著地质灾害; VI-VII: 轻微; VIII-IX: 显著; X-XII: 严重; >XII: 灾难性
# 输入为数值烈度(1-12),映射到离散等级
bins: [0, 5, 7, 9, 12, 99]
labels: [minor, light, moderate, severe, extreme]
# ============================================
# 环境层离散化规则(暴雨/地震共享)
# 基于1201个样本的分位数分箱
# ============================================
elevation:
description: "高程"
unit: "m"
bins: [0, 400, 500, 700, 1000, 1500, 99999]
labels: [basin, plain_urban, transition, low_mountain, mid_mountain, high_mountain]
# 数据: [356, 1934], 均值764.3±317.89, 偏度0.973
# 分位数: [356, 470, 624, 792, 1016, 1934]
bins: [356, 470, 624, 792, 1016, 1934]
labels: [very_low, low, medium, high, very_high]
slope:
description: "坡度"
unit: "度"
bins: [0, 5, 15, 25, 35, 45, 90]
labels: [flat, gentle, moderate, steep, very_steep, extreme_steep]
# 数据: [0.11, 47.14], 均值9.42±8.57, 偏度1.433
# 分位数: [0.11, 1.81, 5.43, 9.48, 15.13, 47.14]
bins: [0.11, 1.81, 5.43, 9.48, 15.13, 47.14]
labels: [very_low, low, medium, high, very_high]
aspect:
description: "坡向"
unit: "度"
bins: [0, 45, 135, 225, 315, 360]
labels: [north, east, south, west, north_loop]
# 数据: [0.86, 359.12], 均值173.47±104.96, 偏度0.126
# 分位数: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12]
bins: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12]
labels: [flat, north, east, south, west]
soil_type:
description: "土壤分类(中国土壤分类系统)"
unit: "分类代码"
# 数据库实际编码(xian_soil 表 value 字段,来源:pg_description)
mapping:
0: ultisol # 老成土
6: entisol # 初育土
@@ -91,104 +96,113 @@ soil_type:
lithology:
description: "岩性(中国地质分类)"
unit: "分类代码"
# 数据库实际编码(xian_lithology 表 value 字段,来源:pg_description)
# 工程地质分组:按 SiO₂ 含量 + 成因合并同类岩性
mapping:
1: acid_rock # 酸性侵入岩(花岗岩等,SiO₂>66%,100条)
3: basic_rock # 基性侵入岩(辉长岩等,SiO₂ 45-52%,5条)
4: basic_rock # 基性火山岩(玄武岩等,合并入基性岩,17条)
5: carbonate # 碳酸盐岩(石灰岩、白云岩,142条)
10: metamorphic # 变质岩(片麻岩、大理岩,156条)
11: mixed_clastic # 混合碎屑沉积岩(砂岩+泥岩互层,35条)
13: terrigenous # 陆源碎屑岩(砂岩、粉砂岩,180条)
14: unconsolidated # 松散堆积物(黄土、冲洪积,566条)
1: acid_rock # 酸性侵入岩(花岗岩等,SiO₂>66%)
3: basic_rock # 基性侵入岩(辉长岩等,SiO₂ 45-52%)
4: basic_rock # 基性火山岩(玄武岩等,合并入基性岩)
5: carbonate # 碳酸盐岩(石灰岩、白云岩)
10: metamorphic # 变质岩(片麻岩、大理岩)
11: mixed_clastic # 混合碎屑沉积岩(砂岩+泥岩互层)
13: terrigenous # 陆源碎屑岩(砂岩、粉砂岩)
14: unconsolidated # 松散堆积物(黄土、冲洪积)
default: unconsolidated
landuse:
description: "土地利用类型"
unit: "分类代码"
# 数据库实际编码(GLC FCS30 分类体系)
mapping:
10: forest # 林地(377条)
30: farmland # 农田(190条)
40: urban # 城市(105条)
50: water # 水域(505条)
60: barren # 裸地(23条)
80: farmland # 耕地(1条,合并入农田)
10: forest # 林地
30: farmland # 农田
40: urban # 城市
50: water # 水域
60: barren # 裸地
80: farmland # 耕地(合并入农田)
default: farmland
terrain:
description: "地形分类(中国地形分类体系)"
unit: "分类代码"
# 数据库实际编码(xian_landform 表 value 字段,来源:pg_description)
# 工程地质分组:按坡度 + 地貌特征合并
mapping:
1: mountain # 断裂山麓地带(秦岭北麓,276条)
2: plain # 平坦平原(渭河平原,218条)
3: deep_valley # 高山深峡谷(秦岭腹地,11条)
4: hill # 丘陵(黄土塬,250条)
5: gentle_hill # 低缓丘陵(塬边过渡带,86条)
6: low_mountain # 低山(骊山等,261条)
7: flat_plain # 平缓平原(冲积平原,99条)
1: mountain # 断裂山麓地带(秦岭北麓)
2: plain # 平坦平原(渭河平原)
3: deep_valley # 高山深峡谷(秦岭腹地)
4: hill # 丘陵(黄土塬)
5: gentle_hill # 低缓丘陵(塬边过渡带)
6: low_mountain # 低山(骊山等)
7: flat_plain # 平缓平原(冲积平原)
default: hill
impervious:
description: "不透水"
unit: "比"
bins: [0, 0.3, 0.6, 1.0]
labels: [low, medium, high]
description: "不透水"
unit: "百分比"
# 数据: [0.0, 97.2], 均值16.40±25.99, 偏度1.787
# 26.9%为0.0(无硬化地表),非零值右偏分布
# 分箱策略:0单独一类,其余4等分(分位数分箱)
# 分位数(非零): [2.0, 9.95, 31.8, 97.2]
bins: [0.0, 0.01, 2.0, 10.0, 32.0, 97.2]
labels: [none, very_low, low, medium, high]
ndvi:
description: "植被指数"
unit: "NDVI值"
bins: [-1, 0, 0.1, 0.3, 0.5, 0.8, 1.0]
labels: [water, bare, sparse, moderate, dense, very_dense]
# 数据: [1.25, 38.68], 均值20.67±5.87, 偏度-0.106(注:数值超出NDVI标准范围[-1, 1],原始数据可能经过缩放,待确认)
# 分位数: [1.25, 17.09, 20.3, 22.4, 25.2, 38.68]
bins: [1.25, 17.09, 20.3, 22.4, 25.2, 38.68]
labels: [very_low, low, medium, high, very_high]
sand_content:
description: "土壤含沙量"
unit: "百分比"
bins: [0, 20, 40, 100]
labels: [low, medium, high]
# 数据: [23.0, 52.0], 均值34.43±4.29, 偏度0.538
# 分位数: [23.0, 31.0, 33.0, 35.0, 37.0, 52.0]
bins: [23.0, 31.0, 33.0, 35.0, 37.0, 52.0]
labels: [very_low, low, medium, high, very_high]
ph:
description: "土壤PH值"
unit: "PH值"
bins: [0, 6.5, 7.5, 14]
labels: [acidic, neutral, alkaline]
# 数据: [59.0, 81.0], 均值71.79±4.14, 偏度-0.398(注:数值范围不在0-14内,数据可能以pH×10存储,待确认)
# 分位数: [59.0, 68.0, 72.0, 74.0, 76.0, 81.0]
bins: [59.0, 68.0, 72.0, 74.0, 76.0, 81.0]
labels: [very_low, low, medium, high, very_high]
soil_moisture:
description: "土壤湿度"
unit: "百分比"
bins: [0, 20, 40, 80, 100]
labels: [dry, moist, wet, saturated]
# 数据: [0.0, 41.1], 均值32.02±14.92, 偏度-1.676
# 约10%为0.0(缺失/极端干燥),其余集中在37-41
# 分位数: [0.0, 37.7, 38.6, 38.9, 39.4, 41.1]
bins: [0.0, 37.0, 38.5, 39.5, 41.1]
labels: [very_low, low, medium, high]
organic_carbon:
description: "有机碳"
unit: "百分比"
bins: [0, 1, 2, 100]
labels: [low, medium, high]
# 数据: [0.0, 73.0], 均值38.36±19.14, 偏度-1.187
# 分位数: [0.0, 34.0, 41.0, 47.0, 53.0, 73.0]
bins: [0.0, 34.0, 41.0, 47.0, 53.0, 73.0]
labels: [very_low, low, medium, high, very_high]
dist_to_river:
description: "距离河道距离"
unit: "米"
bins: [0, 50, 200, 500, 99999]
labels: [very_close, close, moderate, far]
# 数据: [12.21, 29904.99], 均值11003.92±6582.23, 偏度0.271
# 分位数: [12.21, 5165.0, 9003.0, 12424.97, 16431.82, 29904.99]
bins: [12.21, 5165.0, 9003.0, 12424.97, 16431.82, 29904.99]
labels: [very_close, close, moderate, far, very_far]
dist_to_fault:
description: "距离断裂带距离"
unit: "米"
bins: [0, 500, 1500, 3000, 99999]
labels: [very_close, close, moderate, far]
# 数据: [1.74, 14542.53], 均值3448.52±3406.56, 偏度1.055
# 分位数: [1.74, 476.69, 1433.62, 3334.87, 6502.28, 14542.53]
bins: [1.74, 476.69, 1433.62, 3334.87, 6502.28, 14542.53]
labels: [very_close, close, moderate, far, very_far]
pipe_density:
description: "供水管网密度"
unit: "m/m²"
# 默认规则
default:
bins: [0, 0.001, 0.01, 0.05, 99999]
labels: [none, low, medium, high]
# 区域覆盖规则
region_overrides:
610100: # 西安市
bins: [0, 0.002, 0.015, 0.04, 99999]
labels: [none, low, medium, high]
# 数据: [0.0, 0.07], 约80%为0.0,90%分位数0.013,95%分位数0.023
# 分位数: [0.0, 0.013, 0.023, 0.065]
bins: [0.0, 0.013, 0.023, 0.065]
labels: [none, low, medium, high]