# Discretization rule configuration.
# Defines the binning rules for the continuous factors and the code-to-label
# mappings for the categorical factors of the rainstorm and earthquake
# disaster chains.
#
# Conventions used by every rule below:
#   - `bins` lists interval edges in ascending order; N labels need N + 1 edges.
#   - `mapping` translates an integer class code into a state label.
#   - 99999 / 99 act as open-ended upper sentinels.
#
# 2026-06-06: based on the actual data distribution of 1201 samples, quantile
# (equal-frequency) binning replaced the former equal-width binning so that
# every interval holds a more even number of samples.

# ============================================
# Rainstorm trigger layer
# (meteorological standards kept unchanged)
# ============================================

rain_intensity:
  description: "降雨强度等级"
  unit: "mm/h"
  # China Meteorological Administration rainfall grade standard.
  bins: [0, 0.2, 5, 15, 30, 70, 140, 99999]
  labels: [no_rain, light, moderate, heavy, storm, downpour, extreme]

duration:
  description: "持续时间"
  unit: "h"
  # Lower edge is 0 (was 1) so that events shorter than 1 h still fall into
  # `short`, consistent with the other trigger-layer rules that start at 0.
  bins: [0, 3, 12, 99999]
  labels: [short, medium, long]

accum_rain:
  description: "累计降雨量"
  unit: "mm"
  # China Meteorological Administration precipitation grade standard.
  bins: [0, 10, 25, 50, 100, 99999]
  labels: [trace, light, moderate, heavy, extreme]

# ============================================
# Earthquake trigger layer
# ============================================

magnitude:
  description: "地震震级"
  unit: "Richter"
  # Magnitude thresholds for earthquake-triggered geohazards, after
  # Keefer (1984).
  bins: [0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0]
  labels: [minor, light, moderate, strong, major, great]

epicenter_distance:
  description: "震中距"
  unit: "km"
  # Influence range of earthquake-induced geohazards (Keefer 1984).
  bins: [0, 30, 100, 300, 99999]
  labels: [very_near, near, moderate, far]

seismic_intensity:
  description: "地震烈度"
  unit: "中国烈度表"
  # GB 18306-2015, seismic ground motion parameter zonation map of China.
  bins: [0, 5, 7, 9, 12, 99]
  labels: [minor, light, moderate, severe, extreme]

# ============================================
# Environment layer (shared by the rainstorm and earthquake chains)
# Quantile binning based on 1201 samples; the outer edges are the observed
# minimum and maximum of that sample set.
# ============================================

elevation:
  description: "高程"
  unit: "m"
  # Data range: [356, 1934], mean 764.3 +/- 317.89, skewness 0.973
  # Quantiles: [356, 470, 624, 792, 1016, 1934]
  bins: [356, 470, 624, 792, 1016, 1934]
  labels: [very_low, low, medium, high, very_high]

slope:
  description: "坡度"
  unit: "度"
  # Data range: [0.11, 47.14], mean 9.42 +/- 8.57, skewness 1.433
  # Quantiles: [0.11, 1.81, 5.43, 9.48, 15.13, 47.14]
  bins: [0.11, 1.81, 5.43, 9.48, 15.13, 47.14]
  labels: [very_low, low, medium, high, very_high]

aspect:
  description: "坡向"
  unit: "度"
  # Data range: [0.86, 359.12], mean 173.47 +/- 104.96, skewness 0.126
  # Quantiles: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12]
  # NOTE(review): the edges are data quantiles, not compass sectors, so the
  # direction-style labels do not describe true slope directions (for example
  # the 135.0-230.19 interval is labelled `east`). Labels are left unchanged
  # because downstream code may reference them - confirm before renaming.
  bins: [0.86, 57.39, 135.0, 230.19, 297.83, 359.12]
  labels: [flat, north, east, south, west]

soil_type:
  description: "土壤分类(中国土壤分类系统)"
  unit: "分类代码"
  mapping:
    0: ultisol  # Ultisol
    6: entisol  # Entisol (primitive soil)
    11: fluvo_aquic  # fluvo-aquic soil
    18: yellow_brown  # yellow-brown earth
  # Label used for codes not listed in `mapping`.
  # NOTE(review): `default` is written as a sibling of `mapping` here and in
  # the other categorical rules - confirm that this is where the loader
  # looks for it.
  default: entisol

lithology:
  description: "岩性(中国地质分类)"
  unit: "分类代码"
  mapping:
    1: acid_rock  # acidic intrusive rock (granite etc., SiO2 > 66%)
    3: basic_rock  # basic intrusive rock (gabbro etc., SiO2 45-52%)
    4: basic_rock  # basic volcanic rock (basalt etc.), merged into basic rock
    5: carbonate  # carbonate rock (limestone, dolomite)
    10: metamorphic  # metamorphic rock (gneiss, marble)
    11: mixed_clastic  # mixed clastic sedimentary rock (sandstone/mudstone)
    13: terrigenous  # terrigenous clastic rock (sandstone, siltstone)
    14: unconsolidated  # unconsolidated deposits (loess, alluvium)
  default: unconsolidated

landuse:
  description: "土地利用类型"
  unit: "分类代码"
  mapping:
    10: forest  # forest land
    30: farmland  # farmland
    40: urban  # urban area
    50: water  # water body
    60: barren  # bare land
    80: farmland  # cultivated land, merged into farmland
  default: farmland

terrain:
  description: "地形分类(中国地形分类体系)"
  unit: "分类代码"
  mapping:
    1: mountain  # faulted piedmont zone (northern foot of the Qinling range)
    2: plain  # flat plain (Weihe Plain)
    3: deep_valley  # high-mountain deep gorge (Qinling hinterland)
    4: hill  # hills (loess tableland)
    5: gentle_hill  # low gentle hills (tableland-edge transition zone)
    6: low_mountain  # low mountains (Mount Li etc.)
    7: flat_plain  # gentle plain (alluvial plain)
  default: hill

impervious:
  description: "不透水率"
  unit: "百分比"
  # Data range: [0.0, 97.2], mean 16.40 +/- 25.99, skewness 1.787
  # 26.9% of samples are 0.0 (no sealed surface); non-zero values are
  # right-skewed.
  # Strategy: zero forms its own class (the 0.01 edge isolates it); the
  # remaining values are split into 4 quantile bins.
  # Quantiles (non-zero): [2.0, 9.95, 31.8, 97.2]
  bins: [0.0, 0.01, 2.0, 10.0, 32.0, 97.2]
  labels: [none, very_low, low, medium, high]

ndvi:
  description: "植被指数"
  unit: "NDVI值"
  # Data range: [1.25, 38.68], mean 20.67 +/- 5.87, skewness -0.106
  # Quantiles: [1.25, 17.09, 20.3, 22.4, 25.2, 38.68]
  bins: [1.25, 17.09, 20.3, 22.4, 25.2, 38.68]
  labels: [very_low, low, medium, high, very_high]

sand_content:
  description: "土壤含沙量"
  unit: "百分比"
  # Data range: [23.0, 52.0], mean 34.43 +/- 4.29, skewness 0.538
  # Quantiles: [23.0, 31.0, 33.0, 35.0, 37.0, 52.0]
  bins: [23.0, 31.0, 33.0, 35.0, 37.0, 52.0]
  labels: [very_low, low, medium, high, very_high]

ph:
  description: "土壤PH值"
  unit: "PH值"
  # Data range: [59.0, 81.0], mean 71.79 +/- 4.14, skewness -0.398
  # Quantiles: [59.0, 68.0, 72.0, 74.0, 76.0, 81.0]
  bins: [59.0, 68.0, 72.0, 74.0, 76.0, 81.0]
  labels: [very_low, low, medium, high, very_high]

soil_moisture:
  description: "土壤湿度"
  unit: "百分比"
  # Data range: [0.0, 41.1], mean 32.02 +/- 14.92, skewness -1.676
  # About 10% of samples are 0.0 (missing / extremely dry); the rest cluster
  # in 37-41, so four hand-rounded bins are used instead of the raw quantiles.
  # Quantiles: [0.0, 37.7, 38.6, 38.9, 39.4, 41.1]
  bins: [0.0, 37.0, 38.5, 39.5, 41.1]
  labels: [very_low, low, medium, high]

organic_carbon:
  description: "有机碳"
  unit: "百分比"
  # Data range: [0.0, 73.0], mean 38.36 +/- 19.14, skewness -1.187
  # Quantiles: [0.0, 34.0, 41.0, 47.0, 53.0, 73.0]
  bins: [0.0, 34.0, 41.0, 47.0, 53.0, 73.0]
  labels: [very_low, low, medium, high, very_high]

dist_to_river:
  description: "距离河道距离"
  unit: "米"
  # Data range: [12.21, 29904.99], mean 11003.92 +/- 6582.23, skewness 0.271
  # Quantiles: [12.21, 5165.0, 9003.0, 12424.97, 16431.82, 29904.99]
  bins: [12.21, 5165.0, 9003.0, 12424.97, 16431.82, 29904.99]
  labels: [very_close, close, moderate, far, very_far]

dist_to_fault:
  description: "距离断裂带距离"
  unit: "米"
  # Data range: [1.74, 14542.53], mean 3448.52 +/- 3406.56, skewness 1.055
  # Quantiles: [1.74, 476.69, 1433.62, 3334.87, 6502.28, 14542.53]
  bins: [1.74, 476.69, 1433.62, 3334.87, 6502.28, 14542.53]
  labels: [very_close, close, moderate, far, very_far]

pipe_density:
  description: "供水管网密度"
  unit: "m/m²"
  # Data range: [0.0, 0.07]; about 80% of samples are 0.0, the 90th
  # percentile is 0.013 and the 95th percentile is 0.023.
  # Quantiles: [0.0, 0.013, 0.023, 0.065]
  # The four labels need five edges. The 0.0001 edge isolates the zero class
  # (`none`), mirroring how `impervious` separates its zeros, and the top edge
  # is 0.07 so the documented data maximum is still covered.
  # NOTE(review): 0.0001 is assumed to lie below the smallest non-zero
  # density in the data - confirm against the sample set.
  bins: [0.0, 0.0001, 0.013, 0.023, 0.07]
  labels: [none, low, medium, high]