Upload folder using huggingface_hub
Browse files- .DS_Store +0 -0
- README.md +555 -9
- __pycache__/feature_calculator.cpython-313.pyc +0 -0
- __pycache__/gradio_app.cpython-313.pyc +0 -0
- __pycache__/wearable_anomaly_detector.cpython-313.pyc +0 -0
- checkpoints/phase2/exp_factor_balanced/best_model.pt +3 -0
- configs/api_config.json +31 -0
- configs/detector_config.json +18 -0
- configs/features_config.json +108 -0
- configs/formatter_config.json +189 -0
- data_storage/baselines.json +31 -0
- demo_llm_inputs/case_am77_full.json +30 -0
- demo_llm_inputs/case_ba30_full.json +30 -0
- demo_llm_inputs/case_ej27_full.json +30 -0
- demo_llm_inputs/manifest.json +17 -0
- feature_calculator.py +273 -0
- processed_data/stage3/norm_params.json +146 -0
- requirements.txt +6 -0
- run_official_inference.py +122 -0
- test_data/example_window.json +291 -0
- test_quickstart.py +264 -0
- utils/__init__.py +10 -0
- utils/__pycache__/formatter.cpython-313.pyc +0 -0
- utils/api_client.py +158 -0
- utils/baseline_storage.py +348 -0
- utils/formatter.py +277 -0
- wearable_anomaly_detector.py +785 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
README.md
CHANGED
|
@@ -1,12 +1,558 @@
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
library_name: pytorch
|
| 3 |
+
pipeline_tag: time-series-forecasting
|
| 4 |
+
language:
|
| 5 |
+
- zh
|
| 6 |
+
- en
|
| 7 |
+
tags:
|
| 8 |
+
- anomaly-detection
|
| 9 |
+
- time-series
|
| 10 |
+
- wearable
|
| 11 |
+
- health
|
| 12 |
+
- lstm
|
| 13 |
+
- transformer
|
| 14 |
+
- physiological-monitoring
|
| 15 |
+
- hrv
|
| 16 |
+
- heart-rate
|
| 17 |
+
- real-time
|
| 18 |
+
- multi-user
|
| 19 |
+
- personalized
|
| 20 |
+
- sensor-fusion
|
| 21 |
+
- healthcare
|
| 22 |
+
- continuous-monitoring
|
| 23 |
+
license: apache-2.0
|
| 24 |
+
pretty_name: Wearable TimeSeries Health Monitor
|
| 25 |
---
|
| 26 |
|
| 27 |
+
<div align="center">
|
| 28 |
+
|
| 29 |
+
**Language / 语言**: [中文](#中文版本) | [English](#english-version)
|
| 30 |
+
|
| 31 |
+
</div>
|
| 32 |
+
|
| 33 |
+
---
|
| 34 |
+
|
| 35 |
+
<a id="中文版本"></a>
|
| 36 |
+
# Wearable_TimeSeries_Health_Monitor
|
| 37 |
+
|
| 38 |
+
面向可穿戴设备的多用户健康监控方案:一份模型、一个配置,就能为不同用户构建个性化异常检测。模型基于 **Phased LSTM + Temporal Fusion Transformer (TFT)**,并整合自适应基线、因子特征以及单位秒级的数据滑窗能力,适合当作 HuggingFace 模型或企业内部服务快速接入。
|
| 39 |
+
|
| 40 |
+
---
|
| 41 |
+
|
| 42 |
+
## 🌟 模型应用亮点
|
| 43 |
+
|
| 44 |
+
| 能力 | 说明 |
|
| 45 |
+
| --- | --- |
|
| 46 |
+
| **即插即用** | 内置 `WearableAnomalyDetector` 封装,加载模型即可预测,一次初始化后可持续监控多个用户 |
|
| 47 |
+
| **配置驱动特征** | `configs/features_config.json` 描述所有特征、缺省值、类别映射,新增/删减血氧、呼吸率等只需改配置 |
|
| 48 |
+
| **多用户实时服务** | `FeatureCalculator` + 轻量级 `data_storage` 缓存,实现用户历史管理、基线演化、批量推理 |
|
| 49 |
+
| **真实数据验证** | README 内置“真实数据测试”操作说明,可一键模拟正常/异常用户、基线更新与多天模式检测 |
|
| 50 |
+
| **自适应基线支持** | 可扩展 `UserDataManager` 将个人/分组基线接入推理流程,持续改善个体敏感度 |
|
| 51 |
+
|
| 52 |
+
---
|
| 53 |
+
|
| 54 |
+
## ⚡ 核心特点与技术优势
|
| 55 |
+
|
| 56 |
+
### 🎯 自适应基线:个人与群体智能融合
|
| 57 |
+
|
| 58 |
+
模型采用**自适应基线策略**,根据用户历史数据量动态选择最优基线:
|
| 59 |
+
|
| 60 |
+
- **个人基线优先**:当用户有足够历史数据(如 ≥7 天)时,使用个人 HRV 均值/标准差作为基线,捕捉个体生理节律差异
|
| 61 |
+
- **群体基线兜底**:新用户或数据稀疏时,自动切换到群体统计基线,确保冷启动也能稳定检测
|
| 62 |
+
- **平滑过渡机制**:通过加权混合(如 `final_mean = α × personal_mean + (1-α) × group_mean`)实现从群体到个人的渐进式适应
|
| 63 |
+
- **实时基线更新**:推理过程中持续累积用户数据,基线随用户状态演化而动态调整,提升长期监控精度
|
| 64 |
+
|
| 65 |
+
**优势**:相比固定阈值或纯群体基线,自适应基线能同时兼顾**个性化敏感度**(减少误报)和**冷启动鲁棒性**(新用户可用),特别适合多用户、长周期监控场景。
|
| 66 |
+
|
| 67 |
+
### ⏱️ 灵活的时间窗口与周期
|
| 68 |
+
|
| 69 |
+
- **5 分钟级粒度**:每条数据点代表 5 分钟聚合,支持秒级到小时级的灵活时间尺度
|
| 70 |
+
- **可配置窗口大小**:默认 12 点(1 小时),可根据业务需求调整为 6 点(30 分钟)或 24 点(2 小时)
|
| 71 |
+
- **不等间隔容错**:Phased LSTM 架构天然处理缺失数据点,即使数据稀疏(如夜间传感器断开)也能稳定推理
|
| 72 |
+
- **多时间尺度特征**:同时提取短期波动(RMSSD)、中期趋势(滑动均值)和长期模式(日/周周期),捕捉不同时间尺度的异常信号
|
| 73 |
+
|
| 74 |
+
**优势**:适应不同设备采样频率、用户佩戴习惯,无需强制对齐时间戳,降低数据预处理复杂度。
|
| 75 |
+
|
| 76 |
+
### 🔄 多通道数据协同作用
|
| 77 |
+
|
| 78 |
+
模型整合**4 大类特征通道**,通过因子特征与注意力机制实现跨通道信息融合:
|
| 79 |
+
|
| 80 |
+
1. **生理通道**(HR、HRV 系列、呼吸率、血氧)
|
| 81 |
+
- 直接反映心血管与呼吸系统状态
|
| 82 |
+
- 因子特征:`physiological_mean`, `physiological_std`, `physiological_max`, `physiological_min`
|
| 83 |
+
|
| 84 |
+
2. **活动通道**(步数、距离、能量消耗、加速度、陀螺仪)
|
| 85 |
+
- 捕捉运动强度与身体负荷
|
| 86 |
+
- 因子特征:`activity_mean`, `activity_std` 等
|
| 87 |
+
|
| 88 |
+
3. **环境通道**(光线、时间周期、数据质量)
|
| 89 |
+
- 提供上下文信息,区分运动性心率升高 vs 静息异常
|
| 90 |
+
- 类别特征:`time_period_primary`(morning/day/evening/night)
|
| 91 |
+
|
| 92 |
+
4. **基线通道**(自适应基线均值/标准差、偏差特征)
|
| 93 |
+
- 提供个性化参考基准,计算 `hrv_deviation_abs`, `hrv_z_score` 等相对异常指标
|
| 94 |
+
|
| 95 |
+
**协同机制**:
|
| 96 |
+
- **因子特征聚合**:将同类通道的统计量(均值/标准差/最值)作为高层特征,让模型学习通道间的关联模式
|
| 97 |
+
- **TFT 注意力**:Temporal Fusion Transformer 的变量选择网络自动识别哪些通道在特定时间点最重要
|
| 98 |
+
- **已知未来特征**:时间特征(小时、星期、是否周末)帮助模型理解周期性,区分正常波动与异常
|
| 99 |
+
|
| 100 |
+
**优势**:多通道协同能显著降低**单一指标误报**(如运动导致心率升高),提升**异常检测的上下文感知能力**,特别适合可穿戴设备的多传感器融合场景。
|
| 101 |
+
|
| 102 |
+
---
|
| 103 |
+
|
| 104 |
+
## 📊 核心指标(短期窗口)
|
| 105 |
+
|
| 106 |
+
- **F1**: 0.2819
|
| 107 |
+
- **Precision**: 0.1769
|
| 108 |
+
- **Recall**: 0.6941
|
| 109 |
+
- **最佳阈值**: 0.53
|
| 110 |
+
- **窗口定义**: 12 条 5 分钟数据(1小时时间窗,预测未来 0.5 小时)
|
| 111 |
+
|
| 112 |
+
> 模型偏向召回,适合“异常先提醒、人机协同复核”的场景。可通过阈值/采样策略调节精度与召回。
|
| 113 |
+
|
| 114 |
+
---
|
| 115 |
+
|
| 116 |
+
## 🚀 快速体验
|
| 117 |
+
|
| 118 |
+
### 1. 克隆或下载模型仓库
|
| 119 |
+
|
| 120 |
+
```bash
|
| 121 |
+
git clone https://huggingface.co/oscarzhang/Wearable_TimeSeries_Health_Monitor
|
| 122 |
+
cd Wearable_TimeSeries_Health_Monitor
|
| 123 |
+
pip install -r requirements.txt
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
### 2. 在业务代码中调用
|
| 127 |
+
|
| 128 |
+
```python
|
| 129 |
+
from wearable_anomaly_detector import WearableAnomalyDetector
|
| 130 |
+
|
| 131 |
+
detector = WearableAnomalyDetector(
|
| 132 |
+
model_dir="checkpoints/phase2/exp_factor_balanced",
|
| 133 |
+
threshold=0.53,
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
result = detector.predict(data_points, return_score=True, return_details=True)
|
| 137 |
+
print(result)
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
> `data_points` 为 12 条最新的 5 分钟记录;若缺静态特征/设备信息,系统会自动从配置/缓存补齐。
|
| 141 |
+
|
| 142 |
+
### 3. 快速体验真实数据模拟
|
| 143 |
+
|
| 144 |
+
```python
|
| 145 |
+
from datetime import datetime, timedelta
|
| 146 |
+
from wearable_anomaly_detector import WearableAnomalyDetector
|
| 147 |
+
|
| 148 |
+
detector = WearableAnomalyDetector("checkpoints/phase2/exp_factor_balanced", device="cpu")
|
| 149 |
+
|
| 150 |
+
def make_point(ts, hrv, hr):
|
| 151 |
+
return {
|
| 152 |
+
"timestamp": ts.isoformat(),
|
| 153 |
+
"deviceId": "demo_user",
|
| 154 |
+
"features": {
|
| 155 |
+
"hr": hr,
|
| 156 |
+
"hr_resting": 65,
|
| 157 |
+
"hrv_rmssd": hrv,
|
| 158 |
+
"time_period_primary": "day",
|
| 159 |
+
"data_quality": "high",
|
| 160 |
+
"baseline_hrv_mean": 75.0,
|
| 161 |
+
"baseline_hrv_std": 5.0
|
| 162 |
+
},
|
| 163 |
+
"static_features": {
|
| 164 |
+
"age_group": 2,
|
| 165 |
+
"sex": 0,
|
| 166 |
+
"exercise": 1
|
| 167 |
+
}
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
start = datetime.now() - timedelta(hours=1)
|
| 171 |
+
window = [make_point(start + timedelta(minutes=5*i), 75 - i*0.5, 70 + i*0.2) for i in range(12)]
|
| 172 |
+
print(detector.detect_realtime(window))
|
| 173 |
+
```
|
| 174 |
+
|
| 175 |
+
以上脚本会自动构造 12 条 5 分钟数据,完成一次实时检测。可自行调节 HRV、HR 或窗口大小模拟不同场景。
|
| 176 |
+
|
| 177 |
+
---
|
| 178 |
+
|
| 179 |
+
## 🧪 真实数据测试
|
| 180 |
+
|
| 181 |
+
> 以下结果来自 README 中的示例脚本(模拟正常/异常用户、基线更新、多天模式)。全部在 CPU 上完成。
|
| 182 |
+
|
| 183 |
+
| 场景 | 数据概况 | 结果 |
|
| 184 |
+
| --- | --- | --- |
|
| 185 |
+
| 实时检测(正常) | HRV≈76ms,HR≈68 bpm,12 条数据 | 异常分数 0.5393,阈值 0.53(轻微触发,模型对边缘异常敏感) |
|
| 186 |
+
| 实时检测(异常) | HRV≈69ms,HR≈74 bpm,12 条数据 | 异常分数 0.4764,未超阈值,需结合多天模式进一步观察 |
|
| 187 |
+
| 模式聚合(7 天) | 前 3 天正常,后 4 天逐渐下行 | 正确识别持续 3 天的异常模式,趋势为 stable |
|
| 188 |
+
| 基线存储/更新 | 初始基线 75±5,记录 30 条 | 存储成功;新值 70ms 后均值更新为 74.84,记录数 31 |
|
| 189 |
+
| 完整流程 | 实时检测 → 基线更新 → LLM 文本 | 全流程执行成功,生成 114 字符的结构化异常摘要 |
|
| 190 |
+
|
| 191 |
+
复制上文的“真实数据模拟”代码,按需调整 HRV/HR、窗口长度或异常强度即可复现同样的流程。
|
| 192 |
+
|
| 193 |
+
---
|
| 194 |
+
|
| 195 |
+
## 🔧 输入与输出
|
| 196 |
+
|
| 197 |
+
### 输入(单个数据点)
|
| 198 |
+
|
| 199 |
+
```python
|
| 200 |
+
{
|
| 201 |
+
"timestamp": "2024-01-01T08:00:00",
|
| 202 |
+
"deviceId": "ab60", # 可选,缺失时会自动创建匿名 ID
|
| 203 |
+
"features": {
|
| 204 |
+
"hr": 72.0,
|
| 205 |
+
"hrv_rmssd": 30.0,
|
| 206 |
+
"time_period_primary": "morning",
|
| 207 |
+
"data_quality": "high",
|
| 208 |
+
...
|
| 209 |
+
}
|
| 210 |
+
}
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
- 每个窗口需 12 条数据(默认 1 小时)
|
| 214 |
+
- 特征是否必填由 `configs/features_config.json` 控制
|
| 215 |
+
- 缺失值会自动回落到 default 或 category_mapping 定义值
|
| 216 |
+
|
| 217 |
+
### 输出
|
| 218 |
+
|
| 219 |
+
```python
|
| 220 |
+
{
|
| 221 |
+
"is_anomaly": True,
|
| 222 |
+
"anomaly_score": 0.5760,
|
| 223 |
+
"threshold": 0.5300,
|
| 224 |
+
"details": {
|
| 225 |
+
"window_size": 12,
|
| 226 |
+
"model_output": 0.5760,
|
| 227 |
+
"prediction_confidence": 0.0460
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
---
|
| 233 |
+
|
| 234 |
+
## 🧱 模型架构与训练
|
| 235 |
+
|
| 236 |
+
- **模型骨干**:Phased LSTM 处理不等间隔序列 + Temporal Fusion Transformer 聚合时间上下文
|
| 237 |
+
- **异常检测头**:增强注意力、多层 MLP、可选对比学习/类型辅助头
|
| 238 |
+
- **特征体系**:
|
| 239 |
+
- 生理:HR、HRV(RMSSD/SDNN/PNN50…)
|
| 240 |
+
- 活动:步数、距离、能量消耗、加速度、陀螺仪
|
| 241 |
+
- 环境:光线、昼夜标签、数据质量
|
| 242 |
+
- 基线:自适应基线均值/标准差 + 偏差特征
|
| 243 |
+
- **标签来源**:问卷高置信度标签 + 自适应基线低置信度标签
|
| 244 |
+
- **训练流程**:Stage1/2/3 数据加工 ➜ Phase1 自监督预训练 ➜ Phase2 监督微调 ➜ 阈值/案例校正
|
| 245 |
+
|
| 246 |
+
---
|
| 247 |
+
|
| 248 |
+
## 📦 仓库结构(部分)
|
| 249 |
+
|
| 250 |
+
```
|
| 251 |
+
├─ configs/
|
| 252 |
+
│ └─ features_config.json # 特征定义 & 归一化策略
|
| 253 |
+
├─ wearable_anomaly_detector.py # 核心封装:加载、预测、批处理
|
| 254 |
+
├─ feature_calculator.py # 配置驱动的特征构建 + 用户历史缓存
|
| 255 |
+
└─ checkpoints/phase2/... # 模型权重 & summary
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
---
|
| 259 |
+
|
| 260 |
+
## 📚 数据来源与许可证
|
| 261 |
+
|
| 262 |
+
- 训练数据基于 **“A continuous real-world dataset comprising wearable-based heart rate variability alongside sleep diaries”**(Baigutanova *et al.*, Scientific Data, 2025)以及其 Figshare 数据集 [doi:10.1038/s41597-025-05801-3](https://www.nature.com/articles/s41597-025-05801-3) / [dataset link](https://springernature.figshare.com/articles/dataset/In-situ_wearable-based_dataset_of_continuous_heart_rate_variability_monitoring_accompanied_by_sleep_diaries/28509740)。
|
| 263 |
+
- 该数据集以 **Creative Commons Attribution 4.0 (CC BY 4.0)** 许可发布,可自由使用、修改、分发,但必须保留署名并附上许可证链接。
|
| 264 |
+
- 本仓库沿用 CC BY 4.0 对原始数据的要求;若你在此基础上再加工或发布,请继续保留上述署名与许可证说明。
|
| 265 |
+
- 代码/模型可根据需要使用 MIT/Apache 等许可证,但凡涉及数据的部分,仍需遵循 CC BY 4.0。
|
| 266 |
+
|
| 267 |
+
---
|
| 268 |
+
|
| 269 |
+
## 🤝 贡献与扩展
|
| 270 |
+
|
| 271 |
+
欢迎:
|
| 272 |
+
1. 新增特征或数据源 ⇒ 更新 `features_config.json` + 提交 PR
|
| 273 |
+
2. 接入新的用户数据管理/基线策略 ⇒ 扩展 `FeatureCalculator` 或贡献 `UserDataManager`
|
| 274 |
+
3. 反馈案例或真实部署经验 ⇒ 提 Issue 或 Discussion
|
| 275 |
+
|
| 276 |
+
---
|
| 277 |
+
|
| 278 |
+
## 📄 许可证
|
| 279 |
+
|
| 280 |
+
- **模型与代码**:Apache-2.0。可在保留版权与许可证声明的前提下任意使用/修改/分发。
|
| 281 |
+
- **训练数据**:原始可穿戴 HRV 数据集使用 CC BY 4.0,复用时请继续保留作者署名与许可信息。
|
| 282 |
+
|
| 283 |
+
---
|
| 284 |
+
|
| 285 |
+
## 🔖 引用
|
| 286 |
+
|
| 287 |
+
```bibtex
|
| 288 |
+
@software{Wearable_TimeSeries_Health_Monitor,
|
| 289 |
+
title = {Wearable\_TimeSeries\_Health\_Monitor},
|
| 290 |
+
author = {oscarzhang},
|
| 291 |
+
year = {2025},
|
| 292 |
+
url = {https://huggingface.co/oscarzhang/Wearable_TimeSeries_Health_Monitor}
|
| 293 |
+
}
|
| 294 |
+
```
|
| 295 |
+
|
| 296 |
+
---
|
| 297 |
+
|
| 298 |
+
<a id="english-version"></a>
|
| 299 |
+
# Wearable_TimeSeries_Health_Monitor
|
| 300 |
+
|
| 301 |
+
A multi-user health monitoring solution for wearable devices: one model, one configuration, enabling personalized anomaly detection for different users. The model is based on **Phased LSTM + Temporal Fusion Transformer (TFT)**, integrating adaptive baselines, factor features, and second-level data sliding window capabilities, suitable for deployment as a HuggingFace model or rapid integration into enterprise services.
|
| 302 |
+
|
| 303 |
+
---
|
| 304 |
+
|
| 305 |
+
## 🌟 Model Highlights
|
| 306 |
+
|
| 307 |
+
| Capability | Description |
|
| 308 |
+
| --- | --- |
|
| 309 |
+
| **Plug-and-Play** | Built-in `WearableAnomalyDetector` wrapper, load the model and start predicting, supports continuous monitoring of multiple users after a single initialization |
|
| 310 |
+
| **Configuration-Driven Features** | `configs/features_config.json` defines all features, default values, and category mappings; adding/removing features like blood oxygen or respiratory rate only requires configuration changes |
|
| 311 |
+
| **Multi-User Real-Time Service** | `FeatureCalculator` + lightweight `data_storage` cache enables user history management, baseline evolution, and batch inference |
|
| 312 |
+
| **Real-World Validation** | README ships with a “Real Data Tests” section plus sample simulation code so you can mimic normal/abnormal users in minutes |
|
| 313 |
+
| **Adaptive Baseline Support** | Extensible `UserDataManager` integrates personal/group baselines into the inference pipeline, continuously improving individual sensitivity |
|
| 314 |
+
|
| 315 |
+
---
|
| 316 |
+
|
| 317 |
+
## ⚡ Core Features & Technical Advantages
|
| 318 |
+
|
| 319 |
+
### 🎯 Adaptive Baseline: Intelligent Fusion of Personal and Group
|
| 320 |
+
|
| 321 |
+
The model employs an **adaptive baseline strategy** that dynamically selects the optimal baseline based on user historical data volume:
|
| 322 |
+
|
| 323 |
+
- **Personal Baseline Priority**: When users have sufficient historical data (e.g., ≥7 days), use personal HRV mean/std as baseline to capture individual physiological rhythm differences
|
| 324 |
+
- **Group Baseline Fallback**: For new users or sparse data, automatically switch to group statistical baseline, ensuring stable detection even during cold start
|
| 325 |
+
- **Smooth Transition Mechanism**: Achieve gradual adaptation from group to personal through weighted mixing (e.g., `final_mean = α × personal_mean + (1-α) × group_mean`)
|
| 326 |
+
- **Real-Time Baseline Updates**: Continuously accumulate user data during inference, baseline dynamically adjusts as user state evolves, improving long-term monitoring accuracy
|
| 327 |
+
|
| 328 |
+
**Advantage**: Compared to fixed thresholds or pure group baselines, adaptive baselines balance **personalized sensitivity** (reducing false positives) and **cold-start robustness** (usable for new users), especially suitable for multi-user, long-term monitoring scenarios.
|
| 329 |
+
|
| 330 |
+
### ⏱️ Flexible Time Windows & Periods
|
| 331 |
+
|
| 332 |
+
- **5-Minute Granularity**: Each data point represents 5-minute aggregation, supporting flexible time scales from seconds to hours
|
| 333 |
+
- **Configurable Window Size**: Default 12 points (1 hour), adjustable to 6 points (30 minutes) or 24 points (2 hours) based on business needs
|
| 334 |
+
- **Uneven Interval Tolerance**: Phased LSTM architecture naturally handles missing data points, stable inference even with sparse data (e.g., sensor disconnection at night)
|
| 335 |
+
- **Multi-Time-Scale Features**: Simultaneously extract short-term fluctuations (RMSSD), medium-term trends (rolling mean), and long-term patterns (daily/weekly cycles), capturing anomaly signals at different time scales
|
| 336 |
+
|
| 337 |
+
**Advantage**: Adapts to different device sampling frequencies and user wearing habits, no need to force timestamp alignment, reducing data preprocessing complexity.
|
| 338 |
+
|
| 339 |
+
### 🔄 Multi-Channel Data Synergy
|
| 340 |
+
|
| 341 |
+
The model integrates **4 major feature channels**, achieving cross-channel information fusion through factor features and attention mechanisms:
|
| 342 |
+
|
| 343 |
+
1. **Physiological Channel** (HR, HRV series, respiratory rate, blood oxygen)
|
| 344 |
+
- Directly reflects cardiovascular and respiratory system status
|
| 345 |
+
- Factor features: `physiological_mean`, `physiological_std`, `physiological_max`, `physiological_min`
|
| 346 |
+
|
| 347 |
+
2. **Activity Channel** (steps, distance, energy consumption, acceleration, gyroscope)
|
| 348 |
+
- Captures exercise intensity and body load
|
| 349 |
+
- Factor features: `activity_mean`, `activity_std`, etc.
|
| 350 |
+
|
| 351 |
+
3. **Environmental Channel** (light, time period, data quality)
|
| 352 |
+
- Provides contextual information, distinguishing exercise-induced heart rate elevation vs. resting anomalies
|
| 353 |
+
- Categorical features: `time_period_primary` (morning/day/evening/night)
|
| 354 |
+
|
| 355 |
+
4. **Baseline Channel** (adaptive baseline mean/std, deviation features)
|
| 356 |
+
- Provides personalized reference baseline, calculating relative anomaly indicators like `hrv_deviation_abs`, `hrv_z_score`
|
| 357 |
+
|
| 358 |
+
**Synergy Mechanism**:
|
| 359 |
+
- **Factor Feature Aggregation**: Use statistical measures (mean/std/max/min) of similar channels as high-level features, enabling the model to learn association patterns between channels
|
| 360 |
+
- **TFT Attention**: Temporal Fusion Transformer's variable selection network automatically identifies which channels are most important at specific time points
|
| 361 |
+
- **Known Future Features**: Time features (hour, day of week, is_weekend) help the model understand periodicity, distinguishing normal fluctuations from anomalies
|
| 362 |
+
|
| 363 |
+
**Advantage**: Multi-channel synergy significantly reduces **single-indicator false positives** (e.g., exercise-induced heart rate elevation) and improves **context-aware anomaly detection**, especially suitable for multi-sensor fusion scenarios in wearable devices.
|
| 364 |
+
|
| 365 |
+
---
|
| 366 |
+
|
| 367 |
+
## 📊 Core Metrics (Short-Term Window)
|
| 368 |
+
|
| 369 |
+
- **F1**: 0.2819
|
| 370 |
+
- **Precision**: 0.1769
|
| 371 |
+
- **Recall**: 0.6941
|
| 372 |
+
- **Optimal Threshold**: 0.53
|
| 373 |
+
- **Window Definition**: 12 data points of 5-minute intervals (1-hour time window, predicting 0.5 hours ahead)
|
| 374 |
+
|
| 375 |
+
> The model favors recall, suitable for "anomaly-first alert, human-machine collaborative review" scenarios. Precision and recall can be adjusted through threshold/sampling strategies.
|
| 376 |
+
|
| 377 |
+
---
|
| 378 |
+
|
| 379 |
+
## 🚀 Quick Start
|
| 380 |
+
|
| 381 |
+
### 1. Clone or Download the Model Repository
|
| 382 |
+
|
| 383 |
+
```bash
|
| 384 |
+
git clone https://huggingface.co/oscarzhang/Wearable_TimeSeries_Health_Monitor
|
| 385 |
+
cd Wearable_TimeSeries_Health_Monitor
|
| 386 |
+
pip install -r requirements.txt
|
| 387 |
+
```
|
| 388 |
+
|
| 389 |
+
### 2. Run the Official Inference Script
|
| 390 |
+
|
| 391 |
+
```bash
|
| 392 |
+
python run_official_inference.py \
|
| 393 |
+
--window-file test_data/example_window.json \
|
| 394 |
+
--model-dir checkpoints/phase2/exp_factor_balanced
|
| 395 |
+
```
|
| 396 |
+
|
| 397 |
+
The script will:
|
| 398 |
+
- Read `test_data/example_window.json` (12 window data points in the real input format)
|
| 399 |
+
- Call `WearableAnomalyDetector.detect_realtime`
|
| 400 |
+
- Print the complete JSON result
|
| 401 |
+
- Use `AnomalyFormatter` to emit Markdown text that an LLM can consume directly
|
| 402 |
+
|
| 403 |
+
To test your own window, simply replace the `--window-file` path; this script injects no random noise, so its output matches the official API.
|
| 404 |
+
|
| 405 |
+
### 3. Call in Business Code
|
| 406 |
+
|
| 407 |
+
```python
|
| 408 |
+
from wearable_anomaly_detector import WearableAnomalyDetector
|
| 409 |
+
|
| 410 |
+
detector = WearableAnomalyDetector(
|
| 411 |
+
model_dir="checkpoints/phase2/exp_factor_balanced",
|
| 412 |
+
threshold=0.53,
|
| 413 |
+
)
|
| 414 |
+
|
| 415 |
+
result = detector.predict(data_points, return_score=True, return_details=True)
|
| 416 |
+
print(result)
|
| 417 |
+
```
|
| 418 |
+
|
| 419 |
+
> `data_points` should be 12 latest 5-minute records; if static features/device information are missing, the system will automatically fill from configuration/cache.
|
| 420 |
+
|
| 421 |
+
### 4. Quick Simulation Script (Optional)
|
| 422 |
+
|
| 423 |
+
```bash
|
| 424 |
+
python test_quickstart.py
|
| 425 |
+
```
|
| 426 |
+
|
| 427 |
+
This script covers more demo scenarios (random noise, a 7-day pronounced anomaly, missing/low-quality data). The log first runs inference on the example file, then prints normal/abnormal windows, pattern aggregation, and fault-tolerance samples. **Note**: to observe boundary behavior, the script temporarily lowers the threshold to 0.50 and injects random perturbations — it is for demonstration only.
|
| 428 |
+
|
| 429 |
+
---
|
| 430 |
+
|
| 431 |
+
## 🧪 Real Data Tests
|
| 432 |
+
|
| 433 |
+
> The following results were reproduced with the sample code above (normal vs. abnormal users, multi-day trend, baseline update, end-to-end workflow). All tests ran on CPU; the first scenario directly loads `test_data/example_window.json`.
|
| 434 |
+
|
| 435 |
+
| Scenario | Data Snapshot | Outcome |
|
| 436 |
+
| --- | --- | --- |
|
| 437 |
+
| Real-time (sample file) | HRV≈72 ms, HR≈71 bpm, 12 points | Score ≈0.526 vs. threshold 0.50 (demo threshold) |
|
| 438 |
+
| Real-time (normal) | HRV≈76 ms, HR≈68 bpm, 12 points | Score 0.5393 vs. threshold 0.53 (marginal trigger) |
|
| 439 |
+
| Real-time (abnormal) | HRV≈69 ms, HR≈74 bpm | Score 0.4764 < threshold, requires multi-day confirmation |
|
| 440 |
+
| Pattern aggregation | 7 days, last 3 days gradually down | Detected 3-day continuous anomaly, trend `stable` |
|
| 441 |
+
| Baseline storage/update | Start 75 ± 5, 30 records | After new value 70 ms ⇒ mean 74.84, records 31 |
|
| 442 |
+
| Missing data tolerance | 40% features removed + static info missing | Still flags anomaly (score ≈0.50) thanks to fallback defaults |
|
| 443 |
+
| Full workflow | Detect → Baseline update → LLM text | Completed successfully; 114-char structured summary |
|
| 444 |
+
|
| 445 |
+
Feel free to adapt `test_data/example_window.json` or the simulation logic inside the script — tweak the HRV/HR curves, window size, or missing-data ratio and observe how the output changes.
|
| 446 |
+
|
| 447 |
+
---
|
| 448 |
+
|
| 449 |
+
> The quickstart script temporarily lowers the threshold to 0.50 by default so boundary scenarios are observable. Reconfigure it to suit your use case in real deployments.
|
| 450 |
+
|
| 451 |
+
## 🔧 Input & Output
|
| 452 |
+
|
| 453 |
+
### Input (Single Data Point)
|
| 454 |
+
|
| 455 |
+
```python
|
| 456 |
+
{
|
| 457 |
+
"timestamp": "2024-01-01T08:00:00",
|
| 458 |
+
"deviceId": "ab60", # Optional, anonymous ID will be created if missing
|
| 459 |
+
"features": {
|
| 460 |
+
"hr": 72.0,
|
| 461 |
+
"hrv_rmssd": 30.0,
|
| 462 |
+
"time_period_primary": "morning",
|
| 463 |
+
"data_quality": "high",
|
| 464 |
+
...
|
| 465 |
+
}
|
| 466 |
+
}
|
| 467 |
+
```
|
| 468 |
+
|
| 469 |
+
- Each window requires 12 data points (default 1 hour)
|
| 470 |
+
- Whether features are required is controlled by `configs/features_config.json`
|
| 471 |
+
- Missing values automatically fall back to default or category_mapping defined values
|
| 472 |
+
|
| 473 |
+
### Output
|
| 474 |
+
|
| 475 |
+
```python
|
| 476 |
+
{
|
| 477 |
+
"is_anomaly": True,
|
| 478 |
+
"anomaly_score": 0.5760,
|
| 479 |
+
"threshold": 0.5300,
|
| 480 |
+
"details": {
|
| 481 |
+
"window_size": 12,
|
| 482 |
+
"model_output": 0.5760,
|
| 483 |
+
"prediction_confidence": 0.0460
|
| 484 |
+
}
|
| 485 |
+
}
|
| 486 |
+
```
|
| 487 |
+
|
| 488 |
+
---
|
| 489 |
+
|
| 490 |
+
## 🧱 Model Architecture & Training
|
| 491 |
+
|
| 492 |
+
- **Model Backbone**: Phased LSTM handles unevenly-spaced sequences + Temporal Fusion Transformer aggregates temporal context
|
| 493 |
+
- **Anomaly Detection Head**: Enhanced attention, multi-layer MLP, optional contrastive learning/type auxiliary head
|
| 494 |
+
- **Feature System**:
|
| 495 |
+
- Physiological: HR, HRV (RMSSD/SDNN/PNN50…)
|
| 496 |
+
- Activity: Steps, distance, energy consumption, acceleration, gyroscope
|
| 497 |
+
- Environmental: Light, day/night labels, data quality
|
| 498 |
+
- Baseline: Adaptive baseline mean/std + deviation features
|
| 499 |
+
- **Label Source**: High-confidence questionnaire labels + low-confidence adaptive baseline labels
|
| 500 |
+
- **Training Pipeline**: Stage1/2/3 data processing ➜ Phase1 self-supervised pre-training ➜ Phase2 supervised fine-tuning ➜ Threshold/case calibration
|
| 501 |
+
|
| 502 |
+
---
|
| 503 |
+
|
| 504 |
+
## 📦 Repository Structure (Partial)
|
| 505 |
+
|
| 506 |
+
```
|
| 507 |
+
├─ configs/
|
| 508 |
+
│ └─ features_config.json # Feature definitions & normalization strategies
|
| 509 |
+
├─ wearable_anomaly_detector.py # Core wrapper: loading, prediction, batch processing
|
| 510 |
+
├─ feature_calculator.py # Configuration-driven feature construction + user history cache
|
| 511 |
+
└─ checkpoints/phase2/... # Model weights & summary
|
| 512 |
+
```
|
| 513 |
+
|
| 514 |
+
---
|
| 515 |
+
|
| 516 |
+
## 🧾 API Documentation
|
| 517 |
+
|
| 518 |
+
- `API_USAGE.md`: lists the parameters and input/output examples for the core interfaces such as `WearableAnomalyDetector`, `AnomalyFormatter`, and `BaselineStorage`.
|
| 519 |
+
- `test_quickstart.py`: a directly runnable self-check script, convenient for verifying interface behavior.
|
| 520 |
+
|
| 521 |
+
---
|
| 522 |
+
|
| 523 |
+
## 📚 Data Source & License
|
| 524 |
+
|
| 525 |
+
- Training data is based on **"A continuous real-world dataset comprising wearable-based heart rate variability alongside sleep diaries"** (Baigutanova *et al.*, Scientific Data, 2025) and its Figshare dataset [doi:10.1038/s41597-025-05801-3](https://www.nature.com/articles/s41597-025-05801-3) / [dataset link](https://springernature.figshare.com/articles/dataset/In-situ_wearable-based_dataset_of_continuous_heart_rate_variability_monitoring_accompanied_by_sleep_diaries/28509740).
|
| 526 |
+
- This dataset is released under **Creative Commons Attribution 4.0 (CC BY 4.0)** license, allowing free use, modification, and distribution, but attribution and license link must be retained.
|
| 527 |
+
- This repository follows CC BY 4.0 requirements for original data; if you further process or publish based on this, please continue to retain the above attribution and license information.
|
| 528 |
+
- Code/models can use MIT/Apache or other licenses as needed, but any parts involving data must still follow CC BY 4.0.
|
| 529 |
+
|
| 530 |
+
---
|
| 531 |
+
|
| 532 |
+
## 🤝 Contributions & Extensions
|
| 533 |
+
|
| 534 |
+
Welcome to:
|
| 535 |
+
1. Add new features or data sources ⇒ Update `features_config.json` + submit PR
|
| 536 |
+
2. Integrate new user data management/baseline strategies ⇒ Extend `FeatureCalculator` or contribute `UserDataManager`
|
| 537 |
+
3. Provide feedback on cases or real deployment experiences ⇒ Open Issues or Discussions
|
| 538 |
+
|
| 539 |
+
---
|
| 540 |
+
|
| 541 |
+
## 📄 License
|
| 542 |
+
|
| 543 |
+
- **Model & Code**: Apache-2.0. Can be used/modified/distributed freely while retaining copyright and license notices.
|
| 544 |
+
- **Training Data**: Original wearable HRV dataset uses CC BY 4.0; please continue to retain author attribution and license information when reusing.
|
| 545 |
+
|
| 546 |
+
---
|
| 547 |
+
|
| 548 |
+
## 🔖 Citation
|
| 549 |
+
|
| 550 |
+
```bibtex
|
| 551 |
+
@software{Wearable_TimeSeries_Health_Monitor,
|
| 552 |
+
title = {Wearable\_TimeSeries\_Health\_Monitor},
|
| 553 |
+
author = {oscarzhang},
|
| 554 |
+
year = {2025},
|
| 555 |
+
url = {https://huggingface.co/oscarzhang/Wearable_TimeSeries_Health_Monitor}
|
| 556 |
+
}
|
| 557 |
+
```
|
| 558 |
+
|
__pycache__/feature_calculator.cpython-313.pyc
ADDED
|
Binary file (16.5 kB). View file
|
|
|
__pycache__/gradio_app.cpython-313.pyc
ADDED
|
Binary file (7.18 kB). View file
|
|
|
__pycache__/wearable_anomaly_detector.cpython-313.pyc
ADDED
|
Binary file (34.3 kB). View file
|
|
|
checkpoints/phase2/exp_factor_balanced/best_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f2f056ea3cec48902ffda2399e905189dce62826034470bb6514f8739eba9ff
|
| 3 |
+
size 27270610
|
configs/api_config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"historical_data_platform": {
|
| 3 |
+
"base_url": "",
|
| 4 |
+
"api_key": "",
|
| 5 |
+
"timeout": 30,
|
| 6 |
+
"retry_times": 3,
|
| 7 |
+
"endpoints": {
|
| 8 |
+
"raw_data": "/api/raw-data/{deviceId}",
|
| 9 |
+
"user_profile": "/api/user-profile/{deviceId}",
|
| 10 |
+
"historical_results": "/api/historical-results/{deviceId}"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"baseline": {
|
| 14 |
+
"storage_type": "file",
|
| 15 |
+
"file_path": "data_storage/baselines.json",
|
| 16 |
+
"database": {
|
| 17 |
+
"enabled": false,
|
| 18 |
+
"type": "sqlite",
|
| 19 |
+
"connection_string": "sqlite:///data_storage/baselines.db"
|
| 20 |
+
},
|
| 21 |
+
"auto_update": true,
|
| 22 |
+
"update_on_detect": true,
|
| 23 |
+
"import_from_csv": true,
|
| 24 |
+
"csv_path": "processed_data/stage1/adaptive_baselines.csv"
|
| 25 |
+
},
|
| 26 |
+
"cache": {
|
| 27 |
+
"user_profile_ttl": 86400,
|
| 28 |
+
"baseline_ttl": 3600
|
| 29 |
+
}
|
| 30 |
+
}
|
| 31 |
+
|
configs/detector_config.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"detection": {
|
| 3 |
+
"window_size": 12,
|
| 4 |
+
"window_interval_minutes": 5,
|
| 5 |
+
"min_duration_days": 3,
|
| 6 |
+
"default_threshold": 0.53
|
| 7 |
+
},
|
| 8 |
+
"baseline": {
|
| 9 |
+
"update_on_detect": true,
|
| 10 |
+
"update_interval_hours": 1,
|
| 11 |
+
"sliding_window_days": 30
|
| 12 |
+
},
|
| 13 |
+
"pattern_detection": {
|
| 14 |
+
"min_duration_days": 3,
|
| 15 |
+
"trend_threshold": 0.01
|
| 16 |
+
}
|
| 17 |
+
}
|
| 18 |
+
|
configs/features_config.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"version": "1.0",
|
| 4 |
+
"description": "Wearable anomaly detection feature configuration"
|
| 5 |
+
},
|
| 6 |
+
"time_series": [
|
| 7 |
+
{"name": "hr", "enabled": true, "default": 70.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 8 |
+
{"name": "hr_resting", "enabled": true, "default": 65.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 9 |
+
{"name": "hrv_rmssd", "enabled": true, "default": 30.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 10 |
+
{"name": "hrv_sdnn", "enabled": true, "default": 40.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 11 |
+
{"name": "hrv_pnn50", "enabled": true, "default": 15.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 12 |
+
{"name": "sdnn", "enabled": true, "default": 35.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 13 |
+
{"name": "sdsd", "enabled": true, "default": 25.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 14 |
+
{"name": "rmssd", "enabled": true, "default": 30.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 15 |
+
{"name": "pnn20", "enabled": true, "default": 25.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 16 |
+
{"name": "pnn50", "enabled": true, "default": 12.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 17 |
+
{"name": "ibi", "enabled": true, "default": 0.86, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 18 |
+
{"name": "lf/hf", "enabled": true, "default": 1.8, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 19 |
+
{"name": "steps", "enabled": true, "default": 20.0, "normalization": {"type": "minmax", "min": 0.0, "max": 500.0}},
|
| 20 |
+
{"name": "distance", "enabled": true, "default": 10.0, "normalization": {"type": "minmax", "min": 0.0, "max": 2000.0}},
|
| 21 |
+
{"name": "calories", "enabled": true, "default": 1.5, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 22 |
+
{"name": "acc_x_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 23 |
+
{"name": "acc_y_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 24 |
+
{"name": "acc_z_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 25 |
+
{"name": "grv_x_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 26 |
+
{"name": "grv_y_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 27 |
+
{"name": "grv_z_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 28 |
+
{"name": "grv_w_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 29 |
+
{"name": "gyr_x_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 30 |
+
{"name": "gyr_y_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 31 |
+
{"name": "gyr_z_avg", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 32 |
+
{"name": "light_avg", "enabled": true, "default": 100.0, "normalization": {"type": "minmax", "min": 0.0, "max": 1000.0}},
|
| 33 |
+
{
|
| 34 |
+
"name": "time_period_primary",
|
| 35 |
+
"enabled": true,
|
| 36 |
+
"default": 2.0,
|
| 37 |
+
"normalization": {"type": "none"},
|
| 38 |
+
"category_mapping": {
|
| 39 |
+
"night": 0,
|
| 40 |
+
"morning": 1,
|
| 41 |
+
"day": 2,
|
| 42 |
+
"evening": 3,
|
| 43 |
+
"unknown": 4
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"name": "time_period_secondary",
|
| 48 |
+
"enabled": true,
|
| 49 |
+
"default": 7.0,
|
| 50 |
+
"normalization": {"type": "none"},
|
| 51 |
+
"category_mapping": {
|
| 52 |
+
"commute_morning": 0,
|
| 53 |
+
"breakfast": 1,
|
| 54 |
+
"work_morning": 2,
|
| 55 |
+
"lunch": 3,
|
| 56 |
+
"work_afternoon": 4,
|
| 57 |
+
"commute_evening": 5,
|
| 58 |
+
"dinner": 6,
|
| 59 |
+
"rest_evening": 7,
|
| 60 |
+
"rest_night": 8,
|
| 61 |
+
"exercise": 9,
|
| 62 |
+
"unknown": 10
|
| 63 |
+
}
|
| 64 |
+
},
|
| 65 |
+
{"name": "is_weekend", "enabled": true, "default": 0.0, "normalization": {"type": "none"}},
|
| 66 |
+
{
|
| 67 |
+
"name": "data_quality",
|
| 68 |
+
"enabled": true,
|
| 69 |
+
"default": 0.9,
|
| 70 |
+
"normalization": {"type": "minmax", "min": 0.0, "max": 1.0},
|
| 71 |
+
"category_mapping": {
|
| 72 |
+
"low": 0.3,
|
| 73 |
+
"medium": 0.6,
|
| 74 |
+
"high": 1.0
|
| 75 |
+
}
|
| 76 |
+
},
|
| 77 |
+
{"name": "missingness_score", "enabled": true, "default": 0.0, "normalization": {"type": "minmax", "min": 0.0, "max": 1.0}},
|
| 78 |
+
{"name": "baseline_hrv_mean", "enabled": true, "default": 30.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 79 |
+
{"name": "baseline_hrv_std", "enabled": true, "default": 5.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 80 |
+
{"name": "hrv_deviation_abs", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 81 |
+
{"name": "hrv_deviation_pct", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}},
|
| 82 |
+
{"name": "hrv_z_score", "enabled": true, "default": 0.0, "normalization": {"type": "zscore", "use_norm_params": true}}
|
| 83 |
+
],
|
| 84 |
+
"static": [
|
| 85 |
+
{"name": "age_group", "enabled": true, "default": -1},
|
| 86 |
+
{"name": "age_normalized", "enabled": true, "default": 0.5},
|
| 87 |
+
{"name": "sex", "enabled": true, "default": 0.5},
|
| 88 |
+
{"name": "marriage", "enabled": true, "default": -1},
|
| 89 |
+
{"name": "exercise", "enabled": true, "default": -1},
|
| 90 |
+
{"name": "coffee", "enabled": true, "default": -1},
|
| 91 |
+
{"name": "smoking", "enabled": true, "default": -1},
|
| 92 |
+
{"name": "drinking", "enabled": true, "default": -1},
|
| 93 |
+
{"name": "MEQ", "enabled": true, "default": 0.0},
|
| 94 |
+
{"name": "baseline_commute_morning_mean", "enabled": true, "default": 30.0},
|
| 95 |
+
{"name": "baseline_commute_morning_std", "enabled": true, "default": 5.0}
|
| 96 |
+
],
|
| 97 |
+
"factor_features": {
|
| 98 |
+
"enabled": true,
|
| 99 |
+
"factor_names": ["physio", "activity", "context"],
|
| 100 |
+
"factor_dim": 4
|
| 101 |
+
},
|
| 102 |
+
"known_future": [
|
| 103 |
+
{"name": "hour_of_day", "enabled": true},
|
| 104 |
+
{"name": "day_of_week", "enabled": true},
|
| 105 |
+
{"name": "is_weekend", "enabled": true}
|
| 106 |
+
]
|
| 107 |
+
}
|
| 108 |
+
|
configs/formatter_config.json
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sections": {
|
| 3 |
+
"anomaly_overview": {
|
| 4 |
+
"enabled": true,
|
| 5 |
+
"title": "异常概览",
|
| 6 |
+
"fields": {
|
| 7 |
+
"anomaly_type": {
|
| 8 |
+
"label": "异常类型",
|
| 9 |
+
"format": "string",
|
| 10 |
+
"default": "未知"
|
| 11 |
+
},
|
| 12 |
+
"duration_days": {
|
| 13 |
+
"label": "持续天数",
|
| 14 |
+
"format": "integer",
|
| 15 |
+
"suffix": "天"
|
| 16 |
+
},
|
| 17 |
+
"trend": {
|
| 18 |
+
"label": "异常趋势",
|
| 19 |
+
"format": "string",
|
| 20 |
+
"default": "未知",
|
| 21 |
+
"mapping": {
|
| 22 |
+
"worsening": "持续恶化",
|
| 23 |
+
"stable": "稳定异常",
|
| 24 |
+
"improving": "逐渐改善"
|
| 25 |
+
}
|
| 26 |
+
},
|
| 27 |
+
"is_anomaly": {
|
| 28 |
+
"label": "是否异常",
|
| 29 |
+
"format": "boolean",
|
| 30 |
+
"true_text": "是",
|
| 31 |
+
"false_text": "否"
|
| 32 |
+
},
|
| 33 |
+
"anomaly_score": {
|
| 34 |
+
"label": "异常分数",
|
| 35 |
+
"format": "float",
|
| 36 |
+
"decimal_places": 4
|
| 37 |
+
},
|
| 38 |
+
"threshold": {
|
| 39 |
+
"label": "阈值",
|
| 40 |
+
"format": "float",
|
| 41 |
+
"decimal_places": 4
|
| 42 |
+
}
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"core_indicators": {
|
| 46 |
+
"enabled": true,
|
| 47 |
+
"title": "核心指标",
|
| 48 |
+
"fields": {
|
| 49 |
+
"hrv_rmssd": {
|
| 50 |
+
"label": "HRV RMSSD",
|
| 51 |
+
"format": "float",
|
| 52 |
+
"decimal_places": 2,
|
| 53 |
+
"suffix": " ms"
|
| 54 |
+
},
|
| 55 |
+
"baseline_mean": {
|
| 56 |
+
"label": "基线值",
|
| 57 |
+
"format": "float",
|
| 58 |
+
"decimal_places": 2,
|
| 59 |
+
"suffix": " ms"
|
| 60 |
+
},
|
| 61 |
+
"deviation_pct": {
|
| 62 |
+
"label": "偏离基线",
|
| 63 |
+
"format": "float",
|
| 64 |
+
"decimal_places": 2,
|
| 65 |
+
"suffix": "%"
|
| 66 |
+
}
|
| 67 |
+
}
|
| 68 |
+
},
|
| 69 |
+
"historical_trend": {
|
| 70 |
+
"enabled": true,
|
| 71 |
+
"title": "历史趋势",
|
| 72 |
+
"fields": {
|
| 73 |
+
"date": {
|
| 74 |
+
"label": "日期",
|
| 75 |
+
"format": "string"
|
| 76 |
+
},
|
| 77 |
+
"hrv_rmssd": {
|
| 78 |
+
"label": "HRV",
|
| 79 |
+
"format": "float",
|
| 80 |
+
"decimal_places": 2,
|
| 81 |
+
"prefix": "HRV=",
|
| 82 |
+
"suffix": " ms"
|
| 83 |
+
},
|
| 84 |
+
"hr": {
|
| 85 |
+
"label": "心率",
|
| 86 |
+
"format": "float",
|
| 87 |
+
"decimal_places": 1,
|
| 88 |
+
"prefix": "心率=",
|
| 89 |
+
"suffix": " bpm"
|
| 90 |
+
},
|
| 91 |
+
"anomaly_score": {
|
| 92 |
+
"label": "异常分数",
|
| 93 |
+
"format": "float",
|
| 94 |
+
"decimal_places": 4,
|
| 95 |
+
"prefix": "异常分数="
|
| 96 |
+
}
|
| 97 |
+
}
|
| 98 |
+
},
|
| 99 |
+
"related_indicators": {
|
| 100 |
+
"enabled": true,
|
| 101 |
+
"title": "相关健康指标",
|
| 102 |
+
"fields": {
|
| 103 |
+
"activity_level": {
|
| 104 |
+
"label": "活动水平",
|
| 105 |
+
"format": "nested",
|
| 106 |
+
"sub_fields": {
|
| 107 |
+
"level": {
|
| 108 |
+
"label": "水平",
|
| 109 |
+
"format": "string"
|
| 110 |
+
},
|
| 111 |
+
"avg_steps": {
|
| 112 |
+
"label": "平均步数",
|
| 113 |
+
"format": "float",
|
| 114 |
+
"decimal_places": 1,
|
| 115 |
+
"prefix": "(平均步数=",
|
| 116 |
+
"suffix": ")"
|
| 117 |
+
}
|
| 118 |
+
}
|
| 119 |
+
},
|
| 120 |
+
"sleep_quality": {
|
| 121 |
+
"label": "睡眠质量",
|
| 122 |
+
"format": "nested",
|
| 123 |
+
"sub_fields": {
|
| 124 |
+
"quality": {
|
| 125 |
+
"label": "质量",
|
| 126 |
+
"format": "string"
|
| 127 |
+
},
|
| 128 |
+
"available": {
|
| 129 |
+
"label": "可用性",
|
| 130 |
+
"format": "boolean",
|
| 131 |
+
"true_text": "数据可用",
|
| 132 |
+
"false_text": "数据不可用"
|
| 133 |
+
}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"stress_indicators": {
|
| 137 |
+
"label": "压力指标",
|
| 138 |
+
"format": "nested",
|
| 139 |
+
"sub_fields": {
|
| 140 |
+
"level": {
|
| 141 |
+
"label": "水平",
|
| 142 |
+
"format": "string"
|
| 143 |
+
}
|
| 144 |
+
}
|
| 145 |
+
}
|
| 146 |
+
}
|
| 147 |
+
},
|
| 148 |
+
"user_profile": {
|
| 149 |
+
"enabled": true,
|
| 150 |
+
"title": "用户背景信息",
|
| 151 |
+
"fields": {
|
| 152 |
+
"estimated_age": {
|
| 153 |
+
"label": "年龄",
|
| 154 |
+
"format": "string_or_nested",
|
| 155 |
+
"fallback": "age_group"
|
| 156 |
+
},
|
| 157 |
+
"sex": {
|
| 158 |
+
"label": "性别",
|
| 159 |
+
"format": "string"
|
| 160 |
+
},
|
| 161 |
+
"exercise": {
|
| 162 |
+
"label": "运动频率",
|
| 163 |
+
"format": "string"
|
| 164 |
+
},
|
| 165 |
+
"coffee": {
|
| 166 |
+
"label": "咖啡消费",
|
| 167 |
+
"format": "string"
|
| 168 |
+
},
|
| 169 |
+
"drinking": {
|
| 170 |
+
"label": "饮酒状况",
|
| 171 |
+
"format": "string"
|
| 172 |
+
},
|
| 173 |
+
"MEQ_type": {
|
| 174 |
+
"label": "MEQ类型",
|
| 175 |
+
"format": "string"
|
| 176 |
+
}
|
| 177 |
+
}
|
| 178 |
+
}
|
| 179 |
+
},
|
| 180 |
+
"formatting": {
|
| 181 |
+
"section_prefix": "## ",
|
| 182 |
+
"section_suffix": "\n",
|
| 183 |
+
"field_prefix": "- ",
|
| 184 |
+
"field_suffix": "\n",
|
| 185 |
+
"line_separator": "\n",
|
| 186 |
+
"header": "# 健康异常检测结果\n"
|
| 187 |
+
}
|
| 188 |
+
}
|
| 189 |
+
|
data_storage/baselines.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"device_id": "test_user",
|
| 4 |
+
"feature_name": "hrv_rmssd",
|
| 5 |
+
"baseline_type": "personal",
|
| 6 |
+
"baseline_mean": 75.45454545454545,
|
| 7 |
+
"baseline_std": 5.0,
|
| 8 |
+
"personal_mean": 75.0,
|
| 9 |
+
"personal_std": 5.0,
|
| 10 |
+
"data_count": 11,
|
| 11 |
+
"time_period_primary": "morning",
|
| 12 |
+
"time_period_secondary": "",
|
| 13 |
+
"is_weekend": 0,
|
| 14 |
+
"last_updated": "2025-11-27T14:24:45.274978"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"device_id": "test_user_003",
|
| 18 |
+
"feature_name": "hrv_rmssd",
|
| 19 |
+
"baseline_type": "personal",
|
| 20 |
+
"baseline_mean": 74.83870967741936,
|
| 21 |
+
"baseline_std": 5.0,
|
| 22 |
+
"personal_mean": 75.0,
|
| 23 |
+
"personal_std": 5.0,
|
| 24 |
+
"group_mean": 75.0,
|
| 25 |
+
"data_count": 31,
|
| 26 |
+
"time_period_primary": "",
|
| 27 |
+
"time_period_secondary": "",
|
| 28 |
+
"is_weekend": 0,
|
| 29 |
+
"last_updated": "2025-11-27T14:34:26.141050"
|
| 30 |
+
}
|
| 31 |
+
]
|
demo_llm_inputs/case_am77_full.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"case_id": "case_am77_full",
|
| 3 |
+
"summary": {
|
| 4 |
+
"anomaly_type": "continuous_anomaly",
|
| 5 |
+
"duration_days": 7,
|
| 6 |
+
"trend": "stable",
|
| 7 |
+
"description": "检测到持续7天的异常模式,趋势:稳定异常,异常分数范围:0.4827 - 0.4890,平均分数:0.4871"
|
| 8 |
+
},
|
| 9 |
+
"user_profile": {
|
| 10 |
+
"age_group": "30-35岁",
|
| 11 |
+
"estimated_age": 32,
|
| 12 |
+
"sex": "男性",
|
| 13 |
+
"exercise": "每周5次以上",
|
| 14 |
+
"coffee": "不喝咖啡",
|
| 15 |
+
"smoking": "不吸烟",
|
| 16 |
+
"drinking": "经常饮酒",
|
| 17 |
+
"MEQ": 64.0,
|
| 18 |
+
"MEQ_type": "晨型"
|
| 19 |
+
},
|
| 20 |
+
"messages": [
|
| 21 |
+
{
|
| 22 |
+
"role": "system",
|
| 23 |
+
"content": "请阅读以下健康异常检测输入描述,生成符合P3与PROCEED框架的个性化健康干预方案,需列出异常点、推理过程、误报判断、紧急处理、长期方案及用户特征说明,且所有建议需为个人可执行。"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"role": "user",
|
| 27 |
+
"content": "# 健康异常检测案例报告\n\n## 异常概览\n\n**异常类型**:持续性异常 \n**持续时间**:7天 \n**异常趋势**:稳定型 \n**严重程度**:轻度异常 \n**状态确认**:**真实异常**(非误报案例)\n\n### 异常评分分析\n- **异常分数范围**:0.4827 - 0.4890\n- **平均异常分数**:0.4871\n- **检测阈值**:0.4800\n- **异常状态**:持续超过阈值但幅度有限\n\n## 核心指标状态\n\n### 心率变异性(HRV)分析\n| 指标 | 当前值 | 偏离基线 | 基线值 | Z-score |\n|------|--------|----------|--------|---------|\n| HRV RMSSD | 68.11 ms | +1.0% | 67.46 ms | 0.02 |\n\n**基线特征**:\n- 基线类型:个人主要基线\n- 基线可靠性:高\n- 个人基线:68.31 ms(标准差:32.86 ms,基于63次历史记录)\n- 群体基线:59.85 ms\n\n## 历史趋势分析\n\n### 关键指标监测数据\n| 日期 | HRV (ms) | 心率 (bpm) | 异常分数 |\n|------|----------|------------|----------|\n| 2021-03-07 | 67.14 | 80.2 | 0.4827 |\n| 2021-03-09 | 84.74 | 71.5 | 0.4869 |\n| 2021-03-14 | 77.41 | 68.6 | 0.4869 |\n| 2021-03-17 | 75.94 | 75.5 | 0.4884 |\n| 2021-03-20 | 66.86 | 68.7 | 0.4875 |\n| 2021-03-27 | 92.36 | 74.8 | 0.4884 |\n| 2021-03-28 | 68.11 | 67.6 | 0.4890 |\n\n## 相关健康指标\n\n### 活动水平分析\n- **总体状态**:低活动水平\n- **平均步数**:3.1步/日\n- **平均卡路里消耗**:0.1千卡/日\n- **趋势特征**:持续下降模式\n\n### 其他健康指标\n- **睡眠质量**:数据不可用\n- **压力指标**:无显著压力表现\n\n## 用户背景信息\n\n### 人口统计学特征\n- **年龄**:约32岁(30-35岁区间)\n- **性别**:男性\n- **昼夜节律类型**:MEQ得分64.0(晨型人格)\n\n### 生活习惯特征\n- **运动频率**:每周5次以上(高频率)\n- **咖啡消费**:不喝咖啡\n- **吸烟状况**:不吸烟\n- **饮酒状况**:经常饮酒\n\n## 临床意义评估\n\n### 异常特征总结\n该异常表现为持续7天的稳定型轻度异常,具有以下特征:\n- HRV指标仅轻微偏离个人基线(+1.0%)\n- Z-score为0.02,表明偏离程度在统计学上不显著\n- 异常分数持续但稳定,无明显恶化趋势\n\n### 风险评估与建议\n考虑到用户的高运动频率和良好的生活习惯基础,此异常更可能属于生理性波动范畴。建议:\n- 继续定期监测相关指标\n- 关注活动水平下降的潜在影响\n- 现阶段无需紧急医疗干预\n- 如异常持续或加重,建议进一步评估"
|
| 28 |
+
}
|
| 29 |
+
]
|
| 30 |
+
}
|
demo_llm_inputs/case_ba30_full.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"case_id": "case_ba30_full",
|
| 3 |
+
"summary": {
|
| 4 |
+
"anomaly_type": "continuous_anomaly",
|
| 5 |
+
"duration_days": 24,
|
| 6 |
+
"trend": "stable",
|
| 7 |
+
"description": "检测到持续24天的异常模式,趋势:稳定异常,异常分数范围:0.4866 - 0.4888,平均分数:0.4881"
|
| 8 |
+
},
|
| 9 |
+
"user_profile": {
|
| 10 |
+
"age_group": "25-30岁",
|
| 11 |
+
"estimated_age": 27,
|
| 12 |
+
"sex": "男性",
|
| 13 |
+
"exercise": "很少运动",
|
| 14 |
+
"coffee": "每天2-3杯",
|
| 15 |
+
"smoking": "不吸烟",
|
| 16 |
+
"drinking": "经常饮酒",
|
| 17 |
+
"MEQ": 62.0,
|
| 18 |
+
"MEQ_type": "晨型"
|
| 19 |
+
},
|
| 20 |
+
"messages": [
|
| 21 |
+
{
|
| 22 |
+
"role": "system",
|
| 23 |
+
"content": "请阅读以下健康异常检测输入描述,生成符合P3与PROCEED框架的个性化健康干预方案,需列出异常点、推理过程、误报判断、紧急处理、长期方案及用户特征说明,且所有建议需为个人可执行。"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"role": "user",
|
| 27 |
+
"content": "# 健康异常检测分析报告\n\n## 异常概况\n\n**异常类型**:持续性异常 \n**持续时间**:24天 \n**异常趋势**:稳定型 \n**严重程度**:轻度异常 \n**状态确认**:确认真实异常(非误报)\n\n## 关键指标分析\n\n### 异常评分特征\n- **异常分数范围**:0.4866 - 0.4888\n- **平均异常分数**:0.4881\n- **检测阈值**:0.4800\n- **异常持续性**:所有检测点均超过阈值\n\n### 当前生理状态\n- **HRV RMSSD当前值**:102.50 ms\n- **相对于基线偏离**:+7.5%\n- **个人基线值**:95.32 ms\n- **Z-score统计值**:0.27(轻度偏离)\n- **基线可靠性**:高可靠性(基于348条个人历史记录)\n\n## 历史趋势数据\n\n| 日期 | HRV (ms) | 心率 (bpm) | 异常分数 |\n|------|----------|------------|----------|\n| 2021-03-11 | 71.07 | 62.4 | 0.4881 |\n| 2021-03-13 | 87.16 | 57.0 | 0.4888 |\n| 2021-03-14 | 107.80 | 56.7 | 0.4876 |\n| 2021-03-16 | 93.93 | 61.1 | 0.4882 |\n| 2021-03-17 | 96.05 | 61.3 | 0.4881 |\n| 2021-03-18 | 82.32 | 64.0 | 0.4882 |\n| 2021-03-19 | 92.42 | 59.8 | 0.4866 |\n| 2021-03-20 | 91.41 | 56.7 | 0.4887 |\n| 2021-03-21 | 84.49 | 59.8 | 0.4882 |\n| 2021-03-23 | 94.86 | 61.7 | 0.4882 |\n| 2021-03-24 | 100.66 | 58.6 | 0.4882 |\n| 2021-03-25 | 102.66 | 62.7 | 0.4882 |\n| 2021-03-26 | 95.19 | 58.2 | 0.4882 |\n| 2021-03-27 | 96.99 | 57.0 | 0.4888 |\n| 2021-03-28 | 77.60 | 58.2 | 0.4876 |\n| 2021-03-29 | 104.94 | 59.4 | 0.4881 |\n| 2021-03-30 | 95.22 | 62.1 | 0.4878 |\n| 2021-03-31 | 95.29 | 59.7 | 0.4882 |\n| 2021-04-01 | 96.67 | 59.8 | 0.4882 |\n| 2021-04-02 | 123.00 | 56.5 | 0.4876 |\n| 2021-04-03 | 93.23 | 67.5 | 0.4883 |\n| 2021-04-05 | 102.81 | 56.0 | 0.4882 |\n| 2021-04-06 | 96.31 | 59.8 | 0.4881 |\n| 2021-04-07 | 102.50 | 61.2 | 0.4878 |\n\n## 相关健康指标\n\n### 活动水平\n- **总体状态**:低水平\n- **平均步数**:13.4步\n- **平均卡路里消耗**:0.6千卡\n- **趋势特征**:缓慢增加\n\n### 其他指标\n- **睡眠质量**:数据不可用\n- **压力指标**:未检测到明显压力信号\n\n## 基线参考信息\n\n### 个人基线\n- **HRV基线值**:96.79 ms\n- **标准差**:26.79 ms\n- **数据基础**:基于348条历史记录\n\n### 群体比较\n- **群体基线**:82.08 ms\n- **个人相对群体**:高于群体平均水平\n\n## 用户个性化档案\n\n### 基本信息\n- **性别**:男性\n- **年龄**:约27岁(25-30岁)\n- **昼夜节律**:MEQ得分62.0(晨型人格)\n\n### 生活习惯\n- **运动习惯**:很少运动\n- **咖啡消费**:每天2-3杯\n- **吸烟状况**:不吸烟\n- **饮酒状况**:经常饮酒\n\n## 临床评估与建议\n\n### 
异常特征总结\n该案例显示持续24天的稳定型HRV轻度异常,异常分数持续略高于检测阈值(0.4800)。HRV RMSSD值在监测期间呈现波动,但整体维持在个人基线水平附近。\n\n### 影响因素分析\n考虑到用户的活动水平极低(平均步数13.4步)、经常饮酒的生活习惯,以及相对稳定的心率表现,此异常可能反映了自主神经系统的轻微调节变化。\n\n### 监测建议\n1. 持续监测HRV趋势变化\n2. 结合更多生理参数进行综合评估\n3. 关注生活方式因素对自主神经系统的影响\n4. 建议记录饮酒与活动水平的相关性\n\n**重要提示**:此为真实异常案例,非误报情况,建议保持定期监测。"
|
| 28 |
+
}
|
| 29 |
+
]
|
| 30 |
+
}
|
demo_llm_inputs/case_ej27_full.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"case_id": "case_ej27_full",
|
| 3 |
+
"summary": {
|
| 4 |
+
"anomaly_type": "continuous_anomaly",
|
| 5 |
+
"duration_days": 27,
|
| 6 |
+
"trend": "stable",
|
| 7 |
+
"description": "检测到持续27天的异常模式,趋势:稳定异常,异常分数范围:0.4846 - 0.4895,平均分数:0.4878"
|
| 8 |
+
},
|
| 9 |
+
"user_profile": {
|
| 10 |
+
"age_group": "30-35岁",
|
| 11 |
+
"estimated_age": 30,
|
| 12 |
+
"sex": "女性",
|
| 13 |
+
"exercise": "每周1-2次",
|
| 14 |
+
"coffee": "每天1杯",
|
| 15 |
+
"smoking": "不吸烟",
|
| 16 |
+
"drinking": "不饮酒",
|
| 17 |
+
"MEQ": 49.0,
|
| 18 |
+
"MEQ_type": "中间型"
|
| 19 |
+
},
|
| 20 |
+
"messages": [
|
| 21 |
+
{
|
| 22 |
+
"role": "system",
|
| 23 |
+
"content": "请阅读以下健康异常检测输入描述,生成符合P3与PROCEED框架的个性化健康干预方案,需列出异常点、推理过程、误报判断、紧急处理、长期方案及用户特征说明,且所有建议需为个人可执行。"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"role": "user",
|
| 27 |
+
"content": "# 健康异常检测案例报告\n\n## 异常概况\n\n**异常类型**:持续性异常 \n**持续时间**:27天(2021年3月8日至4月3日) \n**异常趋势**:稳定型异常 \n**严重程度**:轻度异常 \n\n## 异常评分分析\n\n**异常分数范围**:0.4846 - 0.4895 \n**平均异常分数**:0.4878 \n**检测阈值**:0.4800 \n**异常状态**:持续超过阈值水平 \n\n## 当前生理状态\n\n- **HRV RMSSD当前值**:89.09 ms\n- **相对于基线偏离**:3.4%(基线值:86.18 ms)\n- **统计显著性**:Z-score = 0.09(偏离程度轻微)\n- **基线可靠性**:高(基于个人主要基线)\n\n## 历史趋势分析\n\n### 每日生理指标变化\n\n| 日期 | HRV (ms) | 心率 (bpm) | 异常分数 |\n|------|----------|------------|----------|\n| 2021-03-08 | 85.27 | 86.6 | 0.4883 |\n| 2021-03-09 | 90.12 | 83.2 | 0.4877 |\n| 2021-03-10 | 85.98 | 79.2 | 0.4858 |\n| 2021-03-11 | 77.37 | 86.4 | 0.4884 |\n| 2021-03-12 | 75.68 | 83.7 | 0.4882 |\n| 2021-03-13 | 75.46 | 85.9 | 0.4895 |\n| 2021-03-14 | 77.03 | 82.9 | 0.4875 |\n| 2021-03-15 | 79.40 | 89.8 | 0.4883 |\n| 2021-03-16 | 72.56 | 91.8 | 0.4884 |\n| 2021-03-17 | 72.58 | 89.8 | 0.4885 |\n| 2021-03-18 | 73.32 | 79.4 | 0.4875 |\n| 2021-03-19 | 85.93 | 87.6 | 0.4883 |\n| 2021-03-20 | 83.36 | 78.7 | 0.4867 |\n| 2021-03-21 | 77.28 | 81.9 | 0.4876 |\n| 2021-03-22 | 93.25 | 92.6 | 0.4846 |\n| 2021-03-23 | 80.82 | 85.9 | 0.4884 |\n| 2021-03-24 | 77.66 | 88.3 | 0.4884 |\n| 2021-03-25 | 87.05 | 79.9 | 0.4883 |\n| 2021-03-26 | 94.50 | 75.1 | 0.4870 |\n| 2021-03-27 | 97.29 | 83.0 | 0.4860 |\n| 2021-03-28 | 97.68 | 73.7 | 0.4885 |\n| 2021-03-29 | 100.65 | 82.4 | 0.4884 |\n| 2021-03-30 | 75.52 | 86.9 | 0.4883 |\n| 2021-03-31 | 75.52 | 84.1 | 0.4875 |\n| 2021-04-01 | 89.34 | 83.4 | 0.4884 |\n| 2021-04-02 | 89.58 | 80.0 | 0.4881 |\n| 2021-04-03 | 89.09 | 75.0 | 0.4867 |\n\n## 相关健康指标\n\n- **睡眠质量**:数据不可用\n- **活动水平**:低\n - 平均步数:16.0步\n - 平均卡路里消耗:0.6千卡\n - 趋势:下降中\n- **压力指标**:中等水平\n - 具体表现:心率升高\n\n## 基线参考标准\n\n**个人基线**:\n- 均值:88.34 ms\n- 标准差:31.20 ms\n- 数据记录数:83条\n- 基线类型:个人主要基线\n\n**群体基线**:66.76 ms\n\n## 用户背景信息\n\n- **人口统计学**:\n - 年龄:30-35岁女性\n - MEQ得分:49.0(中间型昼夜节律)\n\n- **生活方式**:\n - 运动频率:每周1-2次\n - 咖啡消费:每天1杯\n - 吸烟状况:不吸烟\n - 饮酒状况:不饮酒\n\n## 临床评估\n\n**误报状态**:确认为真实异常(非误报) 
\n**监测建议**:建议继续监测HRV变化趋势,关注活动水平下降与压力指标的关联性,考虑增加日常活动量以改善整体生理状态。"
|
| 28 |
+
}
|
| 29 |
+
]
|
| 30 |
+
}
|
demo_llm_inputs/manifest.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"case_id": "case_am77_full",
|
| 4 |
+
"title": "case_am77_full:continuous_anomaly (7天)",
|
| 5 |
+
"file": "case_am77_full.json"
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"case_id": "case_ba30_full",
|
| 9 |
+
"title": "case_ba30_full:continuous_anomaly (24天)",
|
| 10 |
+
"file": "case_ba30_full.json"
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"case_id": "case_ej27_full",
|
| 14 |
+
"title": "case_ej27_full:continuous_anomaly (27天)",
|
| 15 |
+
"file": "case_ej27_full.json"
|
| 16 |
+
}
|
| 17 |
+
]
|
feature_calculator.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Dict, List, Optional, Any
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class FeatureCalculator:
|
| 11 |
+
"""
|
| 12 |
+
统一从配置文件加载特征定义,构建推理/训练需要的窗口结构
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
def __init__(
|
| 16 |
+
self,
|
| 17 |
+
config_path: Optional[Path] = None,
|
| 18 |
+
norm_params_path: Optional[Path] = None,
|
| 19 |
+
static_features_path: Optional[Path] = None,
|
| 20 |
+
storage_dir: Optional[Path] = None,
|
| 21 |
+
):
|
| 22 |
+
base_dir = Path(__file__).parent
|
| 23 |
+
self.config_path = Path(config_path or base_dir / "configs" / "features_config.json")
|
| 24 |
+
self.norm_params_path = Path(norm_params_path or base_dir / "processed_data" / "stage3" / "norm_params.json")
|
| 25 |
+
self.static_features_path = Path(static_features_path or base_dir / "processed_data" / "stage2" / "static_features.csv")
|
| 26 |
+
self.storage_dir = Path(storage_dir or base_dir / "data_storage")
|
| 27 |
+
self.storage_dir.mkdir(parents=True, exist_ok=True)
|
| 28 |
+
|
| 29 |
+
self.features_config = self._load_json(self.config_path)
|
| 30 |
+
self.norm_params = self._load_json(self.norm_params_path) if self.norm_params_path.exists() else {}
|
| 31 |
+
self.static_features_dict = self._load_static_features(self.static_features_path)
|
| 32 |
+
|
| 33 |
+
self.time_series_features = [f for f in self.features_config.get("time_series", []) if f.get("enabled", True)]
|
| 34 |
+
self.static_feature_defs = [f for f in self.features_config.get("static", []) if f.get("enabled", True)]
|
| 35 |
+
self.known_future_defs = [f for f in self.features_config.get("known_future", []) if f.get("enabled", True)]
|
| 36 |
+
factor_cfg = self.features_config.get("factor_features", {})
|
| 37 |
+
self.factor_enabled = factor_cfg.get("enabled", False)
|
| 38 |
+
self.factor_names = factor_cfg.get("factor_names", [])
|
| 39 |
+
self.factor_dim = factor_cfg.get("factor_dim", 0)
|
| 40 |
+
|
| 41 |
+
# 简单的内存级历史缓存,便于后续扩展个性化特征
|
| 42 |
+
self.user_histories: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
|
| 43 |
+
|
| 44 |
+
@staticmethod
|
| 45 |
+
def _load_json(path: Path) -> Dict:
|
| 46 |
+
if not path.exists():
|
| 47 |
+
return {}
|
| 48 |
+
with open(path, "r") as f:
|
| 49 |
+
return json.load(f)
|
| 50 |
+
|
| 51 |
+
@staticmethod
|
| 52 |
+
def _load_static_features(static_file: Path) -> Dict[str, Dict]:
|
| 53 |
+
if not static_file.exists():
|
| 54 |
+
return {}
|
| 55 |
+
df = pd.read_csv(static_file)
|
| 56 |
+
static_dict = {}
|
| 57 |
+
for _, row in df.iterrows():
|
| 58 |
+
device_id = str(row.get("deviceId"))
|
| 59 |
+
if device_id:
|
| 60 |
+
static_dict[device_id] = {
|
| 61 |
+
col: row[col]
|
| 62 |
+
for col in df.columns
|
| 63 |
+
if col != "deviceId"
|
| 64 |
+
}
|
| 65 |
+
return static_dict
|
| 66 |
+
|
| 67 |
+
@staticmethod
|
| 68 |
+
def _to_serializable(value):
|
| 69 |
+
import numpy as np
|
| 70 |
+
from datetime import datetime
|
| 71 |
+
if isinstance(value, (np.integer, )):
|
| 72 |
+
return int(value)
|
| 73 |
+
if isinstance(value, (np.floating, )):
|
| 74 |
+
return float(value)
|
| 75 |
+
if isinstance(value, (pd.Timestamp, datetime)):
|
| 76 |
+
return value.isoformat()
|
| 77 |
+
if isinstance(value, (np.ndarray, )):
|
| 78 |
+
return value.tolist()
|
| 79 |
+
raise TypeError(f"Object of type {type(value)} is not JSON serializable")
|
| 80 |
+
|
| 81 |
+
def register_data_points(self, user_id: str, data_points: List[Dict]):
|
| 82 |
+
"""
|
| 83 |
+
轻量缓存用户数据,并写入 data_storage/users/{user_id}.jsonl
|
| 84 |
+
"""
|
| 85 |
+
if not user_id:
|
| 86 |
+
return
|
| 87 |
+
user_dir = self.storage_dir / "users"
|
| 88 |
+
user_dir.mkdir(exist_ok=True, parents=True)
|
| 89 |
+
history_file = user_dir / f"{user_id}.jsonl"
|
| 90 |
+
|
| 91 |
+
with history_file.open("a", encoding="utf-8") as f:
|
| 92 |
+
for point in data_points:
|
| 93 |
+
serializable = dict(point)
|
| 94 |
+
ts = serializable.get('timestamp')
|
| 95 |
+
if isinstance(ts, (pd.Timestamp, )):
|
| 96 |
+
serializable['timestamp'] = ts.isoformat()
|
| 97 |
+
elif hasattr(ts, "isoformat"):
|
| 98 |
+
serializable['timestamp'] = ts.isoformat()
|
| 99 |
+
f.write(json.dumps(serializable, ensure_ascii=False, default=self._to_serializable) + "\n")
|
| 100 |
+
|
| 101 |
+
self.user_histories[user_id].extend(data_points)
|
| 102 |
+
# 只保留最近 5,000 条在内存,避免占用
|
| 103 |
+
if len(self.user_histories[user_id]) > 5000:
|
| 104 |
+
self.user_histories[user_id] = self.user_histories[user_id][-5000:]
|
| 105 |
+
|
| 106 |
+
def normalize_series(self, values: List[float], feature_name: str, cfg: Dict) -> List[float]:
    """Normalize one raw feature series according to its configuration.

    Supported ``normalization.type`` values: "zscore" (stats resolved via
    ``_get_norm_stats``, zero std treated as 1.0), "minmax" (range clamped
    to at least 1e-6 to avoid division by zero) and "none" (pass-through).
    NaN/inf values in the result are replaced with 0.0.
    """
    series = np.asarray(values, dtype=np.float32)
    norm_cfg = cfg.get("normalization", {"type": "none"})
    mode = norm_cfg.get("type", "none")

    if mode == "zscore":
        mean, std = self._get_norm_stats(feature_name, norm_cfg)
        series = (series - mean) / (std if std != 0 else 1.0)
    elif mode == "minmax":
        lo = norm_cfg.get("min", 0.0)
        hi = norm_cfg.get("max", 1.0)
        series = (series - lo) / max(hi - lo, 1e-6)
    # any other mode leaves the values untouched

    return np.nan_to_num(series, nan=0.0, posinf=0.0, neginf=0.0).tolist()
|
| 127 |
+
|
| 128 |
+
@staticmethod
|
| 129 |
+
def _coerce_value(value, feat_cfg):
|
| 130 |
+
default = feat_cfg.get("default", 0.0)
|
| 131 |
+
if value is None or pd.isna(value):
|
| 132 |
+
return default
|
| 133 |
+
category_mapping = feat_cfg.get("category_mapping")
|
| 134 |
+
if isinstance(value, str):
|
| 135 |
+
if category_mapping:
|
| 136 |
+
return category_mapping.get(value, default)
|
| 137 |
+
try:
|
| 138 |
+
return float(value)
|
| 139 |
+
except ValueError:
|
| 140 |
+
return default
|
| 141 |
+
try:
|
| 142 |
+
return float(value)
|
| 143 |
+
except (TypeError, ValueError):
|
| 144 |
+
return default
|
| 145 |
+
|
| 146 |
+
def _get_norm_stats(self, feature_name: str, norm_cfg: Dict) -> (float, float):
|
| 147 |
+
if norm_cfg.get("use_norm_params") and feature_name in self.norm_params:
|
| 148 |
+
stats = self.norm_params[feature_name]
|
| 149 |
+
return stats.get("mean", 0.0), stats.get("std", 1.0)
|
| 150 |
+
return norm_cfg.get("mean", 0.0), norm_cfg.get("std", 1.0)
|
| 151 |
+
|
| 152 |
+
def build_window(self, data_points: List[Dict], user_id: Optional[str] = None) -> Dict:
    """Assemble a model-ready window dict from raw data points.

    Expects at least 12 points (12 x 5 min = 1 h short-term window); points
    beyond the first 12 become the prediction target. Returns a dict with
    input/target timestamps and delta_t, normalized per-feature series,
    static features, known-future calendar features and optional factor
    vectors.

    Raises:
        ValueError: fewer than 12 data points were supplied.
    """
    if len(data_points) < 12:
        raise ValueError("数据点不足,需要至少12个点构建短期窗口")

    # Persist the raw points per user before any transformation.
    if user_id:
        self.register_data_points(user_id, data_points)

    timestamps = []
    input_features = {feat["name"]: [] for feat in self.time_series_features}

    for point in data_points:
        ts = point.get("timestamp")
        if isinstance(ts, str):
            ts = pd.to_datetime(ts)
        timestamps.append(ts)

        feature_payload = point.get("features", {})
        for feat_cfg in self.time_series_features:
            name = feat_cfg["name"]
            value = feature_payload.get(name)
            value = self._coerce_value(value, feat_cfg)
            input_features[name].append(value)

    # delta_t: seconds between consecutive points (first point gets 0.0).
    delta_t = [0.0]
    for i in range(1, len(timestamps)):
        diff = (timestamps[i] - timestamps[i - 1]).total_seconds()
        delta_t.append(float(diff))

    # Per-feature, config-driven normalization.
    # NOTE(review): timestamps/delta_t are sliced to the first 12 entries in
    # the return value, but the normalized feature series keep ALL points —
    # confirm downstream consumers tolerate series longer than 12 when more
    # than 12 points are passed in.
    normalized_features = {}
    for feat_cfg in self.time_series_features:
        name = feat_cfg["name"]
        normalized_features[name] = self.normalize_series(input_features[name], name, feat_cfg)

    static_features = self._build_static_features(data_points[0], user_id)
    factor_features = self._build_factor_features(normalized_features)
    # Known-future calendar features cover the last up-to-6 timesteps.
    known_future = self._build_known_future(timestamps[-6:] if len(timestamps) >= 6 else timestamps)

    return {
        "input_timestamp": timestamps[:12],
        "input_delta_t": delta_t[:12],
        "input_features": normalized_features,
        "target_timestamp": timestamps[12:] if len(timestamps) > 12 else [],
        "target_delta_t": delta_t[12:] if len(delta_t) > 12 else [],
        "static_features": static_features,
        "known_future_features": known_future,
        "factor_features": factor_features,
    }
|
| 201 |
+
|
| 202 |
+
def _build_static_features(self, first_point: Dict, user_id: Optional[str]) -> Dict:
|
| 203 |
+
static_payload = dict(first_point.get("static_features", {}))
|
| 204 |
+
device_id = first_point.get("deviceId") or user_id
|
| 205 |
+
|
| 206 |
+
if device_id and str(device_id) in self.static_features_dict:
|
| 207 |
+
for key, value in self.static_features_dict[str(device_id)].items():
|
| 208 |
+
static_payload.setdefault(key, value)
|
| 209 |
+
|
| 210 |
+
result = {}
|
| 211 |
+
for feat_cfg in self.static_feature_defs:
|
| 212 |
+
name = feat_cfg["name"]
|
| 213 |
+
result[name] = static_payload.get(name, feat_cfg.get("default", 0.0))
|
| 214 |
+
return result
|
| 215 |
+
|
| 216 |
+
def _build_factor_features(self, normalized_features: Dict[str, List[float]]) -> Optional[Dict[str, List[float]]]:
|
| 217 |
+
if not self.factor_enabled or not self.factor_names:
|
| 218 |
+
return None
|
| 219 |
+
|
| 220 |
+
factor_vectors = {}
|
| 221 |
+
for factor_name in self.factor_names:
|
| 222 |
+
# 目前采用简单均值/最大值/最小值/最后值,方便后续替换
|
| 223 |
+
merged = []
|
| 224 |
+
for feat_name, values in normalized_features.items():
|
| 225 |
+
if factor_name == "physio" and feat_name.startswith("hrv"):
|
| 226 |
+
merged.extend(values)
|
| 227 |
+
elif factor_name == "activity" and feat_name in {"steps", "distance", "calories"}:
|
| 228 |
+
merged.extend(values)
|
| 229 |
+
elif factor_name == "context" and feat_name in {"time_period_primary", "time_period_secondary", "is_weekend"}:
|
| 230 |
+
merged.extend(values)
|
| 231 |
+
|
| 232 |
+
if not merged:
|
| 233 |
+
factor_vectors[factor_name] = [0.0] * self.factor_dim
|
| 234 |
+
else:
|
| 235 |
+
arr = np.array(merged, dtype=np.float32)
|
| 236 |
+
stats = [
|
| 237 |
+
float(arr.mean()),
|
| 238 |
+
float(arr.std()),
|
| 239 |
+
float(arr.max()),
|
| 240 |
+
float(arr.min())
|
| 241 |
+
]
|
| 242 |
+
factor_vectors[factor_name] = stats[: self.factor_dim] if len(stats) >= self.factor_dim else stats + [0.0] * (self.factor_dim - len(stats))
|
| 243 |
+
return factor_vectors
|
| 244 |
+
|
| 245 |
+
def _build_known_future(self, timestamps: List[pd.Timestamp]) -> Dict[str, List[float]]:
|
| 246 |
+
hours, days, weekends = [], [], []
|
| 247 |
+
for ts in timestamps:
|
| 248 |
+
if pd.isna(ts):
|
| 249 |
+
hours.append(12.0)
|
| 250 |
+
days.append(3.0)
|
| 251 |
+
weekends.append(0.0)
|
| 252 |
+
else:
|
| 253 |
+
hours.append(float(ts.hour))
|
| 254 |
+
days.append(float(ts.weekday()))
|
| 255 |
+
weekends.append(float(1 if ts.weekday() >= 5 else 0))
|
| 256 |
+
|
| 257 |
+
result = {}
|
| 258 |
+
for cfg in self.known_future_defs:
|
| 259 |
+
name = cfg["name"]
|
| 260 |
+
if name == "hour_of_day":
|
| 261 |
+
result[name] = hours
|
| 262 |
+
elif name == "day_of_week":
|
| 263 |
+
result[name] = days
|
| 264 |
+
elif name == "is_weekend":
|
| 265 |
+
result[name] = weekends
|
| 266 |
+
return result
|
| 267 |
+
|
| 268 |
+
def get_enabled_feature_names(self) -> List[str]:
    """Return the names of all configured time-series features, in order."""
    names: List[str] = []
    for feat_cfg in self.time_series_features:
        names.append(feat_cfg["name"])
    return names
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
__all__ = ["FeatureCalculator"]
|
| 273 |
+
|
processed_data/stage3/norm_params.json
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hr_mean": {
|
| 3 |
+
"mean": 79.88385009765625,
|
| 4 |
+
"std": 15.546831130981445,
|
| 5 |
+
"min": 33.0,
|
| 6 |
+
"max": 200.2244873046875
|
| 7 |
+
},
|
| 8 |
+
"hr_std": {
|
| 9 |
+
"mean": 12.757049560546875,
|
| 10 |
+
"std": 3.9224278926849365,
|
| 11 |
+
"min": 0.0,
|
| 12 |
+
"max": 32.2431755065918
|
| 13 |
+
},
|
| 14 |
+
"hr_median": {
|
| 15 |
+
"mean": 76.4555892944336,
|
| 16 |
+
"std": 6.908801555633545,
|
| 17 |
+
"min": 48.0,
|
| 18 |
+
"max": 104.0
|
| 19 |
+
},
|
| 20 |
+
"hr_resting": {
|
| 21 |
+
"mean": 65.74867248535156,
|
| 22 |
+
"std": 7.843548774719238,
|
| 23 |
+
"min": 44.12284469604492,
|
| 24 |
+
"max": 86.0
|
| 25 |
+
},
|
| 26 |
+
"hr_nrem": {
|
| 27 |
+
"mean": 61.779720306396484,
|
| 28 |
+
"std": 11.666051864624023,
|
| 29 |
+
"min": 0.0,
|
| 30 |
+
"max": 92.5469970703125
|
| 31 |
+
},
|
| 32 |
+
"hrv_rmssd": {
|
| 33 |
+
"mean": 83.4627685546875,
|
| 34 |
+
"std": 62.30027389526367,
|
| 35 |
+
"min": 0.0,
|
| 36 |
+
"max": 855.8391723632812
|
| 37 |
+
},
|
| 38 |
+
"hrv_sdnn": {
|
| 39 |
+
"mean": 100.59049987792969,
|
| 40 |
+
"std": 43.545467376708984,
|
| 41 |
+
"min": 0.0,
|
| 42 |
+
"max": 393.35162353515625
|
| 43 |
+
},
|
| 44 |
+
"steps": {
|
| 45 |
+
"mean": 342.7657470703125,
|
| 46 |
+
"std": 823.3682861328125,
|
| 47 |
+
"min": 0.0,
|
| 48 |
+
"max": 27004.0
|
| 49 |
+
},
|
| 50 |
+
"distance": {
|
| 51 |
+
"mean": 225.4749755859375,
|
| 52 |
+
"std": 504.8075866699219,
|
| 53 |
+
"min": 0.0,
|
| 54 |
+
"max": 10460.2998046875
|
| 55 |
+
},
|
| 56 |
+
"calories": {
|
| 57 |
+
"mean": 104.05133819580078,
|
| 58 |
+
"std": 211.85128784179688,
|
| 59 |
+
"min": 0.0,
|
| 60 |
+
"max": 2962.070068359375
|
| 61 |
+
},
|
| 62 |
+
"sleep_duration_total": {
|
| 63 |
+
"mean": 418.6901550292969,
|
| 64 |
+
"std": 142.2774200439453,
|
| 65 |
+
"min": 0.0,
|
| 66 |
+
"max": 1110.0
|
| 67 |
+
},
|
| 68 |
+
"sleep_efficiency": {
|
| 69 |
+
"mean": 93.89789581298828,
|
| 70 |
+
"std": 7.327056884765625,
|
| 71 |
+
"min": 34.0,
|
| 72 |
+
"max": 100.0
|
| 73 |
+
},
|
| 74 |
+
"sleep_deep_ratio": {
|
| 75 |
+
"mean": 1.00419020652771,
|
| 76 |
+
"std": 0.3390481770038605,
|
| 77 |
+
"min": 0.0,
|
| 78 |
+
"max": 4.310344696044922
|
| 79 |
+
},
|
| 80 |
+
"sleep_rem_ratio": {
|
| 81 |
+
"mean": 1.00448739528656,
|
| 82 |
+
"std": 0.35869544744491577,
|
| 83 |
+
"min": 0.0,
|
| 84 |
+
"max": 3.9259259700775146
|
| 85 |
+
},
|
| 86 |
+
"sleep_light_ratio": {
|
| 87 |
+
"mean": 0.9923003315925598,
|
| 88 |
+
"std": 0.23265497386455536,
|
| 89 |
+
"min": 0.0,
|
| 90 |
+
"max": 3.034313678741455
|
| 91 |
+
},
|
| 92 |
+
"spo2": {
|
| 93 |
+
"mean": 95.9047622680664,
|
| 94 |
+
"std": 1.04403817653656,
|
| 95 |
+
"min": 92.4000015258789,
|
| 96 |
+
"max": 100.0
|
| 97 |
+
},
|
| 98 |
+
"stress_score": {
|
| 99 |
+
"mean": 65.94886779785156,
|
| 100 |
+
"std": 28.051528930664062,
|
| 101 |
+
"min": 0.0,
|
| 102 |
+
"max": 93.0
|
| 103 |
+
},
|
| 104 |
+
"ALERT": {
|
| 105 |
+
"mean": 0.07375683635473251,
|
| 106 |
+
"std": 0.2613747715950012,
|
| 107 |
+
"min": 0.0,
|
| 108 |
+
"max": 1.0
|
| 109 |
+
},
|
| 110 |
+
"HAPPY": {
|
| 111 |
+
"mean": 0.1726546734571457,
|
| 112 |
+
"std": 0.37794846296310425,
|
| 113 |
+
"min": 0.0,
|
| 114 |
+
"max": 1.0
|
| 115 |
+
},
|
| 116 |
+
"NEUTRAL": {
|
| 117 |
+
"mean": 0.1967589408159256,
|
| 118 |
+
"std": 0.3975485563278198,
|
| 119 |
+
"min": 0.0,
|
| 120 |
+
"max": 1.0
|
| 121 |
+
},
|
| 122 |
+
"RESTED/RELAXED": {
|
| 123 |
+
"mean": 0.23211927711963654,
|
| 124 |
+
"std": 0.42218467593193054,
|
| 125 |
+
"min": 0.0,
|
| 126 |
+
"max": 1.0
|
| 127 |
+
},
|
| 128 |
+
"SAD": {
|
| 129 |
+
"mean": 0.018068943172693253,
|
| 130 |
+
"std": 0.13320080935955048,
|
| 131 |
+
"min": 0.0,
|
| 132 |
+
"max": 1.0
|
| 133 |
+
},
|
| 134 |
+
"TENSE/ANXIOUS": {
|
| 135 |
+
"mean": 0.10590820014476776,
|
| 136 |
+
"std": 0.3077200949192047,
|
| 137 |
+
"min": 0.0,
|
| 138 |
+
"max": 1.0
|
| 139 |
+
},
|
| 140 |
+
"TIRED": {
|
| 141 |
+
"mean": 0.20073312520980835,
|
| 142 |
+
"std": 0.4005488157272339,
|
| 143 |
+
"min": 0.0,
|
| 144 |
+
"max": 1.0
|
| 145 |
+
}
|
| 146 |
+
}
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.1.0
|
| 2 |
+
numpy>=1.24
|
| 3 |
+
pandas>=2.0
|
| 4 |
+
huggingface_hub>=0.23
|
| 5 |
+
scikit-learn>=1.3.0
|
| 6 |
+
requests>=2.31.0
|
run_official_inference.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
run_official_inference.py
|
| 4 |
+
|
| 5 |
+
最小化测试脚本:读取一个窗口 JSON 文件 -> 调用 WearableAnomalyDetector -> 打印模型输出及格式化文本。
|
| 6 |
+
|
| 7 |
+
使用方式:
|
| 8 |
+
python run_official_inference.py \
|
| 9 |
+
--window-file test_data/example_window.json \
|
| 10 |
+
--model-dir checkpoints/phase2/exp_factor_balanced
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import argparse
|
| 16 |
+
import json
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import List, Dict, Any
|
| 19 |
+
|
| 20 |
+
import importlib.util
|
| 21 |
+
|
| 22 |
+
from wearable_anomaly_detector import WearableAnomalyDetector
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def load_formatter():
    """Dynamically import utils/formatter.py and return its AnomalyFormatter class.

    Loaded via importlib from an explicit file path (relative to this script)
    so the formatter is usable even when `utils` is not importable as a
    package, e.g. when the script is run from a different working directory.
    """
    formatter_path = Path(__file__).parent / "utils" / "formatter.py"
    spec = importlib.util.spec_from_file_location("formatter", formatter_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.AnomalyFormatter
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def load_window(path: Path) -> List[Dict[str, Any]]:
    """Load the last 12 data points from a JSON or JSONL window file.

    ``.jsonl`` files are parsed line by line (blank lines skipped); other
    files are parsed as JSON and may hold either a list of points or a dict
    wrapping one under "records"/"data".

    Raises:
        ValueError: the content is not a non-empty list, or holds fewer
            than 12 records.
    """
    if path.suffix == ".jsonl":
        with open(path, "r", encoding="utf-8") as fh:
            records = [json.loads(line) for line in fh if line.strip()]
    else:
        with open(path, "r", encoding="utf-8") as fh:
            payload = json.load(fh)
        records = payload
        if isinstance(payload, dict):
            records = payload.get("records") or payload.get("data") or [payload]
    if not isinstance(records, list) or not records:
        raise ValueError("窗口文件必须是非空列表")
    if len(records) < 12:
        raise ValueError("窗口数据至少需要 12 条记录")
    return records[-12:]
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def build_baseline_info(window: List[Dict[str, Any]]) -> Dict[str, float]:
    """Derive HRV baseline statistics for the formatter.

    Prefers explicit ``baseline_hrv_mean``/``baseline_hrv_std`` fields
    carried on any point in the window; otherwise estimates the baseline as
    the window's mean ``hrv_rmssd`` with a nominal std of 5.0.

    Fixes vs. previous version:
    - guards against ZeroDivisionError when baseline_hrv_mean is 0
    - a current hrv_rmssd of exactly 0.0 is reported as 0.0 instead of being
      silently replaced by the baseline mean (``current or mean`` truthiness
      bug)
    - None hrv_rmssd values no longer break the fallback average
    """
    for point in window:
        features = point.get("features", {})
        baseline_mean = features.get("baseline_hrv_mean")
        baseline_std = features.get("baseline_hrv_std")
        if baseline_mean is None or baseline_std is None:
            continue
        current = features.get("hrv_rmssd")
        deviation = 0.0
        if current is not None and baseline_mean != 0:
            deviation = (current - baseline_mean) / baseline_mean * 100
        return {
            "baseline_mean": float(baseline_mean),
            "baseline_std": float(baseline_std),
            "current_value": float(current if current is not None else baseline_mean),
            "deviation_pct": float(deviation),
        }

    # No explicit baseline anywhere in the window: estimate from the data.
    avg_hrv = sum(pt.get("features", {}).get("hrv_rmssd") or 0.0 for pt in window) / len(window)
    return {
        "baseline_mean": avg_hrv,
        "baseline_std": 5.0,
        "current_value": avg_hrv,
        "deviation_pct": 0.0,
    }
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def main() -> None:
    """CLI entry point: load a window file, run the detector, print results.

    Prints the raw detection result as JSON, then the LLM-ready text built
    by AnomalyFormatter from the same window and its baseline info.

    Raises:
        FileNotFoundError: the --window-file path does not exist.
    """
    parser = argparse.ArgumentParser(description="Run wearable anomaly detector on a JSON window file.")
    parser.add_argument(
        "--window-file",
        type=Path,
        default=Path("test_data/example_window.json"),
        help="包含 12 条数据点的 JSON 文件路径",
    )
    parser.add_argument(
        "--model-dir",
        type=Path,
        default=Path("checkpoints/phase2/exp_factor_balanced"),
        help="Phase2 最佳模型所在目录",
    )
    parser.add_argument(
        "--device",
        type=str,
        default=None,
        help="可选:cpu / cuda / cuda:0 等",
    )
    args = parser.parse_args()

    if not args.window_file.exists():
        raise FileNotFoundError(f"窗口文件不存在:{args.window_file}")

    window = load_window(args.window_file)
    detector = WearableAnomalyDetector(model_dir=args.model_dir, device=args.device)
    # Baselines are deliberately not updated during ad-hoc test runs.
    result = detector.detect_realtime(window, update_baseline=False, return_details=True)

    print("\n=== 模型输出(JSON)===")
    print(json.dumps(result, ensure_ascii=False, indent=2))

    formatter_cls = load_formatter()
    formatter = formatter_cls()
    baseline_info = build_baseline_info(window)
    formatted = formatter.format_for_llm(
        anomaly_result=result,
        baseline_info=baseline_info,
        daily_results=None,
    )

    print("\n=== LLM 文本 ===")
    print(formatted)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
if __name__ == "__main__":
|
| 121 |
+
main()
|
| 122 |
+
|
test_data/example_window.json
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"timestamp": "2025-01-15T08:00:00",
|
| 4 |
+
"deviceId": "sample_user",
|
| 5 |
+
"features": {
|
| 6 |
+
"hr": 68.5,
|
| 7 |
+
"hr_resting": 64.0,
|
| 8 |
+
"hrv_rmssd": 78.5,
|
| 9 |
+
"hrv_sdnn": 94.2,
|
| 10 |
+
"time_period_primary": "morning",
|
| 11 |
+
"time_period_secondary": "weekday",
|
| 12 |
+
"is_weekend": 0,
|
| 13 |
+
"data_quality": "high",
|
| 14 |
+
"baseline_hrv_mean": 76.0,
|
| 15 |
+
"baseline_hrv_std": 5.0
|
| 16 |
+
},
|
| 17 |
+
"static_features": {
|
| 18 |
+
"age_group": 2,
|
| 19 |
+
"sex": 0,
|
| 20 |
+
"exercise": 1,
|
| 21 |
+
"coffee": 1,
|
| 22 |
+
"drinking": 0,
|
| 23 |
+
"MEQ": 52.0
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"timestamp": "2025-01-15T08:05:00",
|
| 28 |
+
"deviceId": "sample_user",
|
| 29 |
+
"features": {
|
| 30 |
+
"hr": 69.0,
|
| 31 |
+
"hr_resting": 64.0,
|
| 32 |
+
"hrv_rmssd": 77.3,
|
| 33 |
+
"hrv_sdnn": 92.7,
|
| 34 |
+
"time_period_primary": "morning",
|
| 35 |
+
"time_period_secondary": "weekday",
|
| 36 |
+
"is_weekend": 0,
|
| 37 |
+
"data_quality": "high",
|
| 38 |
+
"baseline_hrv_mean": 76.0,
|
| 39 |
+
"baseline_hrv_std": 5.0
|
| 40 |
+
},
|
| 41 |
+
"static_features": {
|
| 42 |
+
"age_group": 2,
|
| 43 |
+
"sex": 0,
|
| 44 |
+
"exercise": 1,
|
| 45 |
+
"coffee": 1,
|
| 46 |
+
"drinking": 0,
|
| 47 |
+
"MEQ": 52.0
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"timestamp": "2025-01-15T08:10:00",
|
| 52 |
+
"deviceId": "sample_user",
|
| 53 |
+
"features": {
|
| 54 |
+
"hr": 69.4,
|
| 55 |
+
"hr_resting": 64.0,
|
| 56 |
+
"hrv_rmssd": 76.1,
|
| 57 |
+
"hrv_sdnn": 91.3,
|
| 58 |
+
"time_period_primary": "morning",
|
| 59 |
+
"time_period_secondary": "weekday",
|
| 60 |
+
"is_weekend": 0,
|
| 61 |
+
"data_quality": "high",
|
| 62 |
+
"baseline_hrv_mean": 76.0,
|
| 63 |
+
"baseline_hrv_std": 5.0
|
| 64 |
+
},
|
| 65 |
+
"static_features": {
|
| 66 |
+
"age_group": 2,
|
| 67 |
+
"sex": 0,
|
| 68 |
+
"exercise": 1,
|
| 69 |
+
"coffee": 1,
|
| 70 |
+
"drinking": 0,
|
| 71 |
+
"MEQ": 52.0
|
| 72 |
+
}
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"timestamp": "2025-01-15T08:15:00",
|
| 76 |
+
"deviceId": "sample_user",
|
| 77 |
+
"features": {
|
| 78 |
+
"hr": 69.8,
|
| 79 |
+
"hr_resting": 64.0,
|
| 80 |
+
"hrv_rmssd": 74.2,
|
| 81 |
+
"hrv_sdnn": 89.0,
|
| 82 |
+
"time_period_primary": "morning",
|
| 83 |
+
"time_period_secondary": "weekday",
|
| 84 |
+
"is_weekend": 0,
|
| 85 |
+
"data_quality": "high",
|
| 86 |
+
"baseline_hrv_mean": 76.0,
|
| 87 |
+
"baseline_hrv_std": 5.0
|
| 88 |
+
},
|
| 89 |
+
"static_features": {
|
| 90 |
+
"age_group": 2,
|
| 91 |
+
"sex": 0,
|
| 92 |
+
"exercise": 1,
|
| 93 |
+
"coffee": 1,
|
| 94 |
+
"drinking": 0,
|
| 95 |
+
"MEQ": 52.0
|
| 96 |
+
}
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"timestamp": "2025-01-15T08:20:00",
|
| 100 |
+
"deviceId": "sample_user",
|
| 101 |
+
"features": {
|
| 102 |
+
"hr": 70.2,
|
| 103 |
+
"hr_resting": 64.0,
|
| 104 |
+
"hrv_rmssd": 73.8,
|
| 105 |
+
"hrv_sdnn": 88.6,
|
| 106 |
+
"time_period_primary": "morning",
|
| 107 |
+
"time_period_secondary": "weekday",
|
| 108 |
+
"is_weekend": 0,
|
| 109 |
+
"data_quality": "high",
|
| 110 |
+
"baseline_hrv_mean": 76.0,
|
| 111 |
+
"baseline_hrv_std": 5.0
|
| 112 |
+
},
|
| 113 |
+
"static_features": {
|
| 114 |
+
"age_group": 2,
|
| 115 |
+
"sex": 0,
|
| 116 |
+
"exercise": 1,
|
| 117 |
+
"coffee": 1,
|
| 118 |
+
"drinking": 0,
|
| 119 |
+
"MEQ": 52.0
|
| 120 |
+
}
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"timestamp": "2025-01-15T08:25:00",
|
| 124 |
+
"deviceId": "sample_user",
|
| 125 |
+
"features": {
|
| 126 |
+
"hr": 70.7,
|
| 127 |
+
"hr_resting": 64.0,
|
| 128 |
+
"hrv_rmssd": 72.1,
|
| 129 |
+
"hrv_sdnn": 86.5,
|
| 130 |
+
"time_period_primary": "morning",
|
| 131 |
+
"time_period_secondary": "weekday",
|
| 132 |
+
"is_weekend": 0,
|
| 133 |
+
"data_quality": "high",
|
| 134 |
+
"baseline_hrv_mean": 76.0,
|
| 135 |
+
"baseline_hrv_std": 5.0
|
| 136 |
+
},
|
| 137 |
+
"static_features": {
|
| 138 |
+
"age_group": 2,
|
| 139 |
+
"sex": 0,
|
| 140 |
+
"exercise": 1,
|
| 141 |
+
"coffee": 1,
|
| 142 |
+
"drinking": 0,
|
| 143 |
+
"MEQ": 52.0
|
| 144 |
+
}
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"timestamp": "2025-01-15T08:30:00",
|
| 148 |
+
"deviceId": "sample_user",
|
| 149 |
+
"features": {
|
| 150 |
+
"hr": 71.1,
|
| 151 |
+
"hr_resting": 64.0,
|
| 152 |
+
"hrv_rmssd": 71.8,
|
| 153 |
+
"hrv_sdnn": 86.1,
|
| 154 |
+
"time_period_primary": "morning",
|
| 155 |
+
"time_period_secondary": "weekday",
|
| 156 |
+
"is_weekend": 0,
|
| 157 |
+
"data_quality": "high",
|
| 158 |
+
"baseline_hrv_mean": 76.0,
|
| 159 |
+
"baseline_hrv_std": 5.0
|
| 160 |
+
},
|
| 161 |
+
"static_features": {
|
| 162 |
+
"age_group": 2,
|
| 163 |
+
"sex": 0,
|
| 164 |
+
"exercise": 1,
|
| 165 |
+
"coffee": 1,
|
| 166 |
+
"drinking": 0,
|
| 167 |
+
"MEQ": 52.0
|
| 168 |
+
}
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"timestamp": "2025-01-15T08:35:00",
|
| 172 |
+
"deviceId": "sample_user",
|
| 173 |
+
"features": {
|
| 174 |
+
"hr": 71.6,
|
| 175 |
+
"hr_resting": 64.0,
|
| 176 |
+
"hrv_rmssd": 70.5,
|
| 177 |
+
"hrv_sdnn": 84.6,
|
| 178 |
+
"time_period_primary": "morning",
|
| 179 |
+
"time_period_secondary": "weekday",
|
| 180 |
+
"is_weekend": 0,
|
| 181 |
+
"data_quality": "high",
|
| 182 |
+
"baseline_hrv_mean": 76.0,
|
| 183 |
+
"baseline_hrv_std": 5.0
|
| 184 |
+
},
|
| 185 |
+
"static_features": {
|
| 186 |
+
"age_group": 2,
|
| 187 |
+
"sex": 0,
|
| 188 |
+
"exercise": 1,
|
| 189 |
+
"coffee": 1,
|
| 190 |
+
"drinking": 0,
|
| 191 |
+
"MEQ": 52.0
|
| 192 |
+
}
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"timestamp": "2025-01-15T08:40:00",
|
| 196 |
+
"deviceId": "sample_user",
|
| 197 |
+
"features": {
|
| 198 |
+
"hr": 72.0,
|
| 199 |
+
"hr_resting": 64.0,
|
| 200 |
+
"hrv_rmssd": 69.4,
|
| 201 |
+
"hrv_sdnn": 83.3,
|
| 202 |
+
"time_period_primary": "morning",
|
| 203 |
+
"time_period_secondary": "weekday",
|
| 204 |
+
"is_weekend": 0,
|
| 205 |
+
"data_quality": "medium",
|
| 206 |
+
"baseline_hrv_mean": 76.0,
|
| 207 |
+
"baseline_hrv_std": 5.0
|
| 208 |
+
},
|
| 209 |
+
"static_features": {
|
| 210 |
+
"age_group": 2,
|
| 211 |
+
"sex": 0,
|
| 212 |
+
"exercise": 1,
|
| 213 |
+
"coffee": 1,
|
| 214 |
+
"drinking": 0,
|
| 215 |
+
"MEQ": 52.0
|
| 216 |
+
}
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"timestamp": "2025-01-15T08:45:00",
|
| 220 |
+
"deviceId": "sample_user",
|
| 221 |
+
"features": {
|
| 222 |
+
"hr": 72.5,
|
| 223 |
+
"hr_resting": 64.0,
|
| 224 |
+
"hrv_rmssd": 68.7,
|
| 225 |
+
"hrv_sdnn": 82.4,
|
| 226 |
+
"time_period_primary": "morning",
|
| 227 |
+
"time_period_secondary": "weekday",
|
| 228 |
+
"is_weekend": 0,
|
| 229 |
+
"data_quality": "medium",
|
| 230 |
+
"baseline_hrv_mean": 76.0,
|
| 231 |
+
"baseline_hrv_std": 5.0
|
| 232 |
+
},
|
| 233 |
+
"static_features": {
|
| 234 |
+
"age_group": 2,
|
| 235 |
+
"sex": 0,
|
| 236 |
+
"exercise": 1,
|
| 237 |
+
"coffee": 1,
|
| 238 |
+
"drinking": 0,
|
| 239 |
+
"MEQ": 52.0
|
| 240 |
+
}
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"timestamp": "2025-01-15T08:50:00",
|
| 244 |
+
"deviceId": "sample_user",
|
| 245 |
+
"features": {
|
| 246 |
+
"hr": 72.9,
|
| 247 |
+
"hr_resting": 64.0,
|
| 248 |
+
"hrv_rmssd": 68.1,
|
| 249 |
+
"hrv_sdnn": 81.7,
|
| 250 |
+
"time_period_primary": "morning",
|
| 251 |
+
"time_period_secondary": "weekday",
|
| 252 |
+
"is_weekend": 0,
|
| 253 |
+
"data_quality": "medium",
|
| 254 |
+
"baseline_hrv_mean": 76.0,
|
| 255 |
+
"baseline_hrv_std": 5.0
|
| 256 |
+
},
|
| 257 |
+
"static_features": {
|
| 258 |
+
"age_group": 2,
|
| 259 |
+
"sex": 0,
|
| 260 |
+
"exercise": 1,
|
| 261 |
+
"coffee": 1,
|
| 262 |
+
"drinking": 0,
|
| 263 |
+
"MEQ": 52.0
|
| 264 |
+
}
|
| 265 |
+
},
|
| 266 |
+
{
|
| 267 |
+
"timestamp": "2025-01-15T08:55:00",
|
| 268 |
+
"deviceId": "sample_user",
|
| 269 |
+
"features": {
|
| 270 |
+
"hr": 73.4,
|
| 271 |
+
"hr_resting": 64.0,
|
| 272 |
+
"hrv_rmssd": 67.5,
|
| 273 |
+
"hrv_sdnn": 81.0,
|
| 274 |
+
"time_period_primary": "morning",
|
| 275 |
+
"time_period_secondary": "weekday",
|
| 276 |
+
"is_weekend": 0,
|
| 277 |
+
"data_quality": "medium",
|
| 278 |
+
"baseline_hrv_mean": 76.0,
|
| 279 |
+
"baseline_hrv_std": 5.0
|
| 280 |
+
},
|
| 281 |
+
"static_features": {
|
| 282 |
+
"age_group": 2,
|
| 283 |
+
"sex": 0,
|
| 284 |
+
"exercise": 1,
|
| 285 |
+
"coffee": 1,
|
| 286 |
+
"drinking": 0,
|
| 287 |
+
"MEQ": 52.0
|
| 288 |
+
}
|
| 289 |
+
}
|
| 290 |
+
]
|
| 291 |
+
|
test_quickstart.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
test_quickstart.py

Demonstrates:
1. Building 1-hour windows for realtime anomaly detection (normal and
   anomalous scenarios)
2. Building 7 days of data for anomaly-pattern aggregation
3. Producing formatted LLM-ready text for direct hand-off to a large model

Run with:
    python test_quickstart.py
"""

from __future__ import annotations

import importlib.util
import json
import random
import sys
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np

ROOT_DIR = Path(__file__).parent.resolve()
sys.path.insert(0, str(ROOT_DIR))

from wearable_anomaly_detector import WearableAnomalyDetector

# Import utils.formatter dynamically from its file path to avoid
# relative-import/package issues when the script is run directly.
# (Fix vs. previous version: the duplicate `import random` was removed and
# imports are grouped stdlib / third-party / local.)
formatter_spec = importlib.util.spec_from_file_location(
    "formatter", ROOT_DIR / "utils" / "formatter.py"
)
formatter_module = importlib.util.module_from_spec(formatter_spec)
formatter_spec.loader.exec_module(formatter_module)
AnomalyFormatter = formatter_module.AnomalyFormatter
FORMATTER = AnomalyFormatter()
TEST_WINDOW_FILE = ROOT_DIR / "test_data" / "example_window.json"

WINDOW_SIZE = 12  # 12 * 5 minutes = 1 hour
INTERVAL_MINUTES = 5
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def make_point(ts: datetime, device_id: str, hrv: float, hr: float, include_static: bool = True) -> dict:
    """Build one synthetic data point in the detector's expected input shape."""
    features = {
        "hr": float(hr),
        "hr_resting": 65.0,
        "hrv_rmssd": float(hrv),
        "hrv_sdnn": float(hrv * 1.2),
        "time_period_primary": "day",
        "time_period_secondary": "workday",
        "is_weekend": 0.0,
        "data_quality": "high",
        "baseline_hrv_mean": 75.0,
        "baseline_hrv_std": 5.0,
    }
    static: dict = {}
    if include_static:
        static = {
            "age_group": 2,
            "sex": 0,
            "exercise": 1,
            "coffee": 1,
            "drinking": 0,
            "MEQ": 50.0,
        }
    return {
        "timestamp": ts.isoformat(),
        "deviceId": device_id,
        "features": features,
        "static_features": static,
    }
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def generate_window(
    device_id: str,
    start: datetime,
    base_hrv: float,
    base_hr: float,
    anomaly_level: float = 0.0,
    include_static: bool = True,
    missing_ratio: float = 0.0,
) -> list:
    """Generate one hour of synthetic window data (WINDOW_SIZE points, 5 min apart).

    ``anomaly_level`` depresses HRV (floor 30) and elevates HR (cap 125) for the
    whole day, plus a within-window trend that grows with the step index.
    ``missing_ratio`` randomly strips optional fields to simulate dropout.
    """
    day_hrv = max(30, base_hrv - 18 * anomaly_level)
    day_hr = min(125, base_hr + 10 * anomaly_level)

    points = []
    for step in range(WINDOW_SIZE):
        # Per-sample jitter (same RNG call order as before) plus the trend term.
        jitter_hrv = np.random.normal(0, 3)
        jitter_hr = np.random.normal(0, 1.5)
        frac = step / WINDOW_SIZE
        sample_hrv = max(25, day_hrv + jitter_hrv - 15 * anomaly_level * frac)
        sample_hr = min(125, day_hr + jitter_hr + 8 * anomaly_level * frac)

        stamp = start + timedelta(minutes=INTERVAL_MINUTES * step)
        point = make_point(stamp, device_id, sample_hrv, sample_hr, include_static=include_static)

        # Randomly drop optional baseline fields (and sometimes the static
        # profile) to simulate missing sensor data.
        if missing_ratio > 0 and random.random() < missing_ratio:
            for key in ("hr_resting", "baseline_hrv_mean", "baseline_hrv_std"):
                point["features"].pop(key, None)
            if random.random() < 0.5:
                point["static_features"] = {}

        points.append(point)
    return points
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def load_window_from_file(path: Path) -> list | None:
    """Load a window of data points from a JSON file.

    The file must contain a non-empty JSON list. Any failure (missing file,
    malformed JSON, wrong top-level type, empty list) is reported and mapped
    to None so callers can skip the demo gracefully.

    Args:
        path: Path to the JSON file.

    Returns:
        The parsed list of data points, or None on any error.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would silently let invalid data through.
        if not (isinstance(data, list) and data):
            raise ValueError("JSON needs to be a non-empty list")
        return data
    except Exception as exc:
        print(f" ⚠️ 读取 {path.name} 失败: {exc}")
        return None
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def demo_from_file(detector: WearableAnomalyDetector) -> None:
    """Run inference on the bundled example window file and print a summary."""
    banner = "=" * 80
    print("\n" + banner)
    print("示例文件推理(test_data/example_window.json)")
    print(banner)

    if not TEST_WINDOW_FILE.exists():
        print(f" ⚠️ 未找到 {TEST_WINDOW_FILE}, 请确认仓库中存在该文件")
        return

    window = load_window_from_file(TEST_WINDOW_FILE)
    if not window:
        return

    avg_hrv = np.nanmean([pt["features"]["hrv_rmssd"] for pt in window])
    avg_hr = np.nanmean([pt["features"]["hr"] for pt in window])
    print(f" - 数据点数: {len(window)}")
    print(f" - 平均 HRV: {avg_hrv:.2f} ms, 平均心率: {avg_hr:.1f} bpm")

    result = detector.detect_realtime(window, update_baseline=False)
    verdict = '是 ⚠️' if result.get('is_anomaly') else '否'
    print(
        f" -> 是否异常: {verdict} | "
        f"分数: {result.get('anomaly_score', 0):.4f} | 阈值: {result.get('threshold', 0):.4f}"
    )

    # Fixed reference baseline used only for the LLM-formatting demo.
    reference_mean = 76.0
    baseline_info = {
        "baseline_mean": reference_mean,
        "baseline_std": 5.0,
        "current_value": avg_hrv,
        "deviation_pct": (avg_hrv - reference_mean) / reference_mean * 100,
    }
    llm_text = FORMATTER.format_for_llm(result, baseline_info=baseline_info)
    divider = "-" * 60
    print("\n LLM 文本片段(前 350 字符):")
    print(divider)
    print(llm_text[:350])
    print("...")
    print(divider)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def demo_realtime(detector: WearableAnomalyDetector) -> None:
    """Compare detector output on one normal and one simulated anomalous hour."""
    banner = "=" * 80
    print("\n" + banner)
    print("实时检测示例")
    print(banner)

    start = datetime.now() - timedelta(hours=1)
    windows = [
        ("正常窗口", generate_window("demo_normal", start, base_hrv=76, base_hr=68, anomaly_level=0.0)),
        ("异常窗口", generate_window("demo_anomaly", start, base_hrv=74, base_hr=70, anomaly_level=0.7)),
    ]

    for title, window in windows:
        avg_hrv = np.mean([pt["features"]["hrv_rmssd"] for pt in window])
        avg_hr = np.mean([pt["features"]["hr"] for pt in window])
        print(f"\n[{title}] HRV≈{avg_hrv:.2f} ms, HR≈{avg_hr:.1f} bpm")
        result = detector.detect_realtime(window, update_baseline=False)
        flag = '是 ⚠️' if result.get('is_anomaly') else '否'
        print(
            f" -> 是否异常: {flag} | "
            f"分数: {result.get('anomaly_score', 0):.4f} | 阈值: {result.get('threshold', 0):.4f}"
        )
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def demo_pattern(detector: WearableAnomalyDetector) -> None:
    """Aggregate seven daily windows with escalating anomaly levels and detect patterns."""
    banner = "=" * 80
    print("\n" + banner)
    print("7 天异常模式聚合示例")
    print(banner)

    base_date = datetime.now() - timedelta(days=7)
    anomaly_plan = [0.0, 0.1, 0.3, 1.0, 1.4, 1.8, 1.8]
    daily_data = []
    avg_hrv_per_day = []

    for offset, level in enumerate(anomaly_plan):
        morning = (base_date + timedelta(days=offset)).replace(hour=8, minute=0, second=0, microsecond=0)
        window = generate_window(
            device_id="demo_pattern",
            start=morning,
            base_hrv=75,
            base_hr=69,
            anomaly_level=level,
        )
        daily_data.append(window)
        avg_hrv_per_day.append(np.mean([pt["features"]["hrv_rmssd"] for pt in window]))

    trajectory = ", ".join(f"{val:.1f}" for val in avg_hrv_per_day)
    print(" 日均HRV轨迹: " + trajectory)

    result = detector.detect_pattern(
        daily_data,
        days=len(daily_data),
        min_duration_days=2,
        format_for_llm=True,
    )
    pattern = result.get("anomaly_pattern", {})
    print(
        f" -> 是否有模式: {'是' if pattern.get('has_pattern') else '否'} | "
        f"持续天数: {pattern.get('duration_days', 0)} | 趋势: {pattern.get('trend', '未知')}"
    )

    if "formatted_for_llm" in result:
        divider = "-" * 60
        print("\n格式化输出(前 400 字符):")
        print(divider)
        print(result["formatted_for_llm"][:400])
        print("...")
        print(divider)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def demo_missing_data(detector: WearableAnomalyDetector) -> None:
    """Show detector behaviour on a window with missing fields and degraded samples."""
    banner = "=" * 80
    print("\n" + banner)
    print("数据缺失 / 质量下降示例")
    print(banner)

    start = datetime.now() - timedelta(hours=1)
    incomplete_window = generate_window(
        device_id="demo_missing",
        start=start,
        base_hrv=74,
        base_hr=71,
        anomaly_level=0.5,
        include_static=True,
        missing_ratio=0.4,
    )

    # Simulate sensor dropout: degrade two samples in place
    # (mark them low quality and blank out the heart rate).
    for corrupted in (3, 7):
        features = incomplete_window[corrupted]["features"]
        features["data_quality"] = "low"
        features["hr"] = float("nan")

    avg_hrv = np.nanmean([pt["features"].get("hrv_rmssd", np.nan) for pt in incomplete_window])
    available_static = sum(bool(pt["static_features"]) for pt in incomplete_window)
    print(f" - 有效静态特征点数: {available_static}/{len(incomplete_window)}")
    print(f" - 平均 HRV(忽略缺失): {avg_hrv:.2f} ms")

    result = detector.detect_realtime(incomplete_window, update_baseline=False)
    print(
        f" -> 是否异常: {'是' if result.get('is_anomaly') else '否'} | "
        f"分数: {result.get('anomaly_score', 0):.4f} | 阈值: {result.get('threshold', 0):.4f}"
    )
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def main() -> None:
    """Load the trained detector and run every demo scenario in order."""
    model_dir = ROOT_DIR / "checkpoints" / "phase2" / "exp_factor_balanced"
    detector = WearableAnomalyDetector(model_dir=model_dir, device="cpu")
    detector.update_threshold(0.50)
    for demo in (demo_from_file, demo_realtime, demo_pattern, demo_missing_data):
        demo(detector)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
# Script entry point: run all demo scenarios when executed directly.
if __name__ == "__main__":
    main()
|
| 264 |
+
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
工具模块
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .baseline_storage import BaselineStorage
|
| 6 |
+
from .api_client import HistoricalDataPlatformClient
|
| 7 |
+
from .formatter import AnomalyFormatter
|
| 8 |
+
|
| 9 |
+
__all__ = ['BaselineStorage', 'HistoricalDataPlatformClient', 'AnomalyFormatter']
|
| 10 |
+
|
utils/__pycache__/formatter.cpython-313.pyc
ADDED
|
Binary file (11.8 kB). View file
|
|
|
utils/api_client.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
历史数据平台API客户端
|
| 3 |
+
最小化实现,只包含必要的功能
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
import requests
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Dict, List, Optional
|
| 10 |
+
from datetime import datetime, timedelta
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class HistoricalDataPlatformClient:
    """Client for the historical-data platform API (minimal implementation)."""

    def __init__(
        self,
        base_url: str = "",
        api_key: Optional[str] = None,
        timeout: int = 30,
        retry_times: int = 3
    ):
        """Initialize the API client.

        Args:
            base_url: Base URL of the API; when empty, settings are loaded
                from configs/api_config.json.
            api_key: Optional API key, sent as a Bearer token.
            timeout: Per-request timeout in seconds.
            retry_times: Number of attempts per request.
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.timeout = timeout
        self.retry_times = retry_times

        # Fall back to the config file only when no base_url was given.
        if not self.base_url:
            self._load_config()

    def _load_config(self):
        """Load API settings from configs/api_config.json (best effort).

        Caller-supplied timeout/retry values are preserved when the config
        file omits them (previously they were reset to hard-coded defaults).
        """
        try:
            config_path = Path(__file__).parent.parent / "configs" / "api_config.json"
            if config_path.exists():
                with open(config_path, 'r', encoding='utf-8') as f:
                    config = json.load(f)
                api_config = config.get('historical_data_platform', {})
                # Normalize trailing slashes exactly like the constructor does.
                self.base_url = api_config.get('base_url', '').rstrip('/')
                self.api_key = api_config.get('api_key') or self.api_key
                self.timeout = api_config.get('timeout', self.timeout)
                self.retry_times = api_config.get('retry_times', self.retry_times)
        except Exception as e:
            print(f"⚠️ 加载API配置失败: {e}")

    def _request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict] = None,
        data: Optional[Dict] = None
    ) -> Optional[Dict]:
        """Send an HTTP request with retries; return parsed JSON or None."""
        if not self.base_url:
            print("⚠️ API base_url未配置")
            return None

        url = f"{self.base_url}{endpoint}"
        headers = {'Content-Type': 'application/json'}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'

        for attempt in range(self.retry_times):
            try:
                if method.upper() == 'GET':
                    response = requests.get(url, params=params, headers=headers, timeout=self.timeout)
                else:
                    response = requests.post(url, json=data, params=params, headers=headers, timeout=self.timeout)

                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException as e:
                # Only report after the final attempt; earlier failures retry.
                if attempt == self.retry_times - 1:
                    print(f"⚠️ API请求失败: {e}")
                    return None
                continue

        return None

    def get_raw_data(
        self,
        device_id: str,
        days: int = 7,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None
    ) -> Optional[Dict]:
        """Fetch a user's raw data points.

        Args:
            device_id: User identifier.
            days: Look-back window in days (used when no explicit range).
            start_date: Range start, "YYYY-MM-DD".
            end_date: Range end, "YYYY-MM-DD".

        Returns:
            A dict like {"deviceId": ..., "data_points": [...],
            "total_count": N}, or None on failure.
        """
        endpoint = f"/api/raw-data/{device_id}"
        params = {}

        # An explicit date range takes precedence over the rolling window.
        if start_date and end_date:
            params['start_date'] = start_date
            params['end_date'] = end_date
        else:
            params['days'] = days

        return self._request('GET', endpoint, params=params)

    def get_user_profile(self, device_id: str) -> Optional[Dict]:
        """Fetch a user's profile (age group, sex, ...), or None on failure."""
        endpoint = f"/api/user-profile/{device_id}"
        return self._request('GET', endpoint)

    def get_historical_results(
        self,
        device_id: str,
        days: int = 7
    ) -> Optional[Dict]:
        """Fetch past detection results for the last ``days`` days.

        Returns:
            A dict like {"deviceId": ..., "daily_results": [...]},
            or None on failure.
        """
        endpoint = f"/api/historical-results/{device_id}"
        params = {'days': days}
        return self._request('GET', endpoint, params=params)
|
| 158 |
+
|
utils/baseline_storage.py
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
基线存储模块 - 支持文件存储和增量更新
|
| 3 |
+
最小化改动,复用现有的FeatureCalculator.get_baseline_info()
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
import sqlite3
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Dict, Optional, List
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
import pandas as pd
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class BaselineStorage:
    """Baseline storage manager.

    - Supports file storage (JSON, compatible with the existing format)
    - Supports database storage (SQLite, optional)
    - Supports incremental baseline updates
    - Supports importing from an existing CSV file
    """

    def __init__(
        self,
        storage_type: str = "file",
        file_path: Optional[Path] = None,
        database_path: Optional[str] = None,
        import_from_csv: bool = True,
        csv_path: Optional[Path] = None
    ):
        """Initialize baseline storage.

        Args:
            storage_type: Storage backend, "file" or "database".
            file_path: Path of the JSON storage file.
            database_path: SQLite database path.
            import_from_csv: Whether to import from an existing CSV file.
            csv_path: Path of the CSV file (adaptive_baselines.csv).
        """
        self.storage_type = storage_type
        base_dir = Path(__file__).parent.parent

        # File storage: default to <repo>/data_storage/baselines.json.
        if file_path is None:
            file_path = base_dir / "data_storage" / "baselines.json"
        self.file_path = Path(file_path)
        self.file_path.parent.mkdir(parents=True, exist_ok=True)

        # Database storage: default to <repo>/data_storage/baselines.db.
        if database_path is None:
            database_path = str(base_dir / "data_storage" / "baselines.db")
        self.database_path = database_path

        # CSV import source path.
        if csv_path is None:
            csv_path = base_dir / "processed_data" / "stage1" / "adaptive_baselines.csv"
        self.csv_path = Path(csv_path)

        # Create the table up front when using the database backend.
        if storage_type == "database":
            self._init_database()

        # Import from CSV when enabled and the file exists.
        # NOTE(review): this runs on every construction and re-imports the
        # whole CSV — confirm that repeated imports are intended.
        if import_from_csv and self.csv_path.exists():
            self._import_from_csv()

    def _init_database(self):
        """Create the baselines table if it does not already exist."""
        conn = sqlite3.connect(self.database_path)
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS baselines (
                device_id TEXT,
                feature_name TEXT,
                baseline_type TEXT,
                baseline_mean REAL,
                baseline_std REAL,
                personal_mean REAL,
                personal_std REAL,
                group_mean REAL,
                data_count INTEGER,
                time_period_primary TEXT,
                time_period_secondary TEXT,
                is_weekend INTEGER,
                last_updated TEXT,
                PRIMARY KEY (device_id, feature_name, time_period_primary, time_period_secondary, is_weekend)
            )
        """)
        conn.commit()
        conn.close()

    def _import_from_csv(self):
        """Import baseline rows from the legacy CSV file (best effort)."""
        try:
            if not self.csv_path.exists():
                return

            df = pd.read_csv(self.csv_path)

            # Convert each CSV row to the storage record format.
            baselines = []
            for _, row in df.iterrows():
                baseline = {
                    'device_id': str(row.get('deviceId', '')),
                    'feature_name': 'hrv_rmssd',  # default feature
                    'baseline_type': row.get('baseline_type', 'unknown'),
                    'baseline_mean': float(row.get('final_mean', 0.0)),
                    'baseline_std': float(row.get('final_std', 1.0)),
                    'personal_mean': float(row.get('personal_mean', 0.0)) if pd.notna(row.get('personal_mean')) else None,
                    'personal_std': float(row.get('personal_std', 0.0)) if pd.notna(row.get('personal_std')) else None,
                    'group_mean': float(row.get('group_mean', 0.0)) if pd.notna(row.get('group_mean')) else None,
                    'data_count': int(row.get('personal_record_count', 0)),
                    'time_period_primary': row.get('time_period_primary', ''),
                    'time_period_secondary': row.get('time_period_secondary', ''),
                    'is_weekend': int(row.get('is_weekend', 0)),
                    'last_updated': datetime.now().isoformat()
                }
                baselines.append(baseline)

            # Persist each converted record (insert-or-replace semantics).
            for baseline in baselines:
                self.save_baseline(baseline)

            print(f"✅ 已从CSV导入 {len(baselines)} 条基线数据")
        except Exception as e:
            print(f"⚠️ 从CSV导入基线失败: {e}")

    def get_baseline(
        self,
        device_id: str,
        feature_name: str = "hrv_rmssd",
        time_period_primary: Optional[str] = None,
        time_period_secondary: Optional[str] = None,
        is_weekend: Optional[bool] = None
    ) -> Optional[Dict]:
        """Look up a stored baseline.

        Args:
            device_id: User/device identifier.
            feature_name: Feature the baseline describes.
            time_period_primary: Optional primary time-period filter.
            time_period_secondary: Optional secondary time-period filter.
            is_weekend: Optional weekend-flag filter.

        Returns:
            The baseline record as a dict, or None when no match exists.
        """
        if self.storage_type == "database":
            return self._get_from_database(device_id, feature_name, time_period_primary, time_period_secondary, is_weekend)
        else:
            return self._get_from_file(device_id, feature_name, time_period_primary, time_period_secondary, is_weekend)

    def _get_from_file(self, device_id: str, feature_name: str,
                       time_period_primary: Optional[str],
                       time_period_secondary: Optional[str],
                       is_weekend: Optional[bool]) -> Optional[Dict]:
        """Read a baseline from the JSON file (linear scan, first match wins)."""
        if not self.file_path.exists():
            return None

        try:
            with open(self.file_path, 'r', encoding='utf-8') as f:
                all_baselines = json.load(f)

            # Scan for a record matching device + feature and all given filters.
            for baseline in all_baselines:
                if (baseline.get('device_id') == device_id and
                        baseline.get('feature_name') == feature_name):
                    # Apply the optional time-period filters.
                    if time_period_primary and baseline.get('time_period_primary') != time_period_primary:
                        continue
                    if time_period_secondary and baseline.get('time_period_secondary') != time_period_secondary:
                        continue
                    if is_weekend is not None and baseline.get('is_weekend') != (1 if is_weekend else 0):
                        continue
                    return baseline
            return None
        except Exception as e:
            print(f"⚠️ 从文件读取基线失败: {e}")
            return None

    def _get_from_database(self, device_id: str, feature_name: str,
                           time_period_primary: Optional[str],
                           time_period_secondary: Optional[str],
                           is_weekend: Optional[bool]) -> Optional[Dict]:
        """Read a baseline from SQLite, building the WHERE clause dynamically."""
        try:
            conn = sqlite3.connect(self.database_path)
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            query = """
                SELECT * FROM baselines
                WHERE device_id = ? AND feature_name = ?
            """
            params = [device_id, feature_name]

            # Append optional filters as parameterized conditions.
            if time_period_primary:
                query += " AND time_period_primary = ?"
                params.append(time_period_primary)
            if time_period_secondary:
                query += " AND time_period_secondary = ?"
                params.append(time_period_secondary)
            if is_weekend is not None:
                query += " AND is_weekend = ?"
                params.append(1 if is_weekend else 0)

            cursor.execute(query, params)
            row = cursor.fetchone()
            conn.close()

            if row:
                return dict(row)
            return None
        except Exception as e:
            print(f"⚠️ 从数据库读取基线失败: {e}")
            return None

    def save_baseline(self, baseline: Dict):
        """Persist a baseline record via the configured backend (upsert)."""
        if self.storage_type == "database":
            self._save_to_database(baseline)
        else:
            self._save_to_file(baseline)

    def _save_to_file(self, baseline: Dict):
        """Insert or replace the record in the JSON file."""
        try:
            # Load the existing records (if any).
            if self.file_path.exists():
                with open(self.file_path, 'r', encoding='utf-8') as f:
                    all_baselines = json.load(f)
            else:
                all_baselines = []

            # Identity key: device + feature + time-period combination
            # (matches the SQLite primary key).
            key = (
                baseline.get('device_id'),
                baseline.get('feature_name'),
                baseline.get('time_period_primary'),
                baseline.get('time_period_secondary'),
                baseline.get('is_weekend')
            )

            found = False
            for i, existing in enumerate(all_baselines):
                existing_key = (
                    existing.get('device_id'),
                    existing.get('feature_name'),
                    existing.get('time_period_primary'),
                    existing.get('time_period_secondary'),
                    existing.get('is_weekend')
                )
                if existing_key == key:
                    all_baselines[i] = baseline
                    found = True
                    break

            if not found:
                all_baselines.append(baseline)

            # Rewrite the whole file.
            with open(self.file_path, 'w', encoding='utf-8') as f:
                json.dump(all_baselines, f, indent=2, ensure_ascii=False)
        except Exception as e:
            print(f"⚠️ 保存基线到文件失败: {e}")

    def _save_to_database(self, baseline: Dict):
        """Insert or replace the record in SQLite (parameterized upsert)."""
        try:
            conn = sqlite3.connect(self.database_path)
            cursor = conn.cursor()

            cursor.execute("""
                INSERT OR REPLACE INTO baselines
                (device_id, feature_name, baseline_type, baseline_mean, baseline_std,
                 personal_mean, personal_std, group_mean, data_count,
                 time_period_primary, time_period_secondary, is_weekend, last_updated)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                baseline.get('device_id'),
                baseline.get('feature_name'),
                baseline.get('baseline_type'),
                baseline.get('baseline_mean'),
                baseline.get('baseline_std'),
                baseline.get('personal_mean'),
                baseline.get('personal_std'),
                baseline.get('group_mean'),
                baseline.get('data_count', 0),
                baseline.get('time_period_primary'),
                baseline.get('time_period_secondary'),
                baseline.get('is_weekend', 0),
                baseline.get('last_updated', datetime.now().isoformat())
            ))

            conn.commit()
            conn.close()
        except Exception as e:
            print(f"⚠️ 保存基线到数据库失败: {e}")

    def update_baseline_incremental(
        self,
        device_id: str,
        feature_name: str,
        new_value: float,
        data_count: int,
        time_period_primary: Optional[str] = None,
        time_period_secondary: Optional[str] = None,
        is_weekend: Optional[bool] = None
    ):
        """Fold one new observation into the stored baseline mean.

        Running-mean update:
        new_mean = (old_mean * old_count + new_value) / (old_count + 1).
        When no matching record exists, a fresh personal baseline is created.

        NOTE(review): the `data_count` argument is unused — the stored
        record's own count drives the update; confirm whether it can be
        dropped from the signature.
        """
        # Fetch the current record, if any.
        existing = self.get_baseline(device_id, feature_name, time_period_primary, time_period_secondary, is_weekend)

        if existing:
            # Incremental update of the running mean.
            old_mean = existing.get('baseline_mean', 0.0)
            old_count = existing.get('data_count', 0)

            # Running mean; first observation just replaces the mean.
            if old_count > 0:
                new_mean = (old_mean * old_count + new_value) / (old_count + 1)
            else:
                new_mean = new_value

            # Update the record in place and persist it.
            existing['baseline_mean'] = new_mean
            existing['data_count'] = old_count + 1
            existing['last_updated'] = datetime.now().isoformat()

            self.save_baseline(existing)
        else:
            # No record yet: create a fresh personal baseline.
            new_baseline = {
                'device_id': device_id,
                'feature_name': feature_name,
                'baseline_type': 'personal',
                'baseline_mean': new_value,
                'baseline_std': 1.0,  # default standard deviation
                'personal_mean': new_value,
                'personal_std': 1.0,
                'data_count': 1,
                'time_period_primary': time_period_primary or '',
                'time_period_secondary': time_period_secondary or '',
                'is_weekend': 1 if is_weekend else 0,
                'last_updated': datetime.now().isoformat()
            }
            self.save_baseline(new_baseline)
|
| 348 |
+
|
utils/formatter.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
异常检测结果格式化器
|
| 3 |
+
将检测结果格式化为LLM需要的文本格式
|
| 4 |
+
完全基于配置文件,方便扩展和定制
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Dict, List, Optional, Any
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class AnomalyFormatter:
    """Format anomaly-detection results into LLM-ready text.

    Every aspect of the output (section titles, field labels, number
    formats, separators) is driven by a JSON config file so the layout
    can be customized without code changes.
    """

    def __init__(self, config_path: Optional[Path] = None):
        """Initialize the formatter.

        Args:
            config_path: Path to the formatter config JSON. When None,
                falls back to ``configs/formatter_config.json`` relative
                to the package root.
        """
        if config_path is None:
            config_path = Path(__file__).parent.parent / "configs" / "formatter_config.json"

        self.config_path = Path(config_path)
        self.config = self._load_config()

    def _load_config(self) -> Dict:
        """Load the config file, falling back to built-in defaults on any failure."""
        if self.config_path.exists():
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except Exception as e:
                # Best-effort: a broken config must not crash formatting.
                print(f"⚠️ 加载格式化配置失败: {e},使用默认配置")

        return self._get_default_config()

    def _get_default_config(self) -> Dict:
        """Return the built-in default config (backward compatible)."""
        return {
            "sections": {
                "anomaly_overview": {"enabled": True, "title": "异常概览"},
                "core_indicators": {"enabled": True, "title": "核心指标"},
                "historical_trend": {"enabled": True, "title": "历史趋势"},
                "related_indicators": {"enabled": True, "title": "相关健康指标"},
                "user_profile": {"enabled": True, "title": "用户背景信息"}
            },
            "formatting": {
                "section_prefix": "## ",
                "section_suffix": "\n",
                "field_prefix": "- ",
                "field_suffix": "\n",
                "line_separator": "\n",
                "header": "# 健康异常检测结果\n"
            }
        }

    def _format_value(self, value: Any, field_config: Dict) -> str:
        """Format a single field value according to its field config.

        Supported config keys: format (string/float/integer/boolean),
        decimal_places, prefix, suffix, default, mapping, true_text,
        false_text.
        """
        if value is None:
            default = field_config.get("default", "")
            return default or ""

        format_type = field_config.get("format", "string")
        decimal_places = field_config.get("decimal_places", 2)
        prefix = field_config.get("prefix", "")
        suffix = field_config.get("suffix", "")
        default = field_config.get("default", "")
        mapping = field_config.get("mapping", {})

        # Apply value mapping (e.g. trend: "worsening" -> "持续恶化").
        if mapping and str(value) in mapping:
            value = mapping[str(value)]

        try:
            if format_type == "float":
                formatted = f"{float(value):.{decimal_places}f}"
            elif format_type == "integer":
                formatted = f"{int(value)}"
            elif format_type == "boolean":
                true_text = field_config.get("true_text", "是")
                false_text = field_config.get("false_text", "否")
                formatted = true_text if value else false_text
            else:
                # Bug fix: the original used plain truthiness here, which
                # replaced legitimate values like 0, 0.0 or False with the
                # default. Only empty values (e.g. "") fall back now;
                # numeric/boolean zeros are rendered as-is.
                if value or isinstance(value, (int, float)):
                    formatted = str(value)
                else:
                    formatted = default
        except (ValueError, TypeError):
            formatted = default or ""

        return f"{prefix}{formatted}{suffix}"

    def _format_section(
        self,
        section_key: str,
        data: Dict,
        section_config: Dict
    ) -> List[str]:
        """Format one section (title + fields), entirely config-driven.

        Returns a list of text fragments; empty when the section is disabled.
        """
        lines = []

        if not section_config.get("enabled", True):
            return lines

        formatting = self.config.get("formatting", {})
        section_prefix = formatting.get("section_prefix", "## ")
        section_suffix = formatting.get("section_suffix", "\n")
        field_prefix = formatting.get("field_prefix", "- ")
        field_suffix = formatting.get("field_suffix", "\n")

        # Section title.
        title = section_config.get("title", section_key)
        lines.append(f"{section_prefix}{title}{section_suffix}")

        # Fields.
        fields_config = section_config.get("fields", {})
        for field_key, field_config in fields_config.items():
            if not field_config.get("enabled", True):
                continue

            field_label = field_config.get("label", field_key)
            format_type = field_config.get("format", "string")

            if format_type == "nested":
                # Nested field (e.g. activity_level.level): concatenate the
                # formatted sub-fields into a single line.
                nested_data = data.get(field_key, {})
                if nested_data:
                    sub_fields = field_config.get("sub_fields", {})
                    sub_values = []
                    for sub_key, sub_config in sub_fields.items():
                        if not sub_config.get("enabled", True):
                            continue
                        sub_value = nested_data.get(sub_key)
                        if sub_value is not None:
                            formatted_sub = self._format_value(sub_value, sub_config)
                            sub_values.append(formatted_sub)

                    if sub_values:
                        line = f"{field_prefix}{field_label}:{''.join(sub_values)}{field_suffix}"
                        lines.append(line)
            elif format_type == "string_or_nested":
                # Try the direct value; fall back to an alternate key.
                value = data.get(field_key)
                fallback_key = field_config.get("fallback")
                if value is None and fallback_key:
                    value = data.get(fallback_key)

                if value is not None:
                    formatted = self._format_value(value, field_config)
                    line = f"{field_prefix}{field_label}:{formatted}{field_suffix}"
                    lines.append(line)
            else:
                # Plain field.
                value = data.get(field_key)
                if value is not None:
                    formatted = self._format_value(value, field_config)
                    line = f"{field_prefix}{field_label}:{formatted}{field_suffix}"
                    lines.append(line)

        # Section separator.
        lines.append(formatting.get("line_separator", "\n"))

        return lines

    def _format_historical_trend(
        self,
        daily_results: List[Dict],
        section_config: Dict
    ) -> List[str]:
        """Format the historical-trend section (special-cased: one line per record)."""
        lines = []

        if not section_config.get("enabled", True):
            return lines

        formatting = self.config.get("formatting", {})
        section_prefix = formatting.get("section_prefix", "## ")
        section_suffix = formatting.get("section_suffix", "\n")
        field_prefix = formatting.get("field_prefix", "- ")
        field_suffix = formatting.get("field_suffix", "\n")

        # Section title.
        title = section_config.get("title", "历史趋势")
        lines.append(f"{section_prefix}{title}{section_suffix}")

        # One line per daily record, prefixed by its date.
        fields_config = section_config.get("fields", {})
        for result in daily_results:
            parts = []
            for field_key, field_config in fields_config.items():
                if not field_config.get("enabled", True):
                    continue

                value = result.get(field_key)
                if value is not None:
                    formatted = self._format_value(value, field_config)
                    parts.append(formatted)

            if parts:
                date = result.get("date", "")
                line = f"{field_prefix}{date}:{''.join(parts)}{field_suffix}"
                lines.append(line)

        lines.append(formatting.get("line_separator", "\n"))
        return lines

    def format_for_llm(
        self,
        anomaly_result: Dict,
        baseline_info: Optional[Dict] = None,
        related_indicators: Optional[Dict] = None,
        user_profile: Optional[Dict] = None,
        daily_results: Optional[List[Dict]] = None
    ) -> str:
        """Render a detection result (plus optional context) as LLM input text.

        Only presents data; makes no judgments. All layout comes from the
        config, so new sections/fields can be added without code changes.
        """
        lines = []
        formatting = self.config.get("formatting", {})
        sections = self.config.get("sections", {})

        # Document header.
        header = formatting.get("header", "# 健康异常检测结果\n")
        lines.append(header)

        # Anomaly overview: supports both the aggregated-pattern shape and
        # the realtime-detection shape of the result dict.
        if "anomaly_overview" in sections:
            section_config = sections["anomaly_overview"]
            if section_config.get("enabled", True):
                if "anomaly_pattern" in anomaly_result:
                    pattern_data = anomaly_result["anomaly_pattern"]
                    lines.extend(self._format_section("anomaly_overview", pattern_data, section_config))
                elif "is_anomaly" in anomaly_result:
                    lines.extend(self._format_section("anomaly_overview", anomaly_result, section_config))

        # Core indicators: rename baseline keys to match the field config.
        if baseline_info and "core_indicators" in sections:
            section_config = sections["core_indicators"]
            core_data = {
                "hrv_rmssd": baseline_info.get("current_value"),
                "baseline_mean": baseline_info.get("baseline_mean"),
                "deviation_pct": baseline_info.get("deviation_pct")
            }
            lines.extend(self._format_section("core_indicators", core_data, section_config))

        # Historical trend.
        if daily_results and "historical_trend" in sections:
            section_config = sections["historical_trend"]
            lines.extend(self._format_historical_trend(daily_results, section_config))

        # Related health indicators.
        if related_indicators and "related_indicators" in sections:
            section_config = sections["related_indicators"]
            lines.extend(self._format_section("related_indicators", related_indicators, section_config))

        # User background info.
        if user_profile and "user_profile" in sections:
            section_config = sections["user_profile"]
            lines.extend(self._format_section("user_profile", user_profile, section_config))

        return "".join(lines)

    @staticmethod
    def format_realtime_result(result: Dict, config_path: Optional[Path] = None) -> str:
        """Format a realtime detection result (static helper, backward compatible)."""
        formatter = AnomalyFormatter(config_path)
        return formatter.format_for_llm(result)
|
wearable_anomaly_detector.py
ADDED
|
@@ -0,0 +1,785 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Wearable健康异常检测模型 - 标准化封装
|
| 3 |
+
提供简单的API接口,用于实时异常检测
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import numpy as np
|
| 8 |
+
import json
|
| 9 |
+
import pickle
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Dict, List, Optional, Union
|
| 12 |
+
from datetime import datetime, timedelta
|
| 13 |
+
import pandas as pd
|
| 14 |
+
|
| 15 |
+
# 添加项目根目录到路径
|
| 16 |
+
import sys
|
| 17 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 18 |
+
|
| 19 |
+
from models.phased_lstm_tft import PhasedLSTM_TFT, PhasedLSTM_TFT_WithEnhancedAnomalyDetection
|
| 20 |
+
from feature_calculator import FeatureCalculator
|
| 21 |
+
|
| 22 |
+
# 导入工具模块(可选,如果不存在则使用None)
|
| 23 |
+
try:
|
| 24 |
+
from utils.baseline_storage import BaselineStorage
|
| 25 |
+
from utils.api_client import HistoricalDataPlatformClient
|
| 26 |
+
from utils.formatter import AnomalyFormatter
|
| 27 |
+
except ImportError:
|
| 28 |
+
BaselineStorage = None
|
| 29 |
+
HistoricalDataPlatformClient = None
|
| 30 |
+
AnomalyFormatter = None
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class WearableAnomalyDetector:
|
| 34 |
+
"""
|
| 35 |
+
Wearable健康异常检测器
|
| 36 |
+
|
| 37 |
+
使用示例:
|
| 38 |
+
detector = WearableAnomalyDetector(model_dir="checkpoints/phase2/exp_factor_balanced")
|
| 39 |
+
result = detector.predict(data_points)
|
| 40 |
+
"""
|
| 41 |
+
|
| 42 |
+
    def __init__(
        self,
        model_dir: Union[str, Path],
        device: Optional[str] = None,
        threshold: Optional[float] = None
    ):
        """Initialize the anomaly detector.

        Args:
            model_dir: Model directory (containing best_model.pt and configs).
            device: 'cuda' or 'cpu'; auto-selected when None.
            threshold: Anomaly threshold; read from the config when None.
        """
        self.model_dir = Path(model_dir)
        self.device = torch.device(device or ('cuda' if torch.cuda.is_available() else 'cpu'))

        # Load model config (may legitimately be empty -> defaults below).
        self.config = self._load_config()

        # Resolve the anomaly threshold: explicit arg > config > default.
        if threshold is not None:
            self.threshold = float(threshold)
        else:
            config_threshold = self.config.get('threshold')
            if config_threshold is not None:
                self.threshold = float(config_threshold)
            else:
                self.threshold = 0.53  # default threshold
                # No warning here: falling back to the default is the normal case.

        # Config-driven feature calculation; must happen before model loading
        # because the feature list feeds shape inference fallbacks.
        self.feature_calculator = FeatureCalculator(
            config_path=self.config.get('feature_config_path'),
            norm_params_path=Path(__file__).parent / 'processed_data' / 'stage3' / 'norm_params.json',
            static_features_path=Path(__file__).parent / 'processed_data' / 'stage2' / 'static_features.csv',
            storage_dir=Path(self.config.get('storage_dir', Path(__file__).parent / 'data_storage'))
        )
        self.features = self.feature_calculator.get_enabled_feature_names()
        self.static_feature_names = [cfg["name"] for cfg in self.feature_calculator.static_feature_defs]
        # At least 1 so the known-future tensor is never zero-width.
        self.known_future_dim = max(len(self.feature_calculator.known_future_defs), 1)
        self.factor_metadata = {
            'enabled': self.feature_calculator.factor_enabled,
            'factor_names': self.feature_calculator.factor_names,
            'factor_dim': self.feature_calculator.factor_dim
        }

        # Load model (infers the true feature counts from Phase2 weights).
        self.model = self._load_model()
        self.model.eval()

        # Normalization params kept for backward compatibility.
        self.norm_params = self._load_norm_params()

        print(f"✅ 模型加载成功")
        print(f" - 设备: {self.device}")
        print(f" - 阈值: {self.threshold:.4f}")
        print(f" - 配置的特征数: {len(self.features)}")
        print(f" - 模型实际特征数: {self.model.base_model.tft.output_layer.weight.shape[0] if hasattr(self.model, 'base_model') else '未知'}")
|
| 101 |
+
|
| 102 |
+
def _load_config(self) -> Dict:
|
| 103 |
+
"""加载模型配置"""
|
| 104 |
+
# 尝试多个可能的配置文件路径
|
| 105 |
+
config_paths = [
|
| 106 |
+
self.model_dir / 'config.json',
|
| 107 |
+
self.model_dir.parent / 'config.json',
|
| 108 |
+
Path(__file__).parent / 'config.json',
|
| 109 |
+
Path(__file__).parent / 'configs' / 'model_config.json',
|
| 110 |
+
]
|
| 111 |
+
|
| 112 |
+
for config_file in config_paths:
|
| 113 |
+
if config_file.exists():
|
| 114 |
+
try:
|
| 115 |
+
with open(config_file, 'r', encoding='utf-8') as f:
|
| 116 |
+
config = json.load(f)
|
| 117 |
+
print(f" ✅ 找到配置文件: {config_file}")
|
| 118 |
+
return config
|
| 119 |
+
except Exception as e:
|
| 120 |
+
print(f" ⚠️ 读取配置文件失败 {config_file}: {e}")
|
| 121 |
+
continue
|
| 122 |
+
|
| 123 |
+
# 尝试从summary.json读取
|
| 124 |
+
summary_file = self.model_dir / 'summary.json'
|
| 125 |
+
if summary_file.exists():
|
| 126 |
+
try:
|
| 127 |
+
with open(summary_file, 'r', encoding='utf-8') as f:
|
| 128 |
+
summary = json.load(f)
|
| 129 |
+
config = {
|
| 130 |
+
'threshold': summary.get('best_threshold'),
|
| 131 |
+
'features': [], # 需要从其他地方获取
|
| 132 |
+
}
|
| 133 |
+
print(f" ✅ 从summary.json读取配置")
|
| 134 |
+
return config
|
| 135 |
+
except Exception as e:
|
| 136 |
+
print(f" ⚠️ 读取summary.json失败: {e}")
|
| 137 |
+
|
| 138 |
+
# 如果都没有,返回空配置(使用默认值,这是正常的)
|
| 139 |
+
# 不打印警告,因为使用默认配置是正常情况
|
| 140 |
+
return {}
|
| 141 |
+
|
| 142 |
+
    def _load_model(self):
        """Load the model directly from the Phase2 checkpoint (which contains
        the complete base_model weights as well as the anomaly head)."""
        # Locate the final (Phase2) checkpoint.
        phase2_model_path = self.model_dir / 'best_model.pt'
        if not phase2_model_path.exists():
            raise FileNotFoundError(f"Phase2模型不存在: {phase2_model_path}")

        print(f" 📦 加载Phase2 checkpoint: {phase2_model_path}")
        # weights_only=False: checkpoint stores more than raw tensors.
        # NOTE(review): only safe for trusted checkpoints — torch.load with
        # weights_only=False unpickles arbitrary objects.
        checkpoint_phase2 = torch.load(phase2_model_path, map_location=self.device, weights_only=False)
        phase2_state_dict = checkpoint_phase2['model_state_dict']

        # Infer the model configuration from weight shapes (the Phase2
        # checkpoint has the full base_model weights, so no Phase1 needed).
        if 'base_model.phased_lstm.lstm_layers.0.W_ih.weight' in phase2_state_dict:
            # Input-to-hidden weight's second dim == number of time-series features.
            inferred_num_features = phase2_state_dict['base_model.phased_lstm.lstm_layers.0.W_ih.weight'].shape[1]
        else:
            # Fallback: use the configured feature count.
            inferred_num_features = len(self.features) if hasattr(self, 'features') else 24

        if 'base_model.tft.static_embedding.weight' in phase2_state_dict:
            # static_embedding shape: [embedding_dim, num_static_features]
            inferred_num_static = phase2_state_dict['base_model.tft.static_embedding.weight'].shape[1]
        else:
            inferred_num_static = len(self.static_feature_names) if hasattr(self, 'static_feature_names') else 2

        # Placeholder: hidden size could be read from the checkpoint if stored.
        if 'base_model.tft.hidden_size' in phase2_state_dict:
            pass

        # Whether the checkpoint was trained with factor-feature fusion.
        has_factor_fusion = 'factor_fusion.projection.weight' in phase2_state_dict

        print(f" 📊 从Phase2权重推断的模型配置:")
        print(f" - 时间序列特征: {inferred_num_features}")
        print(f" - 静态特征: {inferred_num_static}")
        print(f" - Factor融合: {'是' if has_factor_fusion else '否'}")

        # Build the model config: shapes are inferred, hyper-parameters are
        # the training defaults. NOTE(review): these defaults must match the
        # training run or strict loading below will fail — confirm.
        model_config = {
            'num_features': inferred_num_features,
            'num_static_features': inferred_num_static,
            'num_known_future_features': 3,  # typically hour_of_day, day_of_week, is_weekend
            'lstm_hidden_size': 128,  # could be inferred from weight shapes; default used
            'lstm_layers': 2,  # inferable from weight key names
            'lstm_alpha': 0.0001,  # default
            'tft_hidden_size': 128,  # could be inferred from weight shapes
            'tft_num_heads': 4,  # default
            'tft_num_encoder_layers': 3,  # default
            'tft_num_decoder_layers': 3,  # default
            'tft_dim_feedforward': 512,  # default
            'dropout': 0.1,  # default
        }

        # Create the base model with the inferred configuration.
        base_model = PhasedLSTM_TFT(model_config)
        base_model = base_model.to(self.device)

        # Factor-fusion configuration (may be None when disabled).
        factor_config = self._load_factor_config()

        # Wrap with the Phase2 anomaly-detection head.
        model = PhasedLSTM_TFT_WithEnhancedAnomalyDetection(
            base_model,
            num_anomaly_types=4,
            use_enhanced_head=True,
            use_multi_source_heads=False,
            use_domain_adversarial=False,
            factor_config=factor_config
        )
        model = model.to(self.device)

        # Load the complete Phase2 weights (base_model + anomaly_head).
        # Try strict first; fall back to non-strict and report the key diff.
        print(f" 🔄 加载Phase2完整权重(包括base_model和anomaly_head)...")
        try:
            model.load_state_dict(phase2_state_dict, strict=True)
            print(f" ✅ Phase2模型权重加载成功(严格模式)")
        except RuntimeError as e:
            print(f" ⚠️ 严格模式加载失败,尝试宽松模式: {str(e)[:150]}...")
            missing_keys, unexpected_keys = model.load_state_dict(phase2_state_dict, strict=False)
            if missing_keys:
                print(f" ⚠️ 缺失的键 ({len(missing_keys)}个): {missing_keys[:3]}..." if len(missing_keys) > 3 else f" ⚠️ 缺失的键: {missing_keys}")
            if unexpected_keys:
                print(f" ⚠️ 意外的键 ({len(unexpected_keys)}个): {unexpected_keys[:3]}..." if len(unexpected_keys) > 3 else f" ⚠️ 意外的键: {unexpected_keys}")
            print(f" ✅ Phase2模型权重加载成功(宽松模式)")

        return model
|
| 228 |
+
|
| 229 |
+
def _load_factor_config(self) -> Optional[Dict]:
|
| 230 |
+
"""加载因子特征配置"""
|
| 231 |
+
# 方法1: 从config.json读取(如果已加载)
|
| 232 |
+
if hasattr(self, 'factor_metadata') and self.factor_metadata:
|
| 233 |
+
if self.factor_metadata.get('enabled'):
|
| 234 |
+
return {
|
| 235 |
+
'num_factors': len(self.factor_metadata.get('factor_names', [])),
|
| 236 |
+
'factor_dim': self.factor_metadata.get('factor_dim', 0),
|
| 237 |
+
'factor_names': self.factor_metadata.get('factor_names', []),
|
| 238 |
+
'min_weight': 0.2,
|
| 239 |
+
'dropout': 0.1,
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
# 方法2: 从窗口信息文件读取
|
| 243 |
+
window_info_file = Path(__file__).parent / 'processed_data' / 'stage3' / 'window_info_multi_scale.json'
|
| 244 |
+
if window_info_file.exists():
|
| 245 |
+
with open(window_info_file, 'r') as f:
|
| 246 |
+
window_info = json.load(f)
|
| 247 |
+
factor_metadata = window_info.get('factor_features', {})
|
| 248 |
+
if factor_metadata and factor_metadata.get('enabled'):
|
| 249 |
+
return {
|
| 250 |
+
'num_factors': len(factor_metadata.get('factor_names', [])),
|
| 251 |
+
'factor_dim': factor_metadata.get('factor_dim', 0),
|
| 252 |
+
'factor_names': factor_metadata.get('factor_names', []),
|
| 253 |
+
'min_weight': 0.2,
|
| 254 |
+
'dropout': 0.1,
|
| 255 |
+
}
|
| 256 |
+
return None
|
| 257 |
+
|
| 258 |
+
def _load_norm_params(self) -> Optional[Dict]:
|
| 259 |
+
"""加载归一化参数"""
|
| 260 |
+
norm_file = Path(__file__).parent / 'processed_data' / 'stage3' / 'norm_params.json'
|
| 261 |
+
if norm_file.exists():
|
| 262 |
+
with open(norm_file, 'r') as f:
|
| 263 |
+
return json.load(f)
|
| 264 |
+
return None
|
| 265 |
+
|
| 266 |
+
    def predict(
        self,
        data_points: List[Dict],
        return_score: bool = True,
        return_details: bool = False
    ) -> Dict:
        """Predict whether the given window of data points is anomalous.

        Args:
            data_points: List of data-point dicts, each containing:
                - timestamp: datetime or string
                - features: dict of feature values
                - static_features: optional dict of static features
            return_score: Include the anomaly score in the result.
            return_details: Include extra diagnostic details.

        Returns:
            {
                'is_anomaly': bool,        # anomaly verdict
                'anomaly_score': float,    # score in [0, 1] (when requested)
                'threshold': float,        # threshold used
                'details': dict (optional)
            }
        """
        # Identify the user; build_window may use per-user state.
        # NOTE(review): raises IndexError on an empty data_points list — confirm
        # callers always pass at least one point.
        user_id = data_points[0].get('deviceId') or data_points[0].get('user_id')
        window = self.feature_calculator.build_window(data_points, user_id=user_id)

        # Convert to the model's input tensors.
        model_input = self._prepare_model_input(window)

        # Run inference. The model's forward takes positional args, so the
        # first four must be passed in exactly this order.
        with torch.no_grad():
            outputs = self.model(
                model_input['x'],
                model_input['delta_t'],
                model_input['static_features'],
                model_input['known_future_features'],
                mask=model_input.get('mask'),
                return_contrastive_features=model_input.get('return_contrastive_features', False),
                source=None,
                return_domain_features=False,
                factor_features=model_input.get('factor_features')
            )
            anomaly_score = outputs['anomaly_score'].cpu().item()

        # Threshold the score to get the verdict.
        is_anomaly = anomaly_score >= self.threshold

        result = {
            'is_anomaly': bool(is_anomaly),
            'threshold': float(self.threshold),
        }

        if return_score:
            result['anomaly_score'] = float(anomaly_score)

        if return_details:
            result['details'] = {
                'window_size': len(data_points),
                'model_output': float(anomaly_score),
                # Distance from the threshold, used as a rough confidence proxy.
                'prediction_confidence': abs(anomaly_score - self.threshold),
            }

        return result
|
| 332 |
+
|
| 333 |
+
    def _prepare_model_input(self, window: Dict) -> Dict:
        """Convert a feature-calculator window dict into model input tensors.

        Returns a dict with keys matching the model forward signature:
        x [1, T, F], delta_t [1, T, 1], static_features [1, S],
        known_future_features [1, P, K], mask [1, T, F], factor_features
        ([1, num_factors, factor_dim] or None), plus flag defaults.
        """
        # Window length: taken from the first configured feature's series
        # (falls back to the training default of 12 when empty).
        window_size = len(window.get('input_features', {}).get(self.features[0] if self.features else 'hr', []))
        if window_size == 0:
            window_size = 12  # default

        # Stack per-feature series; missing features become zero series.
        input_features_list = []
        for feat in self.features:
            values = window['input_features'].get(feat, [0.0] * window_size)
            input_features_list.append(values)

        # [1, window_size, num_features]
        input_features = torch.tensor(
            np.stack(input_features_list, axis=1),
            dtype=torch.float32
        ).unsqueeze(0).to(self.device)

        # Inter-sample time deltas: [1, window_size, 1]
        delta_t = torch.tensor(
            window['input_delta_t'],
            dtype=torch.float32
        ).unsqueeze(-1).unsqueeze(0).to(self.device)

        # Static features, ordered by the configured names (or sorted keys
        # when no configuration is present); missing values default to 0.
        static_feature_values = []
        static_keys = self.static_feature_names or sorted(window['static_features'].keys())
        for key in static_keys:
            value = window['static_features'].get(key, 0.0)
            static_feature_values.append(float(value))

        # Keep at least one value so the tensor is never zero-width.
        if len(static_feature_values) == 0:
            static_feature_values = [0.0]

        # [1, num_static]
        static_features = torch.tensor(
            static_feature_values,
            dtype=torch.float32
        ).unsqueeze(0).to(self.device)

        # Known-future features for the prediction horizon.
        pred_len = len(window.get('target_timestamp', []))
        if pred_len == 0:
            pred_len = 6  # default prediction length

        known_future = torch.zeros(1, pred_len, self.known_future_dim, dtype=torch.float32).to(self.device)
        if 'known_future_features' in window:
            kf = window['known_future_features']
            for idx, cfg in enumerate(self.feature_calculator.known_future_defs):
                name = cfg['name']
                if name in kf:
                    series = kf[name][:pred_len]
                    # Scale calendar features to [0, 1]; others pass through.
                    if name == 'hour_of_day':
                        values = torch.tensor([float(h) / 23.0 for h in series], dtype=torch.float32)
                    elif name == 'day_of_week':
                        values = torch.tensor([float(d) / 6.0 for d in series], dtype=torch.float32)
                    else:
                        values = torch.tensor([float(v) for v in series], dtype=torch.float32)
                    known_future[0, :len(series), idx] = values

        # Input mask: all ones (assumes every sample is valid).
        window_size = input_features.shape[1]  # actual window size from the tensor
        input_mask = torch.ones(1, window_size, len(self.features), dtype=torch.float32).to(self.device)

        # Factor features (optional): one vector per configured factor,
        # truncated/defaulted to factor_dim.
        factor_features = None
        if window.get('factor_features'):
            factor_names = self.factor_metadata.get('factor_names', [])
            factor_dim = self.factor_metadata.get('factor_dim', 4)
            factor_vectors = []
            for name in factor_names:
                vec = window['factor_features'].get(name, [0.0] * factor_dim)
                factor_vectors.append(vec[:factor_dim])
            if factor_vectors:
                # [1, num_factors, factor_dim]
                factor_features = torch.tensor(
                    factor_vectors,
                    dtype=torch.float32
                ).unsqueeze(0).to(self.device)

        return {
            'x': input_features,
            'delta_t': delta_t,
            'static_features': static_features,
            'known_future_features': known_future,
            'mask': input_mask,
            'factor_features': factor_features,
            'return_contrastive_features': False,
            'source': None,
            'return_domain_features': False,
        }
|
| 421 |
+
|
| 422 |
+
def batch_predict(
|
| 423 |
+
self,
|
| 424 |
+
windows: List[List[Dict]],
|
| 425 |
+
return_scores: bool = True
|
| 426 |
+
) -> List[Dict]:
|
| 427 |
+
"""
|
| 428 |
+
批量预测
|
| 429 |
+
|
| 430 |
+
参数:
|
| 431 |
+
windows: 窗口列表,每个窗口是一个数据点列表
|
| 432 |
+
return_scores: 是否返回异常分数
|
| 433 |
+
|
| 434 |
+
返回:
|
| 435 |
+
预测结果列表
|
| 436 |
+
"""
|
| 437 |
+
results = []
|
| 438 |
+
for window_data in windows:
|
| 439 |
+
result = self.predict(window_data, return_score=return_scores)
|
| 440 |
+
results.append(result)
|
| 441 |
+
return results
|
| 442 |
+
|
| 443 |
+
def update_threshold(self, threshold: float):
    """Replace the anomaly-decision threshold used by subsequent predictions."""
    self.threshold = threshold
    confirmation = f"✅ 阈值已更新为: {threshold:.4f}"
    print(confirmation)
|
| 447 |
+
|
| 448 |
+
def detect_realtime(
    self,
    data_points: List[Dict],
    update_baseline: bool = True,
    return_score: bool = True,
    return_details: bool = False
) -> Dict:
    """
    Mode 1: real-time anomaly detection on a short data window.

    Parameters:
        data_points: list of data points (at least 12, i.e. one hour of data).
        update_baseline: whether to auto-update the user baseline (default True).
        return_score: include the anomaly score in the result.
        return_details: include detailed diagnostics in the result.

    Returns:
        The detection result dict produced by predict().
    """
    # Detection itself is a single predict() call over the window.
    detection = self.predict(data_points, return_score=return_score, return_details=return_details)

    # Optional best-effort baseline refresh — never allowed to break detection.
    if update_baseline and BaselineStorage:
        try:
            user_id = data_points[0].get('deviceId') or data_points[0].get('user_id')
            if user_id:
                # Collect the HRV samples that would feed the baseline update.
                samples = [point.get('features', {}).get('hrv_rmssd') for point in data_points]
                samples = [value for value in samples if value is not None]
                if samples:
                    avg_hrv = np.mean(samples)
                    # NOTE(review): BaselineStorage is expected to be initialized
                    # in __init__; until then this branch is intentionally a no-op.
                    pass
        except Exception as e:
            print(f"⚠️ 更新基线失败: {e}")

    return detection
|
| 487 |
+
|
| 488 |
+
def detect_pattern(
    self,
    data_points: List[Dict],
    days: Optional[int] = None,
    min_duration_days: Optional[int] = None,
    format_for_llm: bool = False,
    window_size: Optional[int] = None
) -> Dict:
    """
    Mode 2: anomaly-pattern aggregation over multi-day data.

    Parameters:
        data_points: multi-day data points. Either a flat list of point dicts,
            or (with `days` set) a nested list [[day1 points], [day2 points], ...].
        days: number of days when data_points is organized per-day.
        min_duration_days: minimum run length to count as a pattern
            (default: configs/detector_config.json, else 3).
        format_for_llm: also produce an LLM-ready formatted summary.
        window_size: minimum points needed to score a day
            (default: configs/detector_config.json, else 12).

    Returns:
        Dict with 'anomaly_pattern', 'baseline_info', 'related_indicators',
        'daily_results' (and 'formatted_for_llm' when requested).
    """
    # Fall back to the config file (then hard defaults) for any tuning
    # parameter the caller did not supply. Previously this logic was
    # duplicated inline for each parameter.
    if min_duration_days is None:
        min_duration_days = self._read_detector_config('pattern_detection', 'min_duration_days', 3)
    if window_size is None:
        window_size = self._read_detector_config('detection', 'window_size', 12)

    daily_results = []

    # Nested layout: data_points == [[day1 points], [day2 points], ...]
    # (empty input is tolerated instead of raising IndexError).
    if days and data_points and isinstance(data_points[0], list):
        for day_data in data_points:
            if len(day_data) < window_size:
                continue  # not enough points to score this day
            result = self.predict(day_data, return_score=True)
            hrv_samples = [dp.get('features', {}).get('hrv_rmssd', 0)
                           for dp in day_data if dp.get('features', {}).get('hrv_rmssd')]
            hr_samples = [dp.get('features', {}).get('hr', 0)
                          for dp in day_data if dp.get('features', {}).get('hr')]
            daily_results.append({
                'date': day_data[0].get('timestamp', ''),
                'anomaly_score': result.get('anomaly_score', 0.0),
                'is_anomaly': result.get('is_anomaly', False),
                'hrv_rmssd': np.mean(hrv_samples),
                'hr': np.mean(hr_samples)
            })
    # else: flat list — grouping by timestamp is not implemented yet, so no
    # daily results are produced (matches prior behavior).

    # Aggregate per-day results into a pattern verdict.
    pattern_result = self._detect_anomaly_pattern(daily_results, min_duration_days)

    # Baseline info for the user, when a deviceId is available (best effort).
    user_id = data_points[0].get('deviceId') if data_points and isinstance(data_points[0], dict) else None
    baseline_info = None
    if user_id:
        try:
            baseline_info = self.feature_calculator.get_baseline_info(
                user_id=user_id,
                feature_name='hrv_rmssd'
            )
        except Exception as e:
            print(f"⚠️ 获取基线信息失败: {e}")

    # Related physiological indicators (best effort).
    related_indicators = None
    try:
        if data_points and isinstance(data_points[0], dict):
            related_indicators = self.feature_calculator.get_related_indicators(data_points)
    except Exception as e:
        print(f"⚠️ 获取相关指标失败: {e}")

    result = {
        'anomaly_pattern': pattern_result,
        'baseline_info': baseline_info,
        'related_indicators': related_indicators,
        'daily_results': daily_results
    }

    # Optional LLM-ready formatting (only when the formatter import succeeded).
    if format_for_llm and AnomalyFormatter:
        formatter = AnomalyFormatter()
        result['formatted_for_llm'] = formatter.format_for_llm(
            result,
            baseline_info=baseline_info,
            related_indicators=related_indicators,
            daily_results=daily_results
        )

    return result

def _read_detector_config(self, section: str, key: str, default):
    """Read one value from configs/detector_config.json next to this file.

    Returns `default` when the file is missing, unreadable, or lacks the key.
    """
    try:
        config_path = Path(__file__).parent / "configs" / "detector_config.json"
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                detector_config = json.load(f)
            return detector_config.get(section, {}).get(key, default)
    except Exception:
        pass  # deliberately best-effort: config problems fall back to default
    return default
|
| 599 |
+
|
| 600 |
+
def _detect_anomaly_pattern(
|
| 601 |
+
self,
|
| 602 |
+
daily_results: List[Dict],
|
| 603 |
+
min_duration_days: int = 3
|
| 604 |
+
) -> Dict:
|
| 605 |
+
"""
|
| 606 |
+
检测异常模式(内部方法)
|
| 607 |
+
复用wearable_branch中的逻辑
|
| 608 |
+
"""
|
| 609 |
+
if not daily_results:
|
| 610 |
+
return {
|
| 611 |
+
'has_pattern': False,
|
| 612 |
+
'pattern_description': '无检测数据'
|
| 613 |
+
}
|
| 614 |
+
|
| 615 |
+
# 按日期排序
|
| 616 |
+
sorted_results = sorted(
|
| 617 |
+
daily_results,
|
| 618 |
+
key=lambda x: self._parse_date(x.get('date') or x.get('timestamp'))
|
| 619 |
+
)
|
| 620 |
+
|
| 621 |
+
# 提取异常日期和分数
|
| 622 |
+
anomaly_dates = []
|
| 623 |
+
anomaly_scores = []
|
| 624 |
+
|
| 625 |
+
for result in sorted_results:
|
| 626 |
+
date = self._parse_date(result.get('date') or result.get('timestamp'))
|
| 627 |
+
if date is None:
|
| 628 |
+
continue
|
| 629 |
+
|
| 630 |
+
date_str = date.strftime('%Y-%m-%d') if hasattr(date, 'strftime') else str(date)
|
| 631 |
+
score = result.get('anomaly_score', 0.0)
|
| 632 |
+
is_anomaly = result.get('is_anomaly', score >= self.threshold)
|
| 633 |
+
|
| 634 |
+
if is_anomaly:
|
| 635 |
+
anomaly_dates.append(date_str)
|
| 636 |
+
anomaly_scores.append(score)
|
| 637 |
+
|
| 638 |
+
# 判断是否存在异常模式
|
| 639 |
+
if len(anomaly_dates) < min_duration_days:
|
| 640 |
+
return {
|
| 641 |
+
'has_pattern': False,
|
| 642 |
+
'duration_days': len(anomaly_dates),
|
| 643 |
+
'anomaly_dates': anomaly_dates,
|
| 644 |
+
'anomaly_scores': anomaly_scores,
|
| 645 |
+
'pattern_description': f'异常仅持续{len(anomaly_dates)}天,未达到最小持续天数{min_duration_days}天'
|
| 646 |
+
}
|
| 647 |
+
|
| 648 |
+
# 计算趋势
|
| 649 |
+
trend = self._calculate_trend(anomaly_scores)
|
| 650 |
+
|
| 651 |
+
# 计算统计信息
|
| 652 |
+
max_score = max(anomaly_scores) if anomaly_scores else 0.0
|
| 653 |
+
min_score = min(anomaly_scores) if anomaly_scores else 0.0
|
| 654 |
+
avg_score = sum(anomaly_scores) / len(anomaly_scores) if anomaly_scores else 0.0
|
| 655 |
+
|
| 656 |
+
# 生成模式描述
|
| 657 |
+
trend_desc = {
|
| 658 |
+
'worsening': '持续恶化',
|
| 659 |
+
'stable': '稳定异常',
|
| 660 |
+
'improving': '逐渐改善'
|
| 661 |
+
}.get(trend, '未知趋势')
|
| 662 |
+
|
| 663 |
+
pattern_description = (
|
| 664 |
+
f"检测到持续{len(anomaly_dates)}天的异常模式,"
|
| 665 |
+
f"趋势:{trend_desc},"
|
| 666 |
+
f"异常分数范围:{min_score:.4f} - {max_score:.4f},"
|
| 667 |
+
f"平均异常分数:{avg_score:.4f}"
|
| 668 |
+
)
|
| 669 |
+
|
| 670 |
+
return {
|
| 671 |
+
'has_pattern': True,
|
| 672 |
+
'anomaly_type': 'continuous_anomaly',
|
| 673 |
+
'duration_days': len(anomaly_dates),
|
| 674 |
+
'trend': trend,
|
| 675 |
+
'anomaly_scores': anomaly_scores,
|
| 676 |
+
'anomaly_dates': anomaly_dates,
|
| 677 |
+
'pattern_description': pattern_description,
|
| 678 |
+
'first_anomaly_date': anomaly_dates[0] if anomaly_dates else '',
|
| 679 |
+
'last_anomaly_date': anomaly_dates[-1] if anomaly_dates else '',
|
| 680 |
+
'max_score': max_score,
|
| 681 |
+
'min_score': min_score,
|
| 682 |
+
'avg_score': avg_score
|
| 683 |
+
}
|
| 684 |
+
|
| 685 |
+
def _parse_date(self, date_input):
|
| 686 |
+
"""解析日期"""
|
| 687 |
+
if date_input is None:
|
| 688 |
+
return None
|
| 689 |
+
if isinstance(date_input, datetime):
|
| 690 |
+
return date_input
|
| 691 |
+
if isinstance(date_input, str):
|
| 692 |
+
try:
|
| 693 |
+
return pd.to_datetime(date_input)
|
| 694 |
+
except:
|
| 695 |
+
return None
|
| 696 |
+
return None
|
| 697 |
+
|
| 698 |
+
def _calculate_trend(self, scores: List[float]) -> str:
|
| 699 |
+
"""计算趋势"""
|
| 700 |
+
if len(scores) < 2:
|
| 701 |
+
return 'stable'
|
| 702 |
+
|
| 703 |
+
# 简单线性回归判断趋势
|
| 704 |
+
n = len(scores)
|
| 705 |
+
x = list(range(n))
|
| 706 |
+
y = scores
|
| 707 |
+
|
| 708 |
+
sum_x = sum(x)
|
| 709 |
+
sum_y = sum(y)
|
| 710 |
+
sum_xy = sum(x[i] * y[i] for i in range(n))
|
| 711 |
+
sum_x2 = sum(x[i] ** 2 for i in range(n))
|
| 712 |
+
|
| 713 |
+
slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x ** 2) if (n * sum_x2 - sum_x ** 2) != 0 else 0
|
| 714 |
+
|
| 715 |
+
if slope > 0.01:
|
| 716 |
+
return 'worsening'
|
| 717 |
+
elif slope < -0.01:
|
| 718 |
+
return 'improving'
|
| 719 |
+
else:
|
| 720 |
+
return 'stable'
|
| 721 |
+
|
| 722 |
+
|
| 723 |
+
def load_detector(model_dir: Union[str, Path], **kwargs) -> WearableAnomalyDetector:
    """
    Convenience constructor for a WearableAnomalyDetector.

    Parameters:
        model_dir: path to the model directory.
        **kwargs: forwarded to the detector (e.g. device, threshold).

    Returns:
        A ready-to-use WearableAnomalyDetector instance.
    """
    detector = WearableAnomalyDetector(model_dir, **kwargs)
    return detector
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
if __name__ == '__main__':
    # Usage example: load a trained detector and score one hour of simulated data.
    print("=" * 80)
    print("Wearable健康异常检测器 - 使用示例")
    print("=" * 80)

    # Load the model from the packaged Phase-2 checkpoint directory.
    model_dir = Path(__file__).parent / 'checkpoints' / 'phase2' / 'exp_factor_balanced'
    detector = load_detector(model_dir)

    # Simulated data points (real usage would read from a live data stream).
    print("\n模拟数据点...")
    data_points = []
    base_time = datetime.now()

    # Reuse a real deviceId from the static-feature table when one is available
    # (otherwise a full static-feature example would have to be provided),
    # so the detector can fill in the complete static feature vector.
    example_device_id = None
    static_dict = detector.feature_calculator.static_features_dict
    if static_dict:
        example_device_id = list(static_dict.keys())[0]
        print(f" 使用示例用户ID: {example_device_id}")

    for i in range(12):
        data_point = {
            'timestamp': base_time.replace(minute=i*5),
            'deviceId': example_device_id,  # lets the detector load full static features
            'features': {
                'hr': 70.0 + np.random.randn() * 5,
                'hrv_rmssd': 30.0 + np.random.randn() * 3,
                # ... other features omitted here (a real call needs all 36)
            },
            'static_features': {
                # May be partial — missing values are filled from the static
                # feature table — or omitted entirely to load everything
                # from the table.
            }
        }
        data_points.append(data_point)

    # Run a single prediction over the simulated window.
    result = detector.predict(data_points, return_score=True, return_details=True)

    print(f"\n预测结果:")
    print(f" - 是否异常: {result['is_anomaly']}")
    print(f" - 异常分数: {result['anomaly_score']:.4f}")
    print(f" - 阈值: {result['threshold']:.4f}")
    if 'details' in result:
        print(f" - 详细信息: {result['details']}")