Spaces:
Running
Running
feat: Add data source configuration and implement yfinance integration for US stock indices
Browse files- Created a new configuration file `data_source_config.py` to manage data source versioning between yfinance and akshare.
- Implemented `preprocess_yfinance.py` to initialize stock index data using yfinance, including error handling and data formatting.
- Developed `us_stock_yfinance.py` to fetch stock data, manage stock indices, and handle real-time price retrieval with caching and retry mechanisms.
- Added functions for processing stock history and extracting sentiment scores from news text.
- Introduced asynchronous data fetching for improved performance and responsiveness.
- data_source_config.py +30 -0
- preprocess.py +131 -4
- preprocess_yfinance.py +248 -0
- requirements.txt +21 -3
- us_stock.py +254 -62
- us_stock_yfinance.py +577 -0
data_source_config.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 配置文件:控制数据源版本切换
|
| 2 |
+
# ================================
|
| 3 |
+
# 数据源版本切换配置
|
| 4 |
+
# ================================
|
| 5 |
+
|
| 6 |
+
# 设置为 True 使用新版 yfinance 实现
|
| 7 |
+
# 设置为 False 使用旧版 akshare 实现
|
| 8 |
+
USE_YFINANCE_VERSION = True
|
| 9 |
+
|
| 10 |
+
# 你可以在这里快速切换版本:
|
| 11 |
+
# USE_YFINANCE_VERSION = False # 切换到 akshare
|
| 12 |
+
# USE_YFINANCE_VERSION = True # 切换到 yfinance
|
| 13 |
+
|
| 14 |
+
# ================================
|
| 15 |
+
# 其他配置选项
|
| 16 |
+
# ================================
|
| 17 |
+
|
| 18 |
+
# 数据缓存时间(分钟)
|
| 19 |
+
PRICE_CACHE_MINUTES = 30
|
| 20 |
+
|
| 21 |
+
# API 超时时间(秒)
|
| 22 |
+
API_TIMEOUT_SECONDS = 30
|
| 23 |
+
|
| 24 |
+
# 最大重试次数
|
| 25 |
+
MAX_RETRY_ATTEMPTS = 3
|
| 26 |
+
|
| 27 |
+
# 调试模式
|
| 28 |
+
DEBUG_MODE = False
|
| 29 |
+
|
| 30 |
+
print(f"📊 数据源配置: {'yfinance (新版)' if USE_YFINANCE_VERSION else 'akshare (旧版)'}")
|
preprocess.py
CHANGED
|
@@ -1,3 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import re
|
| 2 |
import sys
|
| 3 |
import os
|
|
@@ -18,6 +52,14 @@ from transformers import pipeline
|
|
| 18 |
|
| 19 |
# 还需要导入 pickle 模块(如果你在代码的其他部分使用了它来处理序列化/反序列化)
|
| 20 |
import pickle
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
from gensim.models import KeyedVectors
|
| 22 |
import akshare as ak
|
| 23 |
|
|
@@ -81,10 +123,95 @@ def get_tokenizer_and_model(model_type="one"):
|
|
| 81 |
|
| 82 |
return _models[model_type]
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
|
| 90 |
class LazyWord2Vec:
|
|
|
|
| 1 |
+
# ================================
|
| 2 |
+
# 版本切换开关 - 从配置文件导入
|
| 3 |
+
# ================================
|
| 4 |
+
from data_source_config import USE_YFINANCE_VERSION
|
| 5 |
+
|
| 6 |
+
import re
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
import trace
|
| 10 |
+
import traceback
|
| 11 |
+
from typing import final
|
| 12 |
+
import numpy as np
|
| 13 |
+
from collections import defaultdict
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import time
|
| 16 |
+
|
| 17 |
+
# 如果使用 spaCy 进行 NLP 处理
|
| 18 |
+
from regex import R
|
| 19 |
+
import spacy
|
| 20 |
+
|
| 21 |
+
# 如果使用某种情感分析工具,比如 Hugging Face 的模型
|
| 22 |
+
from transformers import pipeline
|
| 23 |
+
|
| 24 |
+
# 还需要导入 pickle 模块(如果你在代码的其他部分使用了它来处理序列化/反序列化)
|
| 25 |
+
import pickle
|
| 26 |
+
|
| 27 |
+
# 根据开关导入不同的模块
|
| 28 |
+
if USE_YFINANCE_VERSION:
|
| 29 |
+
import yfinance as yf
|
| 30 |
+
print("🔄 Using yfinance version in preprocess (new)")
|
| 31 |
+
else:
|
| 32 |
+
import akshare as ak
|
| 33 |
+
print("🔄 Using akshare version in preprocess (old)")
|
| 34 |
+
|
| 35 |
import re
|
| 36 |
import sys
|
| 37 |
import os
|
|
|
|
| 52 |
|
| 53 |
# 还需要导入 pickle 模块(如果你在代码的其他部分使用了它来处理序列化/反序列化)
|
| 54 |
import pickle
|
| 55 |
+
|
| 56 |
+
# 根据开关导入不同的模块
|
| 57 |
+
if USE_YFINANCE_VERSION:
|
| 58 |
+
import yfinance as yf
|
| 59 |
+
print("🔄 Using yfinance version in preprocess (new)")
|
| 60 |
+
else:
|
| 61 |
+
import akshare as ak
|
| 62 |
+
print("🔄 Using akshare version in preprocess (old)")
|
| 63 |
from gensim.models import KeyedVectors
|
| 64 |
import akshare as ak
|
| 65 |
|
|
|
|
| 123 |
|
| 124 |
return _models[model_type]
|
| 125 |
|
| 126 |
+
# 初始化股票指数数据,根据开关选择不同的实现
|
| 127 |
+
def init_stock_indices():
|
| 128 |
+
"""根据版本开关初始化股票指数数据"""
|
| 129 |
+
global index_us_stock_index_INX, index_us_stock_index_DJI, index_us_stock_index_IXIC, index_us_stock_index_NDX
|
| 130 |
+
|
| 131 |
+
if USE_YFINANCE_VERSION:
|
| 132 |
+
print("Initializing stock indices using yfinance...")
|
| 133 |
+
try:
|
| 134 |
+
from datetime import datetime, timedelta
|
| 135 |
+
|
| 136 |
+
# 计算日期范围
|
| 137 |
+
end_date = datetime.now()
|
| 138 |
+
start_date = end_date - timedelta(weeks=8)
|
| 139 |
+
|
| 140 |
+
# 定义指数映射
|
| 141 |
+
indices = {
|
| 142 |
+
'^GSPC': 'INX', # S&P 500
|
| 143 |
+
'^DJI': 'DJI', # Dow Jones
|
| 144 |
+
'^IXIC': 'IXIC', # NASDAQ Composite
|
| 145 |
+
'^NDX': 'NDX' # NASDAQ 100
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
results = {}
|
| 149 |
+
|
| 150 |
+
for yf_symbol, var_name in indices.items():
|
| 151 |
+
try:
|
| 152 |
+
ticker = yf.Ticker(yf_symbol)
|
| 153 |
+
hist_data = ticker.history(start=start_date, end=end_date)
|
| 154 |
+
|
| 155 |
+
if not hist_data.empty:
|
| 156 |
+
# 转换为与akshare相同的格式
|
| 157 |
+
formatted_data = pd.DataFrame({
|
| 158 |
+
'date': hist_data.index.strftime('%Y-%m-%d'),
|
| 159 |
+
'开盘': hist_data['Open'].values,
|
| 160 |
+
'收盘': hist_data['Close'].values,
|
| 161 |
+
'最高': hist_data['High'].values,
|
| 162 |
+
'最低': hist_data['Low'].values,
|
| 163 |
+
'成交量': hist_data['Volume'].values,
|
| 164 |
+
'成交额': (hist_data['Close'] * hist_data['Volume']).values
|
| 165 |
+
})
|
| 166 |
+
results[var_name] = formatted_data
|
| 167 |
+
else:
|
| 168 |
+
results[var_name] = pd.DataFrame()
|
| 169 |
+
|
| 170 |
+
except Exception as e:
|
| 171 |
+
print(f"Error fetching {yf_symbol}: {e}")
|
| 172 |
+
results[var_name] = pd.DataFrame()
|
| 173 |
+
|
| 174 |
+
# 设置全局变量
|
| 175 |
+
index_us_stock_index_INX = results.get('INX', pd.DataFrame())
|
| 176 |
+
index_us_stock_index_DJI = results.get('DJI', pd.DataFrame())
|
| 177 |
+
index_us_stock_index_IXIC = results.get('IXIC', pd.DataFrame())
|
| 178 |
+
index_us_stock_index_NDX = results.get('NDX', pd.DataFrame())
|
| 179 |
+
|
| 180 |
+
except Exception as e:
|
| 181 |
+
print(f"Error initializing indices with yfinance: {e}")
|
| 182 |
+
# 设置空DataFrame作为fallback
|
| 183 |
+
index_us_stock_index_INX = pd.DataFrame()
|
| 184 |
+
index_us_stock_index_DJI = pd.DataFrame()
|
| 185 |
+
index_us_stock_index_IXIC = pd.DataFrame()
|
| 186 |
+
index_us_stock_index_NDX = pd.DataFrame()
|
| 187 |
+
else:
|
| 188 |
+
print("Initializing stock indices using akshare...")
|
| 189 |
+
try:
|
| 190 |
+
index_us_stock_index_INX = ak.index_us_stock_sina(symbol=".INX")
|
| 191 |
+
index_us_stock_index_DJI = ak.index_us_stock_sina(symbol=".DJI")
|
| 192 |
+
index_us_stock_index_IXIC = ak.index_us_stock_sina(symbol=".IXIC")
|
| 193 |
+
index_us_stock_index_NDX = ak.index_us_stock_sina(symbol=".NDX")
|
| 194 |
+
except Exception as e:
|
| 195 |
+
print(f"Error initializing indices with akshare: {e}")
|
| 196 |
+
index_us_stock_index_INX = pd.DataFrame()
|
| 197 |
+
index_us_stock_index_DJI = pd.DataFrame()
|
| 198 |
+
index_us_stock_index_IXIC = pd.DataFrame()
|
| 199 |
+
index_us_stock_index_NDX = pd.DataFrame()
|
| 200 |
+
|
| 201 |
+
# 延迟初始化索引数据
|
| 202 |
+
import threading
|
| 203 |
+
def delayed_init():
|
| 204 |
+
time.sleep(5) # 等待5秒
|
| 205 |
+
init_stock_indices()
|
| 206 |
+
|
| 207 |
+
init_thread = threading.Thread(target=delayed_init, daemon=True)
|
| 208 |
+
init_thread.start()
|
| 209 |
+
|
| 210 |
+
# 设置初始值为None,等待延迟初始化
|
| 211 |
+
index_us_stock_index_INX = None
|
| 212 |
+
index_us_stock_index_DJI = None
|
| 213 |
+
index_us_stock_index_IXIC = None
|
| 214 |
+
index_us_stock_index_NDX = None
|
| 215 |
|
| 216 |
|
| 217 |
class LazyWord2Vec:
|
preprocess_yfinance.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from datetime import datetime, timedelta, date
|
| 3 |
+
import numpy as np
|
| 4 |
+
import asyncio
|
| 5 |
+
import threading
|
| 6 |
+
import time
|
| 7 |
+
import yfinance as yf
|
| 8 |
+
|
| 9 |
+
# 索引变量初始化
|
| 10 |
+
# 以下变量在外部模块中定义并在运行时更新
|
| 11 |
+
index_us_stock_index_INX = None
|
| 12 |
+
index_us_stock_index_DJI = None
|
| 13 |
+
index_us_stock_index_IXIC = None
|
| 14 |
+
index_us_stock_index_NDX = None
|
| 15 |
+
|
| 16 |
+
def init_stock_index_data():
|
| 17 |
+
"""初始化股票指数数据,使用 yfinance"""
|
| 18 |
+
global index_us_stock_index_INX, index_us_stock_index_DJI, index_us_stock_index_IXIC, index_us_stock_index_NDX
|
| 19 |
+
|
| 20 |
+
try:
|
| 21 |
+
# 计算日期范围
|
| 22 |
+
end_date = datetime.now()
|
| 23 |
+
start_date = end_date - timedelta(weeks=8)
|
| 24 |
+
|
| 25 |
+
# 定义指数映射
|
| 26 |
+
indices = {
|
| 27 |
+
'^GSPC': 'INX', # S&P 500
|
| 28 |
+
'^DJI': 'DJI', # Dow Jones
|
| 29 |
+
'^IXIC': 'IXIC', # NASDAQ Composite
|
| 30 |
+
'^NDX': 'NDX' # NASDAQ 100
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
results = {}
|
| 34 |
+
|
| 35 |
+
for yf_symbol, var_name in indices.items():
|
| 36 |
+
try:
|
| 37 |
+
print(f"Fetching {var_name} data using yfinance...")
|
| 38 |
+
ticker = yf.Ticker(yf_symbol)
|
| 39 |
+
hist_data = ticker.history(start=start_date, end=end_date)
|
| 40 |
+
|
| 41 |
+
if not hist_data.empty:
|
| 42 |
+
# 转换为与原来相同的格式
|
| 43 |
+
formatted_data = pd.DataFrame({
|
| 44 |
+
'date': hist_data.index.strftime('%Y-%m-%d'),
|
| 45 |
+
'开盘': hist_data['Open'].values,
|
| 46 |
+
'收盘': hist_data['Close'].values,
|
| 47 |
+
'最高': hist_data['High'].values,
|
| 48 |
+
'最低': hist_data['Low'].values,
|
| 49 |
+
'成交量': hist_data['Volume'].values,
|
| 50 |
+
'成交额': (hist_data['Close'] * hist_data['Volume']).values
|
| 51 |
+
})
|
| 52 |
+
results[var_name] = formatted_data
|
| 53 |
+
print(f"Successfully fetched {var_name}: {len(formatted_data)} records")
|
| 54 |
+
else:
|
| 55 |
+
print(f"No data for {yf_symbol}")
|
| 56 |
+
results[var_name] = pd.DataFrame()
|
| 57 |
+
|
| 58 |
+
except Exception as e:
|
| 59 |
+
print(f"Error fetching {yf_symbol}: {e}")
|
| 60 |
+
results[var_name] = pd.DataFrame()
|
| 61 |
+
|
| 62 |
+
# 设置全局变量
|
| 63 |
+
index_us_stock_index_INX = results.get('INX', pd.DataFrame())
|
| 64 |
+
index_us_stock_index_DJI = results.get('DJI', pd.DataFrame())
|
| 65 |
+
index_us_stock_index_IXIC = results.get('IXIC', pd.DataFrame())
|
| 66 |
+
index_us_stock_index_NDX = results.get('NDX', pd.DataFrame())
|
| 67 |
+
|
| 68 |
+
print("Stock indices initialized successfully using yfinance")
|
| 69 |
+
|
| 70 |
+
except Exception as e:
|
| 71 |
+
print(f"Error initializing stock indices: {e}")
|
| 72 |
+
# 设置空的DataFrame作为fallback
|
| 73 |
+
index_us_stock_index_INX = pd.DataFrame()
|
| 74 |
+
index_us_stock_index_DJI = pd.DataFrame()
|
| 75 |
+
index_us_stock_index_IXIC = pd.DataFrame()
|
| 76 |
+
index_us_stock_index_NDX = pd.DataFrame()
|
| 77 |
+
|
| 78 |
+
def delayed_init_indices():
|
| 79 |
+
"""延迟初始化指数数据"""
|
| 80 |
+
time.sleep(5) # 等待5秒后开始初始化
|
| 81 |
+
init_stock_index_data()
|
| 82 |
+
|
| 83 |
+
# 启动延迟初始化
|
| 84 |
+
init_thread = threading.Thread(target=delayed_init_indices, daemon=True)
|
| 85 |
+
init_thread.start()
|
| 86 |
+
|
| 87 |
+
# 下面是原有的其他函数,保持不变...
|
| 88 |
+
|
| 89 |
+
# 新的文本时间处理函数
|
| 90 |
+
def parse_time(time_str):
|
| 91 |
+
"""解析时间字符串并返回规范化的日期格式"""
|
| 92 |
+
if not time_str:
|
| 93 |
+
return None
|
| 94 |
+
|
| 95 |
+
today = date.today()
|
| 96 |
+
|
| 97 |
+
# 处理相对时间表达
|
| 98 |
+
if '昨天' in time_str or '昨日' in time_str:
|
| 99 |
+
return (today - timedelta(days=1)).strftime('%Y-%m-%d')
|
| 100 |
+
elif '今天' in time_str or '今日' in time_str:
|
| 101 |
+
return today.strftime('%Y-%m-%d')
|
| 102 |
+
elif '前天' in time_str:
|
| 103 |
+
return (today - timedelta(days=2)).strftime('%Y-%m-%d')
|
| 104 |
+
elif '上周' in time_str:
|
| 105 |
+
return (today - timedelta(weeks=1)).strftime('%Y-%m-%d')
|
| 106 |
+
elif '上月' in time_str:
|
| 107 |
+
return (today - timedelta(days=30)).strftime('%Y-%m-%d')
|
| 108 |
+
|
| 109 |
+
# 处理具体日期格式
|
| 110 |
+
try:
|
| 111 |
+
# 尝试多种日期格式
|
| 112 |
+
formats = ['%Y-%m-%d', '%Y/%m/%d', '%m/%d/%Y', '%m-%d-%Y', '%d/%m/%Y', '%d-%m-%Y']
|
| 113 |
+
for fmt in formats:
|
| 114 |
+
try:
|
| 115 |
+
parsed_date = datetime.strptime(time_str, fmt).date()
|
| 116 |
+
return parsed_date.strftime('%Y-%m-%d')
|
| 117 |
+
except ValueError:
|
| 118 |
+
continue
|
| 119 |
+
except:
|
| 120 |
+
pass
|
| 121 |
+
|
| 122 |
+
# 如果无法解析,返回今天的日期
|
| 123 |
+
return today.strftime('%Y-%m-%d')
|
| 124 |
+
|
| 125 |
+
# 原有的其他函数...
|
| 126 |
+
def preprocess_news_text(text):
|
| 127 |
+
"""预处理新闻文本"""
|
| 128 |
+
# 移除多余的空白字符
|
| 129 |
+
text = ' '.join(text.split())
|
| 130 |
+
# 转换为小写
|
| 131 |
+
text = text.lower()
|
| 132 |
+
return text
|
| 133 |
+
|
| 134 |
+
def extract_sentiment_score(text):
|
| 135 |
+
"""提取情感分数的占位符函数"""
|
| 136 |
+
# 这里可以集成实际的���感分析模型
|
| 137 |
+
# 目前返回一个基于文本长度的简单分数
|
| 138 |
+
if not text:
|
| 139 |
+
return 0.0
|
| 140 |
+
|
| 141 |
+
positive_words = ['good', 'great', 'excellent', 'positive', 'growth', 'profit', 'gain', 'rise', 'up']
|
| 142 |
+
negative_words = ['bad', 'poor', 'negative', 'loss', 'decline', 'fall', 'down', 'crash']
|
| 143 |
+
|
| 144 |
+
text_lower = text.lower()
|
| 145 |
+
positive_count = sum(1 for word in positive_words if word in text_lower)
|
| 146 |
+
negative_count = sum(1 for word in negative_words if word in text_lower)
|
| 147 |
+
|
| 148 |
+
if positive_count > negative_count:
|
| 149 |
+
return min(1.0, positive_count * 0.2)
|
| 150 |
+
elif negative_count > positive_count:
|
| 151 |
+
return max(-1.0, -negative_count * 0.2)
|
| 152 |
+
else:
|
| 153 |
+
return 0.0
|
| 154 |
+
|
| 155 |
+
def calculate_technical_indicators(price_data):
|
| 156 |
+
"""计算技术指标"""
|
| 157 |
+
if price_data.empty:
|
| 158 |
+
return {}
|
| 159 |
+
|
| 160 |
+
close_prices = price_data['close']
|
| 161 |
+
|
| 162 |
+
# 简单移动平均线
|
| 163 |
+
sma_5 = close_prices.rolling(window=5).mean().iloc[-1] if len(close_prices) >= 5 else close_prices.iloc[-1]
|
| 164 |
+
sma_10 = close_prices.rolling(window=10).mean().iloc[-1] if len(close_prices) >= 10 else close_prices.iloc[-1]
|
| 165 |
+
|
| 166 |
+
# RSI (相对强弱指数)
|
| 167 |
+
def calculate_rsi(prices, window=14):
|
| 168 |
+
if len(prices) < window:
|
| 169 |
+
return 50.0 # 默认值
|
| 170 |
+
|
| 171 |
+
delta = prices.diff()
|
| 172 |
+
gain = delta.where(delta > 0, 0)
|
| 173 |
+
loss = -delta.where(delta < 0, 0)
|
| 174 |
+
|
| 175 |
+
avg_gain = gain.rolling(window=window).mean()
|
| 176 |
+
avg_loss = loss.rolling(window=window).mean()
|
| 177 |
+
|
| 178 |
+
rs = avg_gain / avg_loss
|
| 179 |
+
rsi = 100 - (100 / (1 + rs))
|
| 180 |
+
return rsi.iloc[-1]
|
| 181 |
+
|
| 182 |
+
rsi = calculate_rsi(close_prices)
|
| 183 |
+
|
| 184 |
+
# 价格变化百分比
|
| 185 |
+
price_change = ((close_prices.iloc[-1] - close_prices.iloc[0]) / close_prices.iloc[0] * 100) if len(close_prices) > 1 else 0
|
| 186 |
+
|
| 187 |
+
return {
|
| 188 |
+
'sma_5': sma_5,
|
| 189 |
+
'sma_10': sma_10,
|
| 190 |
+
'rsi': rsi,
|
| 191 |
+
'price_change_pct': price_change
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
def normalize_features(features_dict):
|
| 195 |
+
"""标准化特征值"""
|
| 196 |
+
normalized = {}
|
| 197 |
+
|
| 198 |
+
for key, value in features_dict.items():
|
| 199 |
+
if isinstance(value, (int, float)) and not pd.isna(value):
|
| 200 |
+
# 简单的min-max标准化到[-1, 1]范围
|
| 201 |
+
if key == 'rsi':
|
| 202 |
+
normalized[key] = (value - 50) / 50 # RSI标准化
|
| 203 |
+
elif key.endswith('_pct'):
|
| 204 |
+
normalized[key] = np.tanh(value / 100) # 百分比变化标准化
|
| 205 |
+
else:
|
| 206 |
+
normalized[key] = np.tanh(value / 1000) # 其他数值标准化
|
| 207 |
+
else:
|
| 208 |
+
normalized[key] = 0.0
|
| 209 |
+
|
| 210 |
+
return normalized
|
| 211 |
+
|
| 212 |
+
# 主要的预处理函数
|
| 213 |
+
def preprocess_for_model(news_text, stock_symbol, news_date):
|
| 214 |
+
"""为模型预处理数据"""
|
| 215 |
+
try:
|
| 216 |
+
# 预处理文本
|
| 217 |
+
processed_text = preprocess_news_text(news_text)
|
| 218 |
+
|
| 219 |
+
# 解析日期
|
| 220 |
+
parsed_date = parse_time(news_date)
|
| 221 |
+
|
| 222 |
+
# 提取情感分数
|
| 223 |
+
sentiment_score = extract_sentiment_score(processed_text)
|
| 224 |
+
|
| 225 |
+
# 这里应该调用股票数据获取函数
|
| 226 |
+
# 由于需要避免循环导入,这里只返回基本特征
|
| 227 |
+
|
| 228 |
+
return {
|
| 229 |
+
'processed_text': processed_text,
|
| 230 |
+
'sentiment_score': sentiment_score,
|
| 231 |
+
'news_date': parsed_date,
|
| 232 |
+
'stock_symbol': stock_symbol
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
except Exception as e:
|
| 236 |
+
print(f"Error in preprocess_for_model: {e}")
|
| 237 |
+
return {
|
| 238 |
+
'processed_text': news_text,
|
| 239 |
+
'sentiment_score': 0.0,
|
| 240 |
+
'news_date': date.today().strftime('%Y-%m-%d'),
|
| 241 |
+
'stock_symbol': stock_symbol
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
if __name__ == "__main__":
|
| 245 |
+
# 测试函数
|
| 246 |
+
test_text = "Apple Inc. reported strong quarterly earnings, beating expectations."
|
| 247 |
+
result = preprocess_for_model(test_text, "AAPL", "2024-02-14")
|
| 248 |
+
print(f"Preprocessing result: {result}")
|
requirements.txt
CHANGED
|
@@ -3,12 +3,12 @@ blis==0.7.11
|
|
| 3 |
spacy==3.7.5
|
| 4 |
gensim
|
| 5 |
numpy
|
| 6 |
-
gensim
|
| 7 |
fastapi
|
| 8 |
requests
|
| 9 |
sentencepiece
|
|
|
|
| 10 |
transformers
|
| 11 |
-
uvicorn[standard]==0.
|
| 12 |
keras==3.6.0
|
| 13 |
yfinance==0.2.65
|
| 14 |
jsonpath==0.82.2
|
|
@@ -17,5 +17,23 @@ pydantic==2.9.2
|
|
| 17 |
pydantic_core==2.23.4
|
| 18 |
nltk
|
| 19 |
gunicorn
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
--only-binary torch
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
spacy==3.7.5
|
| 4 |
gensim
|
| 5 |
numpy
|
|
|
|
| 6 |
fastapi
|
| 7 |
requests
|
| 8 |
sentencepiece
|
| 9 |
+
# 建议锁个合理范围,避免上游突发大版本: transformers>=4.41,<4.45
|
| 10 |
transformers
|
| 11 |
+
uvicorn[standard]==0.35.0
|
| 12 |
keras==3.6.0
|
| 13 |
yfinance==0.2.65
|
| 14 |
jsonpath==0.82.2
|
|
|
|
| 17 |
pydantic_core==2.23.4
|
| 18 |
nltk
|
| 19 |
gunicorn
|
| 20 |
+
|
| 21 |
+
# ---------------- 关键约束:解决你看到的冲突 ----------------
|
| 22 |
+
# 1) uvicorn[standard] 会安装 websockets;为兼容 gradio-client 等生态,限制 <13
|
| 23 |
+
# websockets>=10,<13
|
| 24 |
+
|
| 25 |
+
# 2) TensorFlow 2.16.2 需要 protobuf < 5(建议锁到 4.25.x)
|
| 26 |
+
protobuf>=4.25.0,<5
|
| 27 |
+
|
| 28 |
+
# 3) 有些依赖会把 grpcio-status 拉到 1.63+(它要求 protobuf>=5.26.1)→ 与 TF 冲突
|
| 29 |
+
grpcio-status<1.63
|
| 30 |
+
|
| 31 |
+
# 4) 避免在某些平台触发 PyTorch 源码编译(非常耗内存/时间)
|
| 32 |
--only-binary torch
|
| 33 |
+
|
| 34 |
+
# ---------------- PyTorch:按平台安装不同版本 ----------------
|
| 35 |
+
# Intel Mac(macOS x86_64)只到 2.2.2
|
| 36 |
+
torch==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64" and python_version < "3.13"
|
| 37 |
+
|
| 38 |
+
# Linux / Windows / macOS arm64 → 2.8.0(注意也限制 Python < 3.13)
|
| 39 |
+
torch==2.8.0; (platform_system == "Linux" or platform_system == "Windows" or (platform_system == "Darwin" and platform_machine == "arm64")) and python_version < "3.13"
|
us_stock.py
CHANGED
|
@@ -1,18 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
import re
|
| 3 |
-
import akshare as ak
|
| 4 |
import pandas as pd
|
| 5 |
from datetime import datetime, timedelta
|
| 6 |
import time # 导入标准库的 time 模块
|
| 7 |
|
| 8 |
import os
|
| 9 |
-
|
| 10 |
import requests
|
| 11 |
import threading
|
| 12 |
import asyncio
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
| 18 |
|
|
@@ -33,59 +41,154 @@ nasdaq_composite_stocks = pd.read_csv(nasdaq_composite_path)
|
|
| 33 |
|
| 34 |
|
| 35 |
def fetch_stock_us_spot_data_with_retries():
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
|
| 59 |
async def fetch_stock_us_spot_data_with_retries_async():
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
max_retries = 2 # 最多重试2次
|
| 63 |
-
|
| 64 |
-
for attempt in range(max_retries + 1):
|
| 65 |
try:
|
| 66 |
-
|
| 67 |
-
symbols = await asyncio.wait_for(
|
| 68 |
-
asyncio.to_thread(ak.stock_us_spot_em),
|
| 69 |
-
timeout=30.0
|
| 70 |
-
)
|
| 71 |
-
return symbols
|
| 72 |
-
except asyncio.TimeoutError:
|
| 73 |
-
print(f"Timeout error fetching data (attempt {attempt + 1}/{max_retries + 1})")
|
| 74 |
except Exception as e:
|
| 75 |
-
print(f"Error
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
# 如果所有重试都失败,返回空数据
|
| 84 |
-
print("All retries failed, returning empty data")
|
| 85 |
-
return pd.DataFrame()
|
| 86 |
|
| 87 |
symbols = None
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
async def fetch_symbols():
|
| 90 |
global symbols
|
| 91 |
try:
|
|
@@ -114,10 +217,65 @@ def update_stock_indices():
|
|
| 114 |
global index_us_stock_index_INX, index_us_stock_index_DJI, index_us_stock_index_IXIC, index_us_stock_index_NDX
|
| 115 |
try:
|
| 116 |
print("Starting stock indices update...")
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
print("Stock indices updated successfully")
|
| 122 |
except Exception as e:
|
| 123 |
print(f"Error updating stock indices: {e}")
|
|
@@ -128,7 +286,7 @@ def update_stock_indices():
|
|
| 128 |
# 程序开始时不立即更新,而是延迟启动
|
| 129 |
def start_indices_update():
|
| 130 |
"""延迟启动股票指数更新,避免阻塞应用启动"""
|
| 131 |
-
threading.Timer(
|
| 132 |
|
| 133 |
# 延迟启动股票指数更新
|
| 134 |
start_indices_update()
|
|
@@ -206,13 +364,18 @@ def get_last_minute_stock_price(symbol: str, max_retries=3) -> float:
|
|
| 206 |
for attempt in range(max_retries):
|
| 207 |
try:
|
| 208 |
# 缓存无效或不存在,从yfinance获取新数据
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
if stock_data.empty:
|
| 218 |
print(f"Warning: Empty data received for {symbol}, attempt {attempt + 1}/{max_retries}")
|
|
@@ -263,10 +426,39 @@ def get_stock_history(symbol, news_date, retries=10):
|
|
| 263 |
|
| 264 |
while retry_count <= retries and len(symbol) != 0: # 无限循环重试
|
| 265 |
try:
|
| 266 |
-
#
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
# print(f"No data for {symbol} on {news_date}.")
|
| 271 |
stock_hist_df = None # 将 DataFrame 设置为 None
|
| 272 |
break
|
|
|
|
| 1 |
+
# ================================
|
| 2 |
+
# 版本切换开关 - 从配置文件导入
|
| 3 |
+
# ================================
|
| 4 |
+
from data_source_config import USE_YFINANCE_VERSION, API_TIMEOUT_SECONDS, MAX_RETRY_ATTEMPTS
|
| 5 |
+
|
| 6 |
import logging
|
| 7 |
import re
|
|
|
|
| 8 |
import pandas as pd
|
| 9 |
from datetime import datetime, timedelta
|
| 10 |
import time # 导入标准库的 time 模块
|
| 11 |
|
| 12 |
import os
|
|
|
|
| 13 |
import requests
|
| 14 |
import threading
|
| 15 |
import asyncio
|
| 16 |
|
| 17 |
+
# 根据开关导入不同的模块
|
| 18 |
+
if USE_YFINANCE_VERSION:
|
| 19 |
+
import yfinance as yf
|
| 20 |
+
print("🔄 Using yfinance version (new)")
|
| 21 |
+
else:
|
| 22 |
+
import akshare as ak
|
| 23 |
+
print("🔄 Using akshare version (old)")
|
| 24 |
|
| 25 |
logging.basicConfig(level=logging.INFO)
|
| 26 |
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
def fetch_stock_us_spot_data_with_retries():
|
| 44 |
+
"""根据开关选择不同的数据源获取股票列表"""
|
| 45 |
+
if USE_YFINANCE_VERSION:
|
| 46 |
+
return fetch_stock_us_spot_data_yfinance()
|
| 47 |
+
else:
|
| 48 |
+
return fetch_stock_us_spot_data_akshare()
|
| 49 |
+
|
| 50 |
+
def fetch_stock_us_spot_data_akshare():
|
| 51 |
+
"""原始的 akshare 实现"""
|
| 52 |
+
if not USE_YFINANCE_VERSION:
|
| 53 |
+
# 定义重试间隔时间序列(秒)
|
| 54 |
+
retry_intervals = [10, 20, 60, 300, 600]
|
| 55 |
+
retry_index = 0 # 初始重试序号
|
| 56 |
+
|
| 57 |
+
while True:
|
| 58 |
+
try:
|
| 59 |
+
# 尝试获取API数据
|
| 60 |
+
symbols = ak.stock_us_spot_em()
|
| 61 |
+
return symbols # 成功获取数据后返回
|
| 62 |
+
|
| 63 |
+
except Exception as e:
|
| 64 |
+
print(f"Error fetching data: {e}")
|
| 65 |
+
|
| 66 |
+
# 获取当前重试等待时间
|
| 67 |
+
wait_time = retry_intervals[retry_index]
|
| 68 |
+
print(f"Retrying in {wait_time} seconds...")
|
| 69 |
+
time.sleep(wait_time) # 等待指定的秒数
|
| 70 |
+
|
| 71 |
+
# 更新重试索引,但不要超出重试时间列表的范围
|
| 72 |
+
retry_index = min(retry_index + 1, len(retry_intervals) - 1)
|
| 73 |
+
else:
|
| 74 |
+
print("Warning: akshare function called while using yfinance version")
|
| 75 |
+
return pd.DataFrame()
|
| 76 |
|
| 77 |
+
def fetch_stock_us_spot_data_yfinance():
|
| 78 |
+
"""新的 yfinance 实现"""
|
| 79 |
+
try:
|
| 80 |
+
# 从本地CSV文件收集所有股票代码
|
| 81 |
+
all_symbols = set()
|
| 82 |
+
|
| 83 |
+
# 从各个指数CSV文件中提取股票代码
|
| 84 |
+
for df, name in [
|
| 85 |
+
(nasdaq_100_stocks, "NASDAQ-100"),
|
| 86 |
+
(dow_jones_stocks, "Dow Jones"),
|
| 87 |
+
(sp500_stocks, "S&P 500"),
|
| 88 |
+
(nasdaq_composite_stocks, "NASDAQ Composite")
|
| 89 |
+
]:
|
| 90 |
+
if 'Symbol' in df.columns:
|
| 91 |
+
symbols_from_csv = df['Symbol'].dropna().astype(str).tolist()
|
| 92 |
+
all_symbols.update(symbols_from_csv)
|
| 93 |
+
elif 'Code' in df.columns:
|
| 94 |
+
symbols_from_csv = df['Code'].dropna().astype(str).tolist()
|
| 95 |
+
all_symbols.update(symbols_from_csv)
|
| 96 |
+
|
| 97 |
+
# 添加一些常见的ETF和热门股票
|
| 98 |
+
additional_symbols = [
|
| 99 |
+
# 主要ETF
|
| 100 |
+
'SPY', 'QQQ', 'IWM', 'VTI', 'ARKK', 'TQQQ', 'SQQQ', 'SPXL',
|
| 101 |
+
# 热门科技股
|
| 102 |
+
'AAPL', 'MSFT', 'GOOGL', 'GOOG', 'AMZN', 'TSLA', 'META', 'NVDA', 'NFLX',
|
| 103 |
+
'AMD', 'INTC', 'ORCL', 'CRM', 'ADBE', 'PYPL', 'UBER', 'LYFT',
|
| 104 |
+
# 中概股
|
| 105 |
+
'BABA', 'JD', 'PDD', 'NIO', 'XPEV', 'LI', 'DIDI', 'TME',
|
| 106 |
+
# 其他热门股票
|
| 107 |
+
'COST', 'WMT', 'JPM', 'BAC', 'XOM', 'CVX', 'PFE', 'JNJ', 'KO', 'PEP'
|
| 108 |
+
]
|
| 109 |
+
all_symbols.update(additional_symbols)
|
| 110 |
+
|
| 111 |
+
# 创建DataFrame
|
| 112 |
+
symbols_list = sorted(list(all_symbols))
|
| 113 |
+
symbols_df = pd.DataFrame({
|
| 114 |
+
'代码': symbols_list,
|
| 115 |
+
'名称': [f'{symbol} Inc.' for symbol in symbols_list] # 简单的名称映射
|
| 116 |
+
})
|
| 117 |
+
|
| 118 |
+
print(f"Created symbols dataframe with {len(symbols_df)} symbols using yfinance version")
|
| 119 |
+
return symbols_df
|
| 120 |
+
|
| 121 |
+
except Exception as e:
|
| 122 |
+
print(f"Error creating symbols dataframe: {e}")
|
| 123 |
+
# 返回基本的fallback数据
|
| 124 |
+
fallback_symbols = [
|
| 125 |
+
'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'NFLX',
|
| 126 |
+
'SPY', 'QQQ', 'IWM', 'VTI'
|
| 127 |
+
]
|
| 128 |
+
return pd.DataFrame({
|
| 129 |
+
'代码': fallback_symbols,
|
| 130 |
+
'名称': [f'{symbol} Inc.' for symbol in fallback_symbols]
|
| 131 |
+
})
|
| 132 |
|
| 133 |
|
| 134 |
|
| 135 |
async def fetch_stock_us_spot_data_with_retries_async():
|
| 136 |
+
"""异步版本的股票数据获取,支持版本切换"""
|
| 137 |
+
if USE_YFINANCE_VERSION:
|
|
|
|
|
|
|
|
|
|
| 138 |
try:
|
| 139 |
+
return await asyncio.to_thread(fetch_stock_us_spot_data_yfinance)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
except Exception as e:
|
| 141 |
+
print(f"Error in async yfinance fetch: {e}")
|
| 142 |
+
return pd.DataFrame()
|
| 143 |
+
else:
|
| 144 |
+
return await fetch_stock_us_spot_data_akshare_async()
|
| 145 |
+
|
| 146 |
+
async def fetch_stock_us_spot_data_akshare_async():
|
| 147 |
+
"""原始的 akshare 异步实现"""
|
| 148 |
+
if not USE_YFINANCE_VERSION:
|
| 149 |
+
retry_intervals = [10, 20] # 减少重试次数
|
| 150 |
+
retry_index = 0
|
| 151 |
+
max_retries = 2 # 最多重试2次
|
| 152 |
+
|
| 153 |
+
for attempt in range(max_retries + 1):
|
| 154 |
+
try:
|
| 155 |
+
# 添加30秒超时
|
| 156 |
+
symbols = await asyncio.wait_for(
|
| 157 |
+
asyncio.to_thread(ak.stock_us_spot_em),
|
| 158 |
+
timeout=30.0
|
| 159 |
+
)
|
| 160 |
+
return symbols
|
| 161 |
+
except asyncio.TimeoutError:
|
| 162 |
+
print(f"Timeout error fetching data (attempt {attempt + 1}/{max_retries + 1})")
|
| 163 |
+
except Exception as e:
|
| 164 |
+
print(f"Error fetching data (attempt {attempt + 1}/{max_retries + 1}): {e}")
|
| 165 |
+
|
| 166 |
+
if attempt < max_retries:
|
| 167 |
+
wait_time = retry_intervals[min(retry_index, len(retry_intervals) - 1)]
|
| 168 |
+
print(f"Retrying in {wait_time} seconds...")
|
| 169 |
+
await asyncio.sleep(wait_time)
|
| 170 |
+
retry_index += 1
|
| 171 |
|
| 172 |
+
# 如果所有重试都失败,返回空数据
|
| 173 |
+
print("All retries failed, returning empty data")
|
| 174 |
+
return pd.DataFrame()
|
| 175 |
+
else:
|
| 176 |
+
print("Warning: akshare async function called while using yfinance version")
|
| 177 |
+
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
symbols = None
|
| 180 |
|
| 181 |
+
def create_fallback_symbols():
|
| 182 |
+
"""创建fallback符号数据,用于测试"""
|
| 183 |
+
fallback_symbols = [
|
| 184 |
+
'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'NFLX',
|
| 185 |
+
'SPY', 'QQQ', 'IWM', 'VTI'
|
| 186 |
+
]
|
| 187 |
+
return pd.DataFrame({
|
| 188 |
+
'代码': fallback_symbols,
|
| 189 |
+
'名称': [f'{symbol} Inc.' for symbol in fallback_symbols]
|
| 190 |
+
})
|
| 191 |
+
|
| 192 |
async def fetch_symbols():
|
| 193 |
global symbols
|
| 194 |
try:
|
|
|
|
| 217 |
global index_us_stock_index_INX, index_us_stock_index_DJI, index_us_stock_index_IXIC, index_us_stock_index_NDX
|
| 218 |
try:
|
| 219 |
print("Starting stock indices update...")
|
| 220 |
+
|
| 221 |
+
if USE_YFINANCE_VERSION:
|
| 222 |
+
print("Updating indices using yfinance...")
|
| 223 |
+
# 使用 yfinance 更新指数数据
|
| 224 |
+
from datetime import datetime, timedelta
|
| 225 |
+
|
| 226 |
+
# 计算日期范围
|
| 227 |
+
end_date = datetime.now()
|
| 228 |
+
start_date = end_date - timedelta(weeks=8)
|
| 229 |
+
|
| 230 |
+
# 定义指数映射
|
| 231 |
+
indices = {
|
| 232 |
+
'^GSPC': 'INX', # S&P 500
|
| 233 |
+
'^DJI': 'DJI', # Dow Jones
|
| 234 |
+
'^IXIC': 'IXIC', # NASDAQ Composite
|
| 235 |
+
'^NDX': 'NDX' # NASDAQ 100
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
for yf_symbol, var_name in indices.items():
|
| 239 |
+
try:
|
| 240 |
+
ticker = yf.Ticker(yf_symbol)
|
| 241 |
+
hist_data = ticker.history(start=start_date, end=end_date)
|
| 242 |
+
|
| 243 |
+
if not hist_data.empty:
|
| 244 |
+
# 转换为与akshare相同的格式
|
| 245 |
+
formatted_data = pd.DataFrame({
|
| 246 |
+
'date': hist_data.index.strftime('%Y-%m-%d'),
|
| 247 |
+
'开盘': hist_data['Open'].values,
|
| 248 |
+
'收盘': hist_data['Close'].values,
|
| 249 |
+
'最高': hist_data['High'].values,
|
| 250 |
+
'最低': hist_data['Low'].values,
|
| 251 |
+
'成交量': hist_data['Volume'].values,
|
| 252 |
+
'成交额': (hist_data['Close'] * hist_data['Volume']).values
|
| 253 |
+
})
|
| 254 |
+
|
| 255 |
+
# 设置全局变量
|
| 256 |
+
if var_name == 'INX':
|
| 257 |
+
index_us_stock_index_INX = formatted_data
|
| 258 |
+
elif var_name == 'DJI':
|
| 259 |
+
index_us_stock_index_DJI = formatted_data
|
| 260 |
+
elif var_name == 'IXIC':
|
| 261 |
+
index_us_stock_index_IXIC = formatted_data
|
| 262 |
+
elif var_name == 'NDX':
|
| 263 |
+
index_us_stock_index_NDX = formatted_data
|
| 264 |
+
|
| 265 |
+
print(f"Successfully updated {var_name}: {len(formatted_data)} records")
|
| 266 |
+
else:
|
| 267 |
+
print(f"No data received for {yf_symbol}")
|
| 268 |
+
|
| 269 |
+
except Exception as e:
|
| 270 |
+
print(f"Error fetching {yf_symbol}: {e}")
|
| 271 |
+
else:
|
| 272 |
+
print("Updating indices using akshare...")
|
| 273 |
+
# 使用 akshare 更新指数数据
|
| 274 |
+
index_us_stock_index_INX = ak.index_us_stock_sina(symbol=".INX")
|
| 275 |
+
index_us_stock_index_DJI = ak.index_us_stock_sina(symbol=".DJI")
|
| 276 |
+
index_us_stock_index_IXIC = ak.index_us_stock_sina(symbol=".IXIC")
|
| 277 |
+
index_us_stock_index_NDX = ak.index_us_stock_sina(symbol=".NDX")
|
| 278 |
+
|
| 279 |
print("Stock indices updated successfully")
|
| 280 |
except Exception as e:
|
| 281 |
print(f"Error updating stock indices: {e}")
|
|
|
|
| 286 |
# 程序开始时不立即更新,而是延迟启动
|
| 287 |
def start_indices_update():
|
| 288 |
"""延迟启动股票指数更新,避免阻塞应用启动"""
|
| 289 |
+
threading.Timer(5, update_stock_indices).start() # 5秒后开始第一次更新
|
| 290 |
|
| 291 |
# 延迟启动股票指数更新
|
| 292 |
start_indices_update()
|
|
|
|
| 364 |
for attempt in range(max_retries):
|
| 365 |
try:
|
| 366 |
# 缓存无效或不存在,从yfinance获取新数据
|
| 367 |
+
if USE_YFINANCE_VERSION:
|
| 368 |
+
stock_data = yf.download(
|
| 369 |
+
symbol,
|
| 370 |
+
period='1d',
|
| 371 |
+
interval='5m',
|
| 372 |
+
progress=False, # 禁用进度条
|
| 373 |
+
timeout=10 # 设置超时时间
|
| 374 |
+
)
|
| 375 |
+
else:
|
| 376 |
+
# 使用akshare获取数据的逻辑
|
| 377 |
+
ticker = ak.stock_us_hist(symbol=symbol, period="daily", start_date="20240101", end_date="20240201")
|
| 378 |
+
stock_data = ticker if not ticker.empty else pd.DataFrame()
|
| 379 |
|
| 380 |
if stock_data.empty:
|
| 381 |
print(f"Warning: Empty data received for {symbol}, attempt {attempt + 1}/{max_retries}")
|
|
|
|
| 426 |
|
| 427 |
while retry_count <= retries and len(symbol) != 0: # 无限循环重试
|
| 428 |
try:
|
| 429 |
+
# 根据版本开关选择不同的API
|
| 430 |
+
if USE_YFINANCE_VERSION:
|
| 431 |
+
# 使用 yfinance 获取数据
|
| 432 |
+
ticker = yf.Ticker(symbol)
|
| 433 |
+
# 将日期格式转换为 yfinance 期望的格式 (YYYY-MM-DD)
|
| 434 |
+
yf_start_date = datetime.strptime(start_date, "%Y%m%d").strftime("%Y-%m-%d")
|
| 435 |
+
yf_end_date = datetime.strptime(end_date, "%Y%m%d").strftime("%Y-%m-%d")
|
| 436 |
+
|
| 437 |
+
stock_hist_df = ticker.history(start=yf_start_date, end=yf_end_date)
|
| 438 |
+
|
| 439 |
+
if not stock_hist_df.empty:
|
| 440 |
+
# 转换为与akshare相同的格式
|
| 441 |
+
stock_hist_df = stock_hist_df.reset_index()
|
| 442 |
+
stock_hist_df = pd.DataFrame({
|
| 443 |
+
'date': stock_hist_df['Date'].dt.strftime('%Y-%m-%d'),
|
| 444 |
+
'开盘': stock_hist_df['Open'],
|
| 445 |
+
'收盘': stock_hist_df['Close'],
|
| 446 |
+
'最高': stock_hist_df['High'],
|
| 447 |
+
'最低': stock_hist_df['Low'],
|
| 448 |
+
'成交量': stock_hist_df['Volume'],
|
| 449 |
+
'成交额': stock_hist_df['Close'] * stock_hist_df['Volume'],
|
| 450 |
+
'振幅': 0, # yfinance没有直接提供,设为0
|
| 451 |
+
'涨跌幅': 0, # 可以计算,但这里简化为0
|
| 452 |
+
'涨跌额': 0, # 可以计算,但这里简化为0
|
| 453 |
+
'换手率': 0 # yfinance没有直接提供,设为0
|
| 454 |
+
})
|
| 455 |
+
else:
|
| 456 |
+
stock_hist_df = None
|
| 457 |
+
else:
|
| 458 |
+
# 使用 akshare 获取数据
|
| 459 |
+
stock_hist_df = ak.stock_us_hist(symbol=symbol, period="daily", start_date=start_date, end_date=end_date, adjust="")
|
| 460 |
+
|
| 461 |
+
if stock_hist_df is None or stock_hist_df.empty: # 检查是否为空数据
|
| 462 |
# print(f"No data for {symbol} on {news_date}.")
|
| 463 |
stock_hist_df = None # 将 DataFrame 设置为 None
|
| 464 |
break
|
us_stock_yfinance.py
ADDED
|
@@ -0,0 +1,577 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import re
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from datetime import datetime, timedelta
|
| 5 |
+
import time # 导入标准库的 time 模块
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
import requests
|
| 10 |
+
import threading
|
| 11 |
+
import asyncio
|
| 12 |
+
|
| 13 |
+
import yfinance as yf
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
logging.basicConfig(level=logging.INFO)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# 获取当前文件的目录
|
| 20 |
+
base_dir = os.path.dirname(os.path.abspath(__file__))
|
| 21 |
+
|
| 22 |
+
# 构建CSV文件的绝对路径
|
| 23 |
+
nasdaq_100_path = os.path.join(base_dir, './model/nasdaq100.csv')
|
| 24 |
+
dow_jones_path = os.path.join(base_dir, './model/dji.csv')
|
| 25 |
+
sp500_path = os.path.join(base_dir, './model/sp500.csv')
|
| 26 |
+
nasdaq_composite_path = os.path.join(base_dir, './model/nasdaq_all.csv')
|
| 27 |
+
# 从CSV文件加载成分股数据
|
| 28 |
+
nasdaq_100_stocks = pd.read_csv(nasdaq_100_path)
|
| 29 |
+
dow_jones_stocks = pd.read_csv(dow_jones_path)
|
| 30 |
+
sp500_stocks = pd.read_csv(sp500_path)
|
| 31 |
+
nasdaq_composite_stocks = pd.read_csv(nasdaq_composite_path)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def fetch_stock_us_spot_data_with_retries():
|
| 35 |
+
"""使用 yfinance 和本地 CSV 数据创建股票代码表"""
|
| 36 |
+
try:
|
| 37 |
+
# 从本地CSV文件收集所有股票代码
|
| 38 |
+
all_symbols = set()
|
| 39 |
+
|
| 40 |
+
# 从各个指数CSV文件中提取股票代码
|
| 41 |
+
for df, name in [
|
| 42 |
+
(nasdaq_100_stocks, "NASDAQ-100"),
|
| 43 |
+
(dow_jones_stocks, "Dow Jones"),
|
| 44 |
+
(sp500_stocks, "S&P 500"),
|
| 45 |
+
(nasdaq_composite_stocks, "NASDAQ Composite")
|
| 46 |
+
]:
|
| 47 |
+
if 'Symbol' in df.columns:
|
| 48 |
+
symbols_from_csv = df['Symbol'].dropna().astype(str).tolist()
|
| 49 |
+
all_symbols.update(symbols_from_csv)
|
| 50 |
+
elif 'Code' in df.columns:
|
| 51 |
+
symbols_from_csv = df['Code'].dropna().astype(str).tolist()
|
| 52 |
+
all_symbols.update(symbols_from_csv)
|
| 53 |
+
|
| 54 |
+
# 添加一些常见的ETF和热门股票
|
| 55 |
+
additional_symbols = [
|
| 56 |
+
# 主要ETF
|
| 57 |
+
'SPY', 'QQQ', 'IWM', 'VTI', 'ARKK', 'TQQQ', 'SQQQ', 'SPXL',
|
| 58 |
+
# 热门科技股
|
| 59 |
+
'AAPL', 'MSFT', 'GOOGL', 'GOOG', 'AMZN', 'TSLA', 'META', 'NVDA', 'NFLX',
|
| 60 |
+
'AMD', 'INTC', 'ORCL', 'CRM', 'ADBE', 'PYPL', 'UBER', 'LYFT',
|
| 61 |
+
# 中概股
|
| 62 |
+
'BABA', 'JD', 'PDD', 'NIO', 'XPEV', 'LI', 'DIDI', 'TME',
|
| 63 |
+
# 其他热门股票
|
| 64 |
+
'COST', 'WMT', 'JPM', 'BAC', 'XOM', 'CVX', 'PFE', 'JNJ', 'KO', 'PEP'
|
| 65 |
+
]
|
| 66 |
+
all_symbols.update(additional_symbols)
|
| 67 |
+
|
| 68 |
+
# 创建DataFrame
|
| 69 |
+
symbols_list = sorted(list(all_symbols))
|
| 70 |
+
symbols_df = pd.DataFrame({
|
| 71 |
+
'代码': symbols_list,
|
| 72 |
+
'名称': [f'{symbol} Inc.' for symbol in symbols_list] # 简单的名称映射
|
| 73 |
+
})
|
| 74 |
+
|
| 75 |
+
print(f"Created symbols dataframe with {len(symbols_df)} symbols")
|
| 76 |
+
return symbols_df
|
| 77 |
+
|
| 78 |
+
except Exception as e:
|
| 79 |
+
print(f"Error creating symbols dataframe: {e}")
|
| 80 |
+
# 返回基本的fallback数据
|
| 81 |
+
fallback_symbols = [
|
| 82 |
+
'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'NFLX',
|
| 83 |
+
'SPY', 'QQQ', 'IWM', 'VTI'
|
| 84 |
+
]
|
| 85 |
+
return pd.DataFrame({
|
| 86 |
+
'代码': fallback_symbols,
|
| 87 |
+
'名称': [f'{symbol} Inc.' for symbol in fallback_symbols]
|
| 88 |
+
})
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
async def fetch_stock_us_spot_data_with_retries_async():
|
| 92 |
+
"""异步版本的股票代码获取"""
|
| 93 |
+
try:
|
| 94 |
+
return await asyncio.to_thread(fetch_stock_us_spot_data_with_retries)
|
| 95 |
+
except Exception as e:
|
| 96 |
+
print(f"Error in async fetch: {e}")
|
| 97 |
+
return pd.DataFrame()
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
symbols = None
|
| 101 |
+
|
| 102 |
+
async def fetch_symbols():
|
| 103 |
+
global symbols
|
| 104 |
+
try:
|
| 105 |
+
print("Starting symbols initialization...")
|
| 106 |
+
# 异步获取数据
|
| 107 |
+
symbols = await fetch_stock_us_spot_data_with_retries_async()
|
| 108 |
+
if symbols is not None and not symbols.empty:
|
| 109 |
+
print(f"Symbols initialized successfully: {len(symbols)} symbols loaded")
|
| 110 |
+
else:
|
| 111 |
+
print("Symbols initialization failed, using empty dataset")
|
| 112 |
+
symbols = pd.DataFrame()
|
| 113 |
+
except Exception as e:
|
| 114 |
+
print(f"Error in fetch_symbols: {e}")
|
| 115 |
+
symbols = pd.DataFrame()
|
| 116 |
+
finally:
|
| 117 |
+
print("Symbols initialization completed")
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
# 全局变量
|
| 121 |
+
index_us_stock_index_INX = None
|
| 122 |
+
index_us_stock_index_DJI = None
|
| 123 |
+
index_us_stock_index_IXIC = None
|
| 124 |
+
index_us_stock_index_NDX = None
|
| 125 |
+
|
| 126 |
+
def update_stock_indices():
|
| 127 |
+
"""使用 yfinance 获取美股指数数据"""
|
| 128 |
+
global index_us_stock_index_INX, index_us_stock_index_DJI, index_us_stock_index_IXIC, index_us_stock_index_NDX
|
| 129 |
+
try:
|
| 130 |
+
print("Starting stock indices update using yfinance...")
|
| 131 |
+
|
| 132 |
+
# 获取过去8周的数据
|
| 133 |
+
end_date = datetime.now()
|
| 134 |
+
start_date = end_date - timedelta(weeks=8)
|
| 135 |
+
|
| 136 |
+
# 指数映射
|
| 137 |
+
indices = {
|
| 138 |
+
'^GSPC': 'INX', # S&P 500
|
| 139 |
+
'^DJI': 'DJI', # Dow Jones
|
| 140 |
+
'^IXIC': 'IXIC', # NASDAQ Composite
|
| 141 |
+
'^NDX': 'NDX' # NASDAQ 100
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
results = {}
|
| 145 |
+
|
| 146 |
+
for yf_symbol, var_name in indices.items():
|
| 147 |
+
try:
|
| 148 |
+
ticker = yf.Ticker(yf_symbol)
|
| 149 |
+
hist_data = ticker.history(start=start_date, end=end_date)
|
| 150 |
+
|
| 151 |
+
if not hist_data.empty:
|
| 152 |
+
# 转换为与akshare相同的格式
|
| 153 |
+
formatted_data = pd.DataFrame({
|
| 154 |
+
'date': hist_data.index.strftime('%Y-%m-%d'),
|
| 155 |
+
'开盘': hist_data['Open'].values,
|
| 156 |
+
'收盘': hist_data['Close'].values,
|
| 157 |
+
'最高': hist_data['High'].values,
|
| 158 |
+
'最低': hist_data['Low'].values,
|
| 159 |
+
'成交量': hist_data['Volume'].values,
|
| 160 |
+
'成交额': (hist_data['Close'] * hist_data['Volume']).values
|
| 161 |
+
})
|
| 162 |
+
results[var_name] = formatted_data
|
| 163 |
+
print(f"Successfully fetched {var_name} data: {len(formatted_data)} records")
|
| 164 |
+
else:
|
| 165 |
+
print(f"No data received for {yf_symbol}")
|
| 166 |
+
results[var_name] = pd.DataFrame()
|
| 167 |
+
|
| 168 |
+
except Exception as e:
|
| 169 |
+
print(f"Error fetching {yf_symbol}: {e}")
|
| 170 |
+
results[var_name] = pd.DataFrame()
|
| 171 |
+
|
| 172 |
+
# 设置全局变量
|
| 173 |
+
index_us_stock_index_INX = results.get('INX', pd.DataFrame())
|
| 174 |
+
index_us_stock_index_DJI = results.get('DJI', pd.DataFrame())
|
| 175 |
+
index_us_stock_index_IXIC = results.get('IXIC', pd.DataFrame())
|
| 176 |
+
index_us_stock_index_NDX = results.get('NDX', pd.DataFrame())
|
| 177 |
+
|
| 178 |
+
print("Stock indices updated successfully using yfinance")
|
| 179 |
+
|
| 180 |
+
except Exception as e:
|
| 181 |
+
print(f"Error updating stock indices: {e}")
|
| 182 |
+
|
| 183 |
+
# 设置定时器,每隔12小时更新一次
|
| 184 |
+
threading.Timer(12 * 60 * 60, update_stock_indices).start()
|
| 185 |
+
|
| 186 |
+
# 程序开始时不立即更新,而是延迟启动
|
| 187 |
+
def start_indices_update():
|
| 188 |
+
"""延迟启动股票指数更新,避免阻塞应用启动"""
|
| 189 |
+
threading.Timer(5, update_stock_indices).start() # 5秒后开始第一次更新
|
| 190 |
+
|
| 191 |
+
# 延迟启动股票指数更新
|
| 192 |
+
start_indices_update()
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
# 创建列名转换的字典
|
| 196 |
+
column_mapping = {
|
| 197 |
+
'日期': 'date',
|
| 198 |
+
'开盘': 'open',
|
| 199 |
+
'收盘': 'close',
|
| 200 |
+
'最高': 'high',
|
| 201 |
+
'最低': 'low',
|
| 202 |
+
'成交量': 'volume',
|
| 203 |
+
'成交额': 'amount',
|
| 204 |
+
'振幅': 'amplitude',
|
| 205 |
+
'涨跌幅': 'price_change_percentage',
|
| 206 |
+
'涨跌额': 'price_change_amount',
|
| 207 |
+
'换手率': 'turnover_rate'
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
# 定义一个标准的列顺序
|
| 211 |
+
standard_columns = ['date', 'open', 'close', 'high', 'low', 'volume', 'amount']
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
# 定义查找函数
|
| 215 |
+
def find_stock_entry(stock_code):
|
| 216 |
+
# 使用 str.endswith 来匹配股票代码
|
| 217 |
+
if symbols is None or symbols.empty:
|
| 218 |
+
print("Warning: symbols data is empty")
|
| 219 |
+
return ""
|
| 220 |
+
|
| 221 |
+
try:
|
| 222 |
+
matching_row = symbols[symbols['代码'].str.endswith(stock_code, na=False)]
|
| 223 |
+
if not matching_row.empty:
|
| 224 |
+
return matching_row['代码'].values[0]
|
| 225 |
+
else:
|
| 226 |
+
# 如果没有找到,直接返回输入的代码(假设它是有效的)
|
| 227 |
+
return stock_code.upper()
|
| 228 |
+
except Exception as e:
|
| 229 |
+
print(f"Error in find_stock_entry: {e}")
|
| 230 |
+
return stock_code.upper()
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def reduce_columns(df, columns_to_keep):
|
| 234 |
+
return df[columns_to_keep]
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
# 创建缓存字典
|
| 238 |
+
_price_cache = {}
|
| 239 |
+
|
| 240 |
+
def get_last_minute_stock_price(symbol: str, max_retries=3) -> float:
|
| 241 |
+
"""获取股票最新价格,使用30分钟缓存,并包含重试机制"""
|
| 242 |
+
|
| 243 |
+
if not symbol:
|
| 244 |
+
return -1.0
|
| 245 |
+
if symbol == "NONE_SYMBOL_FOUND":
|
| 246 |
+
return -1.0
|
| 247 |
+
|
| 248 |
+
current_time = datetime.now()
|
| 249 |
+
|
| 250 |
+
# 检查缓存
|
| 251 |
+
if symbol in _price_cache:
|
| 252 |
+
cached_price, cached_time = _price_cache[symbol]
|
| 253 |
+
# 如果缓存时间在30分钟内,直接返回缓存的价格
|
| 254 |
+
if current_time - cached_time < timedelta(minutes=30):
|
| 255 |
+
return cached_price
|
| 256 |
+
|
| 257 |
+
# 重试机制
|
| 258 |
+
for attempt in range(max_retries):
|
| 259 |
+
try:
|
| 260 |
+
# 使用yfinance获取实时数据
|
| 261 |
+
ticker = yf.Ticker(symbol)
|
| 262 |
+
info = ticker.info
|
| 263 |
+
|
| 264 |
+
current_price = info.get('regularMarketPrice') or info.get('currentPrice')
|
| 265 |
+
|
| 266 |
+
if current_price is None:
|
| 267 |
+
# 尝试获取历史数据的最新价格
|
| 268 |
+
hist = ticker.history(period='1d', interval='1m')
|
| 269 |
+
if not hist.empty:
|
| 270 |
+
current_price = float(hist['Close'].iloc[-1])
|
| 271 |
+
|
| 272 |
+
if current_price is not None:
|
| 273 |
+
current_price = float(current_price)
|
| 274 |
+
# 更新缓存
|
| 275 |
+
_price_cache[symbol] = (current_price, current_time)
|
| 276 |
+
return current_price
|
| 277 |
+
else:
|
| 278 |
+
print(f"Warning: No price data for {symbol}, attempt {attempt + 1}/{max_retries}")
|
| 279 |
+
if attempt == max_retries - 1:
|
| 280 |
+
return -1.0
|
| 281 |
+
time.sleep(1)
|
| 282 |
+
|
| 283 |
+
except Exception as e:
|
| 284 |
+
print(f"Error fetching price for {symbol}, attempt {attempt + 1}/{max_retries}: {str(e)}")
|
| 285 |
+
if attempt == max_retries - 1:
|
| 286 |
+
return -1.0
|
| 287 |
+
time.sleep(1)
|
| 288 |
+
|
| 289 |
+
return -1.0
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
# 返回个股历史数据
|
| 293 |
+
def get_stock_history(symbol, news_date, retries=10):
|
| 294 |
+
"""使用 yfinance 获取股票历史数据"""
|
| 295 |
+
|
| 296 |
+
# 如果传入的symbol不包含数字前缀,则通过 find_stock_entry 获取完整的symbol
|
| 297 |
+
if not any(char.isdigit() for char in symbol):
|
| 298 |
+
full_symbol = find_stock_entry(symbol)
|
| 299 |
+
if len(symbol) != 0 and full_symbol:
|
| 300 |
+
symbol = full_symbol
|
| 301 |
+
else:
|
| 302 |
+
symbol = ""
|
| 303 |
+
|
| 304 |
+
# 将news_date转换为datetime对象
|
| 305 |
+
current_date = datetime.now()
|
| 306 |
+
|
| 307 |
+
# 计算start_date和end_date
|
| 308 |
+
start_date = current_date - timedelta(days=60)
|
| 309 |
+
end_date = current_date
|
| 310 |
+
|
| 311 |
+
stock_hist_df = None
|
| 312 |
+
retry_count = 0
|
| 313 |
+
|
| 314 |
+
while retry_count <= retries and len(symbol) != 0:
|
| 315 |
+
try:
|
| 316 |
+
# 使用yfinance获取数据
|
| 317 |
+
ticker = yf.Ticker(symbol)
|
| 318 |
+
stock_hist_df = ticker.history(start=start_date, end=end_date)
|
| 319 |
+
|
| 320 |
+
if stock_hist_df.empty:
|
| 321 |
+
print(f"No data for {symbol} on {news_date}.")
|
| 322 |
+
stock_hist_df = None
|
| 323 |
+
else:
|
| 324 |
+
# 转换为与akshare相同的格式
|
| 325 |
+
stock_hist_df = stock_hist_df.reset_index()
|
| 326 |
+
stock_hist_df = pd.DataFrame({
|
| 327 |
+
'date': stock_hist_df['Date'].dt.strftime('%Y-%m-%d'),
|
| 328 |
+
'开盘': stock_hist_df['Open'],
|
| 329 |
+
'收盘': stock_hist_df['Close'],
|
| 330 |
+
'最高': stock_hist_df['High'],
|
| 331 |
+
'最低': stock_hist_df['Low'],
|
| 332 |
+
'成交量': stock_hist_df['Volume'],
|
| 333 |
+
'成交额': stock_hist_df['Close'] * stock_hist_df['Volume'],
|
| 334 |
+
'振幅': 0, # yfinance没有直接提供,设为0
|
| 335 |
+
'涨跌幅': 0, # 可以计算,但这里简化为0
|
| 336 |
+
'涨跌额': 0, # 可以计算,但这里简化为0
|
| 337 |
+
'换手率': 0 # yfinance没有直接提供,设为0
|
| 338 |
+
})
|
| 339 |
+
break
|
| 340 |
+
|
| 341 |
+
except Exception as e:
|
| 342 |
+
print(f"Error {e} scraping data for {symbol} on {news_date}. Retrying...")
|
| 343 |
+
retry_count += 1
|
| 344 |
+
if retry_count <= retries:
|
| 345 |
+
time.sleep(2) # 等待2秒后重试
|
| 346 |
+
continue
|
| 347 |
+
|
| 348 |
+
# 如果获取失败或数据为空,返回填充为0的 DataFrame
|
| 349 |
+
if stock_hist_df is None or stock_hist_df.empty:
|
| 350 |
+
# 构建一个空的 DataFrame,包含指定日期范围的空数据
|
| 351 |
+
date_range = pd.date_range(start=start_date, end=end_date)
|
| 352 |
+
stock_hist_df = pd.DataFrame({
|
| 353 |
+
'date': date_range.strftime('%Y-%m-%d'),
|
| 354 |
+
'开盘': 0,
|
| 355 |
+
'收盘': 0,
|
| 356 |
+
'最高': 0,
|
| 357 |
+
'最低': 0,
|
| 358 |
+
'成交量': 0,
|
| 359 |
+
'成交额': 0,
|
| 360 |
+
'振幅': 0,
|
| 361 |
+
'涨跌幅': 0,
|
| 362 |
+
'涨跌额': 0,
|
| 363 |
+
'换手率': 0
|
| 364 |
+
})
|
| 365 |
+
|
| 366 |
+
# 使用rename方法转换列名
|
| 367 |
+
stock_hist_df = stock_hist_df.rename(columns=column_mapping)
|
| 368 |
+
stock_hist_df = stock_hist_df.reindex(columns=standard_columns)
|
| 369 |
+
# 处理个股数据,保留所需列
|
| 370 |
+
stock_hist_df = reduce_columns(stock_hist_df, standard_columns)
|
| 371 |
+
return stock_hist_df
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
# 返回个股所属指数历史数据
|
| 375 |
+
def get_stock_index_history(symbol, news_date, force_index=0):
|
| 376 |
+
# 检查股票所属的指数
|
| 377 |
+
if symbol in nasdaq_100_stocks['Symbol'].values or force_index == 1:
|
| 378 |
+
index_code = ".NDX"
|
| 379 |
+
index_data = index_us_stock_index_NDX
|
| 380 |
+
elif symbol in dow_jones_stocks['Symbol'].values or force_index == 2:
|
| 381 |
+
index_code = ".DJI"
|
| 382 |
+
index_data = index_us_stock_index_DJI
|
| 383 |
+
elif symbol in sp500_stocks['Symbol'].values or force_index == 3:
|
| 384 |
+
index_code = ".INX"
|
| 385 |
+
index_data = index_us_stock_index_INX
|
| 386 |
+
elif symbol in nasdaq_composite_stocks["Symbol"].values or symbol is None or symbol == "" or force_index == 4:
|
| 387 |
+
index_code = ".IXIC"
|
| 388 |
+
index_data = index_us_stock_index_IXIC
|
| 389 |
+
else:
|
| 390 |
+
index_code = ".IXIC"
|
| 391 |
+
index_data = index_us_stock_index_IXIC
|
| 392 |
+
|
| 393 |
+
# 获取当前日期
|
| 394 |
+
current_date = datetime.now()
|
| 395 |
+
|
| 396 |
+
# 计算 start_date 和 end_date
|
| 397 |
+
start_date = (current_date - timedelta(weeks=8)).strftime("%Y-%m-%d")
|
| 398 |
+
end_date = current_date.strftime("%Y-%m-%d")
|
| 399 |
+
|
| 400 |
+
if index_data is None or index_data.empty:
|
| 401 |
+
# 如果全局数据为空,尝试实时获取
|
| 402 |
+
print(f"Index data for {index_code} is empty, fetching real-time data...")
|
| 403 |
+
try:
|
| 404 |
+
# 映射到yfinance符号
|
| 405 |
+
yf_symbol_map = {
|
| 406 |
+
'.INX': '^GSPC',
|
| 407 |
+
'.DJI': '^DJI',
|
| 408 |
+
'.IXIC': '^IXIC',
|
| 409 |
+
'.NDX': '^NDX'
|
| 410 |
+
}
|
| 411 |
+
yf_symbol = yf_symbol_map.get(index_code, '^IXIC')
|
| 412 |
+
|
| 413 |
+
ticker = yf.Ticker(yf_symbol)
|
| 414 |
+
hist_data = ticker.history(start=start_date, end=end_date)
|
| 415 |
+
|
| 416 |
+
if not hist_data.empty:
|
| 417 |
+
index_data = pd.DataFrame({
|
| 418 |
+
'date': hist_data.index.strftime('%Y-%m-%d'),
|
| 419 |
+
'开盘': hist_data['Open'].values,
|
| 420 |
+
'收盘': hist_data['Close'].values,
|
| 421 |
+
'最高': hist_data['High'].values,
|
| 422 |
+
'最低': hist_data['Low'].values,
|
| 423 |
+
'成交量': hist_data['Volume'].values,
|
| 424 |
+
'成交额': (hist_data['Close'] * hist_data['Volume']).values
|
| 425 |
+
})
|
| 426 |
+
else:
|
| 427 |
+
# 返回空数据
|
| 428 |
+
date_range = pd.date_range(start=start_date, end=end_date)
|
| 429 |
+
index_data = pd.DataFrame({
|
| 430 |
+
'date': date_range.strftime('%Y-%m-%d'),
|
| 431 |
+
'开盘': 0, '收盘': 0, '最高': 0, '最低': 0, '成交量': 0, '成交额': 0
|
| 432 |
+
})
|
| 433 |
+
except Exception as e:
|
| 434 |
+
print(f"Error fetching real-time index data: {e}")
|
| 435 |
+
# 返回空数据
|
| 436 |
+
date_range = pd.date_range(start=start_date, end=end_date)
|
| 437 |
+
index_data = pd.DataFrame({
|
| 438 |
+
'date': date_range.strftime('%Y-%m-%d'),
|
| 439 |
+
'开盘': 0, '收盘': 0, '最高': 0, '最低': 0, '成交量': 0, '成交额': 0
|
| 440 |
+
})
|
| 441 |
+
|
| 442 |
+
# 确保 index_data['date'] 是 datetime 类型
|
| 443 |
+
index_data['date'] = pd.to_datetime(index_data['date'])
|
| 444 |
+
|
| 445 |
+
# 从指数历史数据中提取指定日期范围的数据
|
| 446 |
+
index_hist_df = index_data[(index_data['date'] >= start_date) & (index_data['date'] <= end_date)]
|
| 447 |
+
|
| 448 |
+
# 统一列名
|
| 449 |
+
index_hist_df = index_hist_df.rename(columns=column_mapping)
|
| 450 |
+
index_hist_df = index_hist_df.reindex(columns=standard_columns)
|
| 451 |
+
# 处理个股数据,保留所需列
|
| 452 |
+
index_hist_df = reduce_columns(index_hist_df, standard_columns)
|
| 453 |
+
return index_hist_df
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
def find_stock_codes_or_names(entities):
|
| 457 |
+
"""
|
| 458 |
+
从给定的实体列表中检索股票代码或公司名称。
|
| 459 |
+
"""
|
| 460 |
+
stock_codes = set()
|
| 461 |
+
|
| 462 |
+
# 合并所有股票字典并清理数据,确保都是字符串
|
| 463 |
+
all_symbols = pd.concat([nasdaq_100_stocks['Symbol'],
|
| 464 |
+
dow_jones_stocks['Symbol'],
|
| 465 |
+
sp500_stocks['Symbol'],
|
| 466 |
+
nasdaq_composite_stocks['Symbol']]).dropna().astype(str).unique().tolist()
|
| 467 |
+
|
| 468 |
+
all_names = pd.concat([nasdaq_100_stocks['Name'],
|
| 469 |
+
nasdaq_composite_stocks['Name'],
|
| 470 |
+
sp500_stocks['Security'],
|
| 471 |
+
dow_jones_stocks['Company']]).dropna().astype(str).unique().tolist()
|
| 472 |
+
|
| 473 |
+
# 创建一个 Name 到 Symbol 的映射
|
| 474 |
+
name_to_symbol = {}
|
| 475 |
+
for idx, name in enumerate(all_names):
|
| 476 |
+
if idx < len(all_symbols):
|
| 477 |
+
symbol = all_symbols[idx]
|
| 478 |
+
name_to_symbol[name.lower()] = symbol
|
| 479 |
+
|
| 480 |
+
# 查找实体映射到的股票代码
|
| 481 |
+
for entity, entity_type in entities:
|
| 482 |
+
entity_lower = entity.lower()
|
| 483 |
+
entity_upper = entity.upper()
|
| 484 |
+
|
| 485 |
+
# 检查 Symbol 列
|
| 486 |
+
if entity_upper in all_symbols:
|
| 487 |
+
stock_codes.add(entity_upper)
|
| 488 |
+
|
| 489 |
+
# 检查 Name 列,确保完整匹配而不是部分匹配
|
| 490 |
+
for name, symbol in name_to_symbol.items():
|
| 491 |
+
# 使用正则表达式进行严格匹配
|
| 492 |
+
pattern = rf'\b{re.escape(entity_lower)}\b'
|
| 493 |
+
if re.search(pattern, name):
|
| 494 |
+
stock_codes.add(symbol.upper())
|
| 495 |
+
|
| 496 |
+
if not stock_codes:
|
| 497 |
+
return ['NONE_SYMBOL_FOUND']
|
| 498 |
+
return list(stock_codes)
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
def process_history(stock_history, target_date, history_days=30, following_days=3):
|
| 502 |
+
# 检查数据是否为空
|
| 503 |
+
if stock_history.empty:
|
| 504 |
+
return create_empty_data(history_days), create_empty_data(following_days)
|
| 505 |
+
|
| 506 |
+
# 确保日期列存在并转换为datetime格式
|
| 507 |
+
if 'date' not in stock_history.columns:
|
| 508 |
+
return create_empty_data(history_days), create_empty_data(following_days)
|
| 509 |
+
|
| 510 |
+
stock_history['date'] = pd.to_datetime(stock_history['date'])
|
| 511 |
+
target_date = pd.to_datetime(target_date)
|
| 512 |
+
|
| 513 |
+
# 按日期升序排序
|
| 514 |
+
stock_history = stock_history.sort_values('date')
|
| 515 |
+
|
| 516 |
+
# 找到目标日期对应的索引
|
| 517 |
+
target_row = stock_history[stock_history['date'] <= target_date]
|
| 518 |
+
if target_row.empty:
|
| 519 |
+
return create_empty_data(history_days), create_empty_data(following_days)
|
| 520 |
+
|
| 521 |
+
# 获取目标日期最近的行
|
| 522 |
+
target_index = target_row.index[-1]
|
| 523 |
+
target_pos = stock_history.index.get_loc(target_index)
|
| 524 |
+
|
| 525 |
+
# 获取历史数据(包括目标日期)
|
| 526 |
+
start_pos = max(0, target_pos - history_days + 1)
|
| 527 |
+
previous_rows = stock_history.iloc[start_pos:target_pos + 1]
|
| 528 |
+
|
| 529 |
+
# 获取后续数据
|
| 530 |
+
following_rows = stock_history.iloc[target_pos + 1:target_pos + following_days + 1]
|
| 531 |
+
|
| 532 |
+
# 删除日期列并确保数据完整性
|
| 533 |
+
previous_rows = previous_rows.drop(columns=['date'])
|
| 534 |
+
following_rows = following_rows.drop(columns=['date'])
|
| 535 |
+
|
| 536 |
+
# 处理数据不足的情况
|
| 537 |
+
previous_rows = handle_insufficient_data(previous_rows, history_days)
|
| 538 |
+
following_rows = handle_insufficient_data(following_rows, following_days)
|
| 539 |
+
|
| 540 |
+
return previous_rows.iloc[:, :6], following_rows.iloc[:, :6]
|
| 541 |
+
|
| 542 |
+
|
| 543 |
+
def create_empty_data(days):
|
| 544 |
+
return pd.DataFrame({
|
| 545 |
+
'开盘': [-1] * days,
|
| 546 |
+
'收盘': [-1] * days,
|
| 547 |
+
'最高': [-1] * days,
|
| 548 |
+
'最低': [-1] * days,
|
| 549 |
+
'成交量': [-1] * days,
|
| 550 |
+
'成交额': [-1] * days
|
| 551 |
+
})
|
| 552 |
+
|
| 553 |
+
|
| 554 |
+
def handle_insufficient_data(data, required_days):
|
| 555 |
+
current_rows = len(data)
|
| 556 |
+
if current_rows < required_days:
|
| 557 |
+
missing_rows = required_days - current_rows
|
| 558 |
+
empty_data = create_empty_data(missing_rows)
|
| 559 |
+
return pd.concat([empty_data, data]).reset_index(drop=True)
|
| 560 |
+
return data
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
if __name__ == "__main__":
|
| 564 |
+
# 测试函数
|
| 565 |
+
result = find_stock_entry('AAPL')
|
| 566 |
+
print(f"find_stock_entry: {result}")
|
| 567 |
+
result = get_stock_history('AAPL', '20240214')
|
| 568 |
+
print(f"get_stock_history: {result}")
|
| 569 |
+
result = get_stock_index_history('AAPL', '20240214')
|
| 570 |
+
print(f"get_stock_index_history: {result}")
|
| 571 |
+
result = find_stock_codes_or_names([('苹果', 'ORG'), ('苹果公司', 'ORG')])
|
| 572 |
+
print(f"find_stock_codes_or_names: {result}")
|
| 573 |
+
result = process_history(get_stock_history('AAPL', '20240214'), '20240214')
|
| 574 |
+
print(f"process_history: {result}")
|
| 575 |
+
result = process_history(get_stock_index_history('AAPL', '20240214'), '20240214')
|
| 576 |
+
print(f"process_history: {result}")
|
| 577 |
+
pass
|