Skip to content

Commit 50c32ac

Browse files
authored
refactor(data_collector): use akshare to build unified trade calendar (#2093)
* refactor(data_collector): use akshare to build unified trade calendar
* fix: github action failure caused by black upgrade
1 parent 80982f8 commit 50c32ac

File tree

3 files changed

+13
-25
lines changed

3 files changed

+13
-25
lines changed

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@ rl = [
6969
"torch",
7070
"numpy<2.0.0",
7171
]
72+
# We exclude black version 26.1.0 due to known issues with nbqa when formatting Jupyter notebooks,
73+
# which can cause false-positive --check results and inconsistent notebook formatting.
7274
lint = [
73-
"black",
75+
"black!=26.1.0",
7476
"pylint",
7577
"mypy<1.5.0",
7678
"flake8",

scripts/data_collector/utils.py

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import time
88
import bisect
99
import pickle
10-
import random
1110
import requests
1211
import functools
1312
from pathlib import Path
@@ -80,28 +79,14 @@ def _get_calendar(url):
8079
calendar = df.index.get_level_values(level="date").map(pd.Timestamp).unique().tolist()
8180
else:
8281
if bench_code.upper() == "ALL":
83-
84-
@deco_retry
85-
def _get_calendar_from_month(month):
86-
_cal = []
87-
try:
88-
resp = requests.get(
89-
SZSE_CALENDAR_URL.format(month=month, random=random.random), timeout=None
90-
).json()
91-
for _r in resp["data"]:
92-
if int(_r["jybz"]):
93-
_cal.append(pd.Timestamp(_r["jyrq"]))
94-
except Exception as e:
95-
raise ValueError(f"{month}-->{e}") from e
96-
return _cal
97-
98-
month_range = pd.date_range(start="2000-01", end=pd.Timestamp.now() + pd.Timedelta(days=31), freq="M")
99-
calendar = []
100-
for _m in month_range:
101-
cal = _get_calendar_from_month(_m.strftime("%Y-%m"))
102-
if cal:
103-
calendar += cal
104-
calendar = list(filter(lambda x: x <= pd.Timestamp.now(), calendar))
82+
import akshare as ak # pylint: disable=C0415
83+
84+
trade_date_df = ak.tool_trade_date_hist_sina()
85+
trade_date_list = trade_date_df["trade_date"].tolist()
86+
trade_date_list = [pd.Timestamp(d) for d in trade_date_list]
87+
dates = pd.DatetimeIndex(trade_date_list)
88+
filtered_dates = dates[(dates >= "2000-01-04") & (dates <= pd.Timestamp.today().normalize())]
89+
calendar = filtered_dates.tolist()
10590
else:
10691
calendar = _get_calendar(CALENDAR_BENCH_URL_MAP[bench_code])
10792
_CALENDAR_MAP[bench_code] = calendar

scripts/data_collector/yahoo/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ yahooquery
99
joblib
1010
beautifulsoup4
1111
bs4
12-
soupsieve
12+
soupsieve
13+
akshare

0 commit comments

Comments
 (0)