Add get_hist_obs_daily function.

This commit is contained in:
NMC-DAVE 2021-08-07 00:07:47 +08:00
parent 257911cffb
commit fa721a2c21
2 changed files with 54 additions and 8 deletions

@ -547,7 +547,7 @@ def cmadaas_obs_in_rect_by_time(times, limit, data_code="SURF_CHN_MUL_HOR_N",
def cmadaas_obs_in_rect_by_time_range(time_range, limit, data_code="SURF_CHN_MUL_HOR_N",
sta_levels=None, ranges=None, order=None,
ranges=None, order=None,
count=None, trans_type=True,
elements="Station_Id_C,Datetime,Lat,Lon,TEM"):
"""
@ -557,8 +557,6 @@ def cmadaas_obs_in_rect_by_time_range(time_range, limit, data_code="SURF_CHN_MUL
time_range (str): time for retrieve, "[YYYYMMDDHHMISS,YYYYMMDDHHMISS]"
limit (list): map limits, [min_lat, min_lon, max_lat, max_lon]
data_code (str, optional): dataset code. Defaults to "SURF_CHN_MUL_HOR_N".
sta_levels (str, optional): station levels, seperated by ',',
like "011,012,013" for standard, base and general stations. Defaults to None.
ranges (str, optional): elements value ranges, separated by ';'
range: (a,) is >a, [a,) is >=a, (,a) is <a, (,a] is <=a, (a,b) is >a & <b,
[a,b) is >=a & <b, (a,b] is >a & <=b, [a,b] is >=a & <=b
@ -593,7 +591,7 @@ def cmadaas_obs_in_rect_by_time_range(time_range, limit, data_code="SURF_CHN_MUL
'maxLat': '{:.10f}'.format(limit[2]),
'maxLon': '{:.10f}'.format(limit[3]),
'orderby': order if order is not None else "Datetime:ASC"}
if sta_levels is not None: params['staLevels'] = sta_levels
#if sta_levels is not None: params['staLevels'] = sta_levels # getSurfEleInRectByTimeRange not support this parameter.
if ranges is not None: params['eleValueRanges'] = ranges
if count is not None: params['limitCnt'] = str(count)

@ -21,7 +21,7 @@ def get_hist_obs_id(years=np.arange(2000, 2011, 1),
elements=None, sta_ids="54511"):
"""
Retrieve historical observations for sta_ids.
大数据云平台上获取指定站点的地面观测数据. 由于大数据云平台对一次性检索有数量限制,
CMADaaS上获取指定站点的地面观测数据. 由于大数据云平台对一次性检索有数量限制,
因此先逐年下载, 然后再联接成一张观测记录表.
:
@ -64,7 +64,7 @@ def get_hist_obs_id(years=np.arange(2000, 2011, 1),
def get_accumulated_rainfall(time_range, data_code="SURF_CHN_MUL_HOR",
accumulated=True, limit=None):
"""
大数据云平台中国地面逐小时资料下载站点的逐小时降水观测, 并累加为一段时间的累积降水.
CMADaaS中国地面逐小时资料下载站点的逐小时降水观测, 并累加为一段时间的累积降水.
例如, 需要24h的累积降水, 用站点观测的PRE_24h有误差, 需要直接用PRE_1h来进行累积.
Args:
@ -75,7 +75,7 @@ def get_accumulated_rainfall(time_range, data_code="SURF_CHN_MUL_HOR",
"""
# 读入数据
elements = "Station_Name,Province,Station_Id_C,Lon,Lat,PRE_1h"
elements = "Station_Name,Province,Station_Id_C,Lon,Lat,Datetime,PRE_1h"
if limit is None:
df = cmadaas_obs_by_time_range(time_range, data_code=data_code, elements=elements)
else:
@ -88,4 +88,52 @@ def get_accumulated_rainfall(time_range, data_code="SURF_CHN_MUL_HOR",
df.rename(columns={"PRE_1h":"PRE"}, inplace=True)
# 返回计算值
return df
return df
def get_hist_obs_daily(years=np.arange(2000, 2011, 1),
                       data_code='SURF_CHN_MUL_DAY_N',
                       elements=None, limit=None,
                       sta_levels="011,012,013"):
    """
    Retrieve daily observation data from CMADaaS, one request per year.

    Every entry of `years` is queried separately with the time range
    "[<year>0101000000,<year>1231230000]". Each yearly table has its
    duplicated rows dropped, and all tables are then joined into a single
    record table.

    Args:
        years (np.array, optional): years for historical data.
            Defaults to np.arange(2000, 2011, 1).
        data_code (str, optional): dataset code.
            Defaults to 'SURF_CHN_MUL_DAY_N'.
        elements (str, optional): elements for retrieve, 'ele1, ele2, ...'.
            When None, a built-in element list (station metadata, Datetime,
            PRE_Time_0808 and Q_PRE_Time_0808) is requested.
        limit (tuple, optional): region of the returned data,
            [min_lat, min_lon, max_lat, max_lon]. When given, the
            rectangle query is used instead of the plain time-range query.
        sta_levels (str, optional): station levels to return. The default
            "011,012,013" selects national reference climatological
            stations, national basic meteorological stations and national
            general meteorological stations. None returns stations of all
            levels. This argument has no effect when `limit` is set,
            because it is not passed to the rectangle query.

    Returns:
        dataframe: station observation records, or None when no yearly
            request returned a table.
    """

    # use the default element list when the caller gave none
    if elements is None:
        elements = 'Station_Id_d,Station_levl,Station_Name,Province,Lat,Lon,Alti,Datetime,PRE_Time_0808,Q_PRE_Time_0808'

    # request one year at a time and collect the non-empty results
    yearly_frames = []
    for year in tqdm(years, desc="Years: "):
        year_text = str(year)
        time_range = "[{}0101000000,{}1231230000]".format(year_text, year_text)
        if limit is not None:
            # the rectangle query is called without sta_levels
            frame = cmadaas_obs_in_rect_by_time_range(
                time_range, limit, data_code=data_code, elements=elements)
        else:
            frame = cmadaas_obs_by_time_range(
                time_range, data_code=data_code, elements=elements,
                sta_levels=sta_levels)
        if frame is None:
            continue
        yearly_frames.append(frame.drop_duplicates())

    # concatenate the yearly tables into one
    if not yearly_frames:
        return None
    return pd.concat(yearly_frames, axis=0, ignore_index=True)