Add get_filenames and get_sub_grid functions.

NMC-DAVE 2021-08-21 23:23:02 +08:00
parent fa721a2c21
commit 4a5c7ac19e
4 changed files with 245 additions and 26 deletions

@@ -1,6 +1,8 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 从MICAPS Cassandra Server数据库读取数值模式数据\n",
"\n",
@@ -27,9 +29,7 @@
"* https://github.com/nmcdev/nmc_met_io\n",
"* http://www.micaps.cn/MifunForum/topic/list?fId=7\n",
"---"
],
"cell_type": "markdown",
"metadata": {}
]
},
{
"cell_type": "markdown",
@@ -63,14 +63,14 @@
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<xarray.core.options.set_options at 0x7fef3958ab10>"
]
},
"execution_count": 2,
"metadata": {},
"execution_count": 2
"output_type": "execute_result"
}
],
"source": [
@@ -129,10 +129,23 @@
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"<xarray.Dataset>\nDimensions: (lat: 301, level: 1, lon: 651, time: 1)\nCoordinates:\n * time (time) datetime64[ns] 2021-05-09T08:00:00\n * level (level) float32 850.0\n * lat (lat) float64 6.735e-06 0.2 0.4 ... 59.6 59.8 60.0\n * lon (lon) float64 50.0 50.2 50.4 ... 179.6 179.8 180.0\n forecast_reference_time datetime64[ns] 2021-05-08T08:00:00\n forecast_period (time) float64 24.0\nData variables:\n data (time, level, lat, lon) float32 19.18 ... -2.792\nAttributes:\n Conventions: CF-1.6\n Origin: MICAPS Cassandra Server\n"
"<xarray.Dataset>\n",
"Dimensions: (lat: 301, level: 1, lon: 651, time: 1)\n",
"Coordinates:\n",
" * time (time) datetime64[ns] 2021-05-09T08:00:00\n",
" * level (level) float32 850.0\n",
" * lat (lat) float64 6.735e-06 0.2 0.4 ... 59.6 59.8 60.0\n",
" * lon (lon) float64 50.0 50.2 50.4 ... 179.6 179.8 180.0\n",
" forecast_reference_time datetime64[ns] 2021-05-08T08:00:00\n",
" forecast_period (time) float64 24.0\n",
"Data variables:\n",
" data (time, level, lat, lon) float32 19.18 ... -2.792\n",
"Attributes:\n",
" Conventions: CF-1.6\n",
" Origin: MICAPS Cassandra Server\n"
]
}
],
@@ -526,8 +539,9 @@
"celltoolbar": "Initialization Cell",
"hide_input": false,
"kernelspec": {
"name": "python3710jvsc74a57bd0ce674488fc7389ca027fd99dc2eb5f5e35c93a4190347346ae1bacb3bb4e56a8",
"display_name": "Python 3.7.10 64-bit ('MET': conda)"
"display_name": "Meteorology with Python",
"language": "python",
"name": "met"
},
"language_info": {
"codemirror_mode": {
@@ -566,4 +580,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

@@ -4,4 +4,4 @@ from various data sources.
"""
__author__ = "The R & D Center for Weather Forecasting Technology in NMC, CMA"
__version__ = '0.1.6'
__version__ = '0.1.7'

@@ -1070,6 +1070,7 @@ def cmadaas_obs_grid_by_time(time_str, limit=None, data_code="SURF_CMPA_FAST_5KM
    """
    Retrieve surface analysis grid products, like the CMPAS-V2.1 merged precipitation analysis real-time product (NC).
    For SURF_CMPA_RT_NC, this function retrieves the 0.01° resolution data and takes a long time.
    This function is mainly for retrieving precipitation analysis data; for temperature or wind analyses, use cmadaas_analysis_by_time.
    :param time_str: analysis time string, like "20171008000000", format: YYYYMMDDHHMISS
    :param limit: [min_lat, min_lon, max_lat, max_lon]
@@ -1081,7 +1082,7 @@ def cmadaas_obs_grid_by_time(time_str, limit=None, data_code="SURF_CMPA_FAST_5KM
        "SURF_CMPA_FAST_5KM_DAY": CMPAS-V2.1 merged precipitation analysis fast daily product (GRIB, 5 km)
        "SURF_CMPA_FRT_5KM_DAY": CMPAS-V2.1 merged precipitation analysis real-time daily product (GRIB, 5 km)
    :param fcst_ele: forecast element name.
    :param zoom: the zoom out integer > 1, like 2.
    :param zoom: the zoom-out (pixel coarsening) factor, an integer greater than 1, like 2.
    :param units: forecast element's units, defaults to retrieved units.
    :param scale_off: [scale, offset], return values = values*scale + offset.
    :param cache: cache retrieved data to local directory, default is True.
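
    Example (a usage sketch; the element name "PRE" and the time string are
    illustrative assumptions, not taken from the service documentation):
    >>> data = cmadaas_obs_grid_by_time(
            "20210821000000", data_code="SURF_CMPA_FAST_5KM",
            fcst_ele="PRE", units="mm", cache=False)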
@@ -1515,7 +1516,7 @@ def cmadaas_obs_by_years(data_code="SURF_CHN_YER_MMUT_19812010",
def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_ASI_NC',
                             levattrs={'long_name':'Height above Ground', 'units':'m'}, level_type='-',
                             fcst_level=0, fcst_ele="TMP", zoom=None, units=None, scale_off=None,
                             fcst_level=None, fcst_ele="TMP", zoom=None, units=None, scale_off=None,
                             cache=True, cache_clear=True):
"""
Retrieve CLDAS analysis data from CMADaaS service.
@@ -1523,20 +1524,26 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
    :param time_str: analysis time, like "20160817120000", format: YYYYMMDDHHMISS
    :param limit: [min_lat, min_lon, max_lat, max_lon]
    :param data_code: MUSIC data code, default is "NAFP_CLDAS2.0_NRT_ASI_NC",
        the CLDAS 2.0 near-real-time analysis product (NC, Asia region).
    :param fcst_level: vertical level, default is 2.
    :param level_type: vertical level type, default is 999998
        the CLDAS 2.0 near-real-time analysis product (NC, Asia region). Others like:
        NAFP_HRCLDAS_CHN_1KM_RT, the HRCLDAS 0.01°×0.01° hourly real-time merged analysis product for China.
    :param fcst_level: vertical level, default is None.
    :param level_type: vertical level type, default is '-', meaning no level type.
    :param fcst_ele: forecast element, default is temperature "TMP" (for HRCLDAS use "TAIR").
    :param zoom: the zoom out integer > 1, like 2.
    :param zoom: the zoom-out (pixel coarsening) factor, an integer greater than 1, like 2.
    :param units: forecast element's units, defaults to retrieved units.
    :param scale_off: [scale, offset], return values = values*scale + offset.
    :param cache: cache retrieved data to local directory, default is True.
    :return: xarray dataset.
    Examples:
    >>> data = cmadaas_analysis_by_time("20210123000000", data_code="NAFP_CLDAS2.0_NRT_ASI_NC",
                                        fcst_level=0, level_type='-', fcst_ele='TMP', units="C",
                                        scale_off=[1.0, -273.15], cache=False)
    >>> data = cmadaas_analysis_by_time(
            "20210123000000", data_code="NAFP_CLDAS2.0_NRT_ASI_NC",
            fcst_ele='TMP', units="C", scale_off=[1.0, -273.15],
            cache=False)
    >>> data = cmadaas_analysis_by_time(
            "20210821020000", data_code="NAFP_HRCLDAS_CHN_1KM_RT",
            fcst_ele='TAIR', units="C",
            scale_off=[1.0, -273.15], cache=False)
    """
    # retrieve data from cached file
@@ -1556,7 +1563,6 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
        params = {'dataCode': data_code,
                  'time': time_str,
                  'levelType': str(level_type).strip(),
                  'fcstLevel': '{:d}'.format(fcst_level),
                  'fcstEle': fcst_ele}
        if zoom is not None: params['zoomOut'] = str(zoom)
        interface_id = 'getNafpAnaEleGridByTimeAndLevel'
@@ -1568,9 +1574,10 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
                  "maxLat": '{:.10f}'.format(limit[2]),
                  "maxLon": '{:.10f}'.format(limit[3]),
                  'levelType': str(level_type).strip(),
                  'fcstLevel': '{:d}'.format(fcst_level),
                  'fcstEle': fcst_ele}
        interface_id = 'getNafpAnaEleGridInRectByTimeAndLevel'
    if fcst_level is not None:
        params['fcstLevel'] = '{:d}'.format(fcst_level)
    # retrieve data contents
    contents = get_rest_result(interface_id, params)
@@ -1605,7 +1612,7 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
    lat_coord = ('lat', lat, {
        'long_name':'latitude', 'units':'degrees_north',
        '_CoordinateAxisType':'Lat', 'axis': 'Y'})
    if fcst_level != 0:
    if fcst_level is not None:
        level_coord = ('level', np.array([fcst_level]), levattrs)
    varname = fcst_ele
    varattrs = {'long_name': name, 'units': units}
@@ -1614,7 +1621,7 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
    data = np.array(contents['DS'], dtype=np.float32)
    if scale_off is not None:
        data = data * scale_off[0] + scale_off[1]
    if fcst_level == 0:
    if fcst_level is None:
        data = data[np.newaxis, ...]
    data = xr.Dataset({
        varname:(['time', 'lat', 'lon'], data, varattrs)},

@@ -4,12 +4,163 @@
# Distributed under the terms of the GPL V3 License.
import os
import pathlib
import warnings
import re
from datetime import datetime, timedelta
import numpy as np
import pandas as pd


def product_filename(model=None, product=None, level=None, obs_time=None,
                     init_time=None, fhour=None, valid_time=None,
                     statistic=None, place=None, suffix=None, root_dir=None):
    """
    Construct a standard product file name. Parameter values must not
    contain "_", which is used as the field separator.

    :param model: model name.
    :param product: product name.
    :param level: vertical level.
    :param obs_time: observation time.
    :param init_time: model initial time.
    :param fhour: model forecast hour.
    :param valid_time: model forecast valid time.
    :param statistic: statistic method name.
    :param place: place or station name.
    :param suffix: file suffix.
    :param root_dir: file directory.
    :return: product file name.
    """
    # define filename
    filename = ""

    # model name
    if model is not None:
        filename = filename + "MD_" + str(model).strip().upper()

    # product name
    if product is not None:
        filename = filename + "_PD_" + str(product).strip()

    # vertical level
    if level is not None:
        filename = filename + "_LV_" + str(level).strip()

    # observation time
    if obs_time is not None:
        if isinstance(obs_time, datetime):
            filename = filename + "_OT_" + obs_time.strftime("%Y%m%d%H")
        elif isinstance(obs_time, np.datetime64):
            filename = filename + "_OT_" + \
                pd.to_datetime(str(obs_time)).strftime("%Y%m%d%H")
        else:
            filename = filename + "_OT_" + str(obs_time).strip()

    # model initial time
    if init_time is not None:
        if isinstance(init_time, datetime):
            filename = filename + "_IT_" + init_time.strftime("%Y%m%d%H")
        elif isinstance(init_time, np.datetime64):
            filename = filename + "_IT_" + \
                pd.to_datetime(str(init_time)).strftime("%Y%m%d%H")
        else:
            filename = filename + "_IT_" + str(init_time).strip()

    # model forecast hour
    if fhour is not None:
        filename = filename + "_FH_" + str(fhour).strip()

    # model valid time
    if valid_time is not None:
        if isinstance(valid_time, datetime):
            filename = filename + "_VT_" + valid_time.strftime("%Y%m%d%H")
        elif isinstance(valid_time, np.datetime64):
            filename = filename + "_VT_" + \
                pd.to_datetime(str(valid_time)).strftime("%Y%m%d%H")
        else:
            filename = filename + "_VT_" + str(valid_time).strip()

    # statistic name
    if statistic is not None:
        filename = filename + "_SN_" + str(statistic).strip()

    # place name
    if place is not None:
        filename = filename + "_PN_" + str(place).strip()

    # remove the leading "_" (guard against an empty name)
    if filename and filename[0] == "_":
        filename = filename[1:]

    # add suffix
    if suffix is not None:
        if suffix[0] == ".":
            filename = filename + suffix
        else:
            filename = filename + "." + suffix

    # add root directory
    if root_dir is not None:
        filename = os.path.join(root_dir, filename)

    # return
    return filename
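
# A minimal usage sketch for product_filename (illustrative values; the
# expected string follows directly from the tag logic above):
# >>> product_filename(model='ECMWF', product='T2m', init_time='2021082008',
# ...                  fhour=24, suffix='.nc')
# 'MD_ECMWF_PD_T2m_IT_2021082008_FH_24.nc'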


def product_filename_retrieve(filename):
    """
    Retrieve information from a standard product file name.

    :param filename: file name.
    :return: dictionary mapping the field tags (MD, PD, ...) to their values.
    """
    # PureWindowsPath accepts both "\\" and "/" separators, so this works
    # for Windows and POSIX style paths alike
    file_name = pathlib.PureWindowsPath(filename)
    file_stem = file_name.stem.split("_")
    return dict(zip(file_stem[0::2], file_stem[1::2]))
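
# Round-trip sketch (illustrative): parse the name built in the previous
# example back into its fields.
# >>> product_filename_retrieve('MD_ECMWF_PD_T2m_IT_2021082008_FH_24.nc')
# {'MD': 'ECMWF', 'PD': 'T2m', 'IT': '2021082008', 'FH': '24'}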


def get_fcst_times(validTime, initHours=[8, 20], format=None,
                   initStep=1, min_fhour=1, max_fhour=84):
    """
    Get the model initial times and forecast hours that verify at a
    given valid time.

    Args:
        validTime (string or datetime): forecast valid time.
        initHours (list, optional): model initialization hours, e.g.
            [8, 20] or [2, 8, 14, 20]. Defaults to [8, 20].
        format (string, optional): format of validTime when it is given
            as a string. Defaults to None.
        initStep (int, optional): time interval between model runs;
            setting it can speed up the search. Defaults to 1.
        min_fhour (int, optional): minimum forecast hour. Defaults to 1.
        max_fhour (int, optional): maximum forecast hour. Defaults to 84.

    Returns:
        (initTimes, fhours): lists of initial times and forecast hours.
    """
    # convert to pd.Timestamp
    if not isinstance(validTime, pd.Timestamp):
        validTime = pd.to_datetime(validTime, format=format)

    # check that initHours are valid hours of day
    initHours = np.asarray(initHours)
    if np.min(initHours) < 0 or np.max(initHours) > 23:
        raise ValueError('initHours should be in 0-23')

    # walk backward from the valid time; keep every forecast hour whose
    # initial time falls on one of the model initialization hours
    fhours = []
    initTimes = []
    fhour = min_fhour
    while fhour <= max_fhour:
        initTime = validTime - pd.Timedelta(fhour, unit='hour')
        if initTime.hour in initHours:
            fhours.append(fhour)
            initTimes.append(initTime)
            fhour += initStep
        else:
            fhour += 1

    # return
    return initTimes, fhours
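
# Usage sketch (the result follows from the defaults above): forecasts
# from 08/20 BT model runs that verify at 2021-05-09 08:00.
# >>> initTimes, fhours = get_fcst_times('2021050908', format='%Y%m%d%H')
# >>> fhours
# [12, 24, 36, 48, 60, 72, 84]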


def get_filenames(initTime, fhours="0/72/3;", zeroFill=3):
    """
    Construct filenames
@@ -110,8 +261,9 @@ def get_initTime(iHours, delayHour=6, currentTime=None):
            break
    # return initial time
    initTime = datetime(currentTime.year, currentTime.month,
                        currentTime.day, currentTime.hour)
    initTime = datetime(
        currentTime.year, currentTime.month,
        currentTime.day, currentTime.hour)
    return initTime
@@ -139,3 +291,49 @@ def get_sub_stations(data, limit=[70, 140, 8, 60],
        raise ValueError("'lon' or 'lat' coordinate is not in data.")
    return subdata


def get_sub_grid(gdata, glon, glat, limit, pad=0):
    """
    Extract a subset region from a grid.

    :param gdata: 2D grid data, [nlat, nlon].
    :param glon: 1D or 2D array, longitude.
    :param glat: 1D or 2D array, latitude.
    :param limit: subset boundary, [lonmin, lonmax, latmin, latmax].
    :param pad: padding (in degrees) added to the boundary.
    :return: subset grid data, longitude and latitude arrays.
    """
    # add pad
    limit = [limit[0]-pad, limit[1]+pad,
             limit[2]-pad, limit[3]+pad]

    # convert to numpy array
    glon = np.asarray(glon)
    glat = np.asarray(glat)
    gdata = np.squeeze(np.asarray(gdata))
    lat = np.squeeze(glat)
    lon = np.squeeze(glon)

    # get 1D coordinates (lat and lon are assumed to be monotonically
    # increasing)
    if glon.ndim == 2:
        lat = lat[:, 0]
        lon = lon[0, :]

    # latitude lower and upper index
    latli = np.argmin(np.abs(lat - limit[2]))
    latui = np.argmin(np.abs(lat - limit[3]))

    # longitude lower and upper index
    lonli = np.argmin(np.abs(lon - limit[0]))
    lonui = np.argmin(np.abs(lon - limit[1]))

    # extract the subset
    if glon.ndim == 2:
        glon = glon[latli:latui+1, lonli:lonui+1]
        glat = glat[latli:latui+1, lonli:lonui+1]
    else:
        glon = glon[lonli:lonui+1]
        glat = glat[latli:latui+1]
    gdata = gdata[latli:latui+1, lonli:lonui+1]

    # return the subset data and coordinates
    return gdata, glon, glat
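
# Usage sketch (illustrative): cut a 2°x2° box out of a global 1° grid;
# the helper returns the trimmed field and its coordinates.
# >>> lon = np.arange(0, 360, 1.0)
# >>> lat = np.arange(-90, 91, 1.0)
# >>> field = np.random.rand(lat.size, lon.size)
# >>> sub, sublon, sublat = get_sub_grid(field, lon, lat, [100, 102, 30, 32])
# >>> sub.shape
# (3, 3)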