From 4a5c7ac19e934d4010e66720b0c388dfadb605ac Mon Sep 17 00:00:00 2001 From: NMC-DAVE Date: Sat, 21 Aug 2021 23:23:02 +0800 Subject: [PATCH] Add get_filenames and get_sub_grid functions. --- examples/retrieve_mdfs_model.ipynb | 34 +++-- nmc_met_io/__init__.py | 2 +- nmc_met_io/retrieve_cmadaas.py | 33 +++-- nmc_met_io/util.py | 202 ++++++++++++++++++++++++++++- 4 files changed, 245 insertions(+), 26 deletions(-) diff --git a/examples/retrieve_mdfs_model.ipynb b/examples/retrieve_mdfs_model.ipynb index c00f2bd..1042e32 100644 --- a/examples/retrieve_mdfs_model.ipynb +++ b/examples/retrieve_mdfs_model.ipynb @@ -1,6 +1,8 @@ { "cells": [ { + "cell_type": "markdown", + "metadata": {}, "source": [ "# 从MICAPS Cassandra Server数据库读取数值模式数据\n", "\n", @@ -27,9 +29,7 @@ "* https://github.com/nmcdev/nmc_met_io\n", "* http://www.micaps.cn/MifunForum/topic/list?fId=7\n", "---" - ], - "cell_type": "markdown", - "metadata": {} + ] }, { "cell_type": "markdown", @@ -63,14 +63,14 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "" ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 2 + "output_type": "execute_result" } ], "source": [ @@ -129,10 +129,23 @@ "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "\nDimensions: (lat: 301, level: 1, lon: 651, time: 1)\nCoordinates:\n * time (time) datetime64[ns] 2021-05-09T08:00:00\n * level (level) float32 850.0\n * lat (lat) float64 6.735e-06 0.2 0.4 ... 59.6 59.8 60.0\n * lon (lon) float64 50.0 50.2 50.4 ... 179.6 179.8 180.0\n forecast_reference_time datetime64[ns] 2021-05-08T08:00:00\n forecast_period (time) float64 24.0\nData variables:\n data (time, level, lat, lon) float32 19.18 ... -2.792\nAttributes:\n Conventions: CF-1.6\n Origin: MICAPS Cassandra Server\n" + "\n", + "Dimensions: (lat: 301, level: 1, lon: 651, time: 1)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2021-05-09T08:00:00\n", + " * level (level) float32 850.0\n", + " * lat (lat) float64 6.735e-06 0.2 0.4 ... 59.6 59.8 60.0\n", + " * lon (lon) float64 50.0 50.2 50.4 ... 179.6 179.8 180.0\n", + " forecast_reference_time datetime64[ns] 2021-05-08T08:00:00\n", + " forecast_period (time) float64 24.0\n", + "Data variables:\n", + " data (time, level, lat, lon) float32 19.18 ... -2.792\n", + "Attributes:\n", + " Conventions: CF-1.6\n", + " Origin: MICAPS Cassandra Server\n" ] } ], @@ -526,8 +539,9 @@ "celltoolbar": "Initialization Cell", "hide_input": false, "kernelspec": { - "name": "python3710jvsc74a57bd0ce674488fc7389ca027fd99dc2eb5f5e35c93a4190347346ae1bacb3bb4e56a8", - "display_name": "Python 3.7.10 64-bit ('MET': conda)" + "display_name": "Meteorology with Python", + "language": "python", + "name": "met" }, "language_info": { "codemirror_mode": { @@ -566,4 +580,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/nmc_met_io/__init__.py b/nmc_met_io/__init__.py index 9724c20..62a7c0f 100644 --- a/nmc_met_io/__init__.py +++ b/nmc_met_io/__init__.py @@ -4,4 +4,4 @@ from various data sources. """ __author__ = "The R & D Center for Weather Forecasting Technology in NMC, CMA" -__version__ = '0.1.6' +__version__ = '0.1.7' diff --git a/nmc_met_io/retrieve_cmadaas.py b/nmc_met_io/retrieve_cmadaas.py index ae574c9..929152a 100644 --- a/nmc_met_io/retrieve_cmadaas.py +++ b/nmc_met_io/retrieve_cmadaas.py @@ -1070,6 +1070,7 @@ def cmadaas_obs_grid_by_time(time_str, limit=None, data_code="SURF_CMPA_FAST_5KM """ Retrieve surface analysis grid products, like CMPAS-V2.1融合降水分析实时数据产品(NC). For SURF_CMPA_RT_NC, this function will retrieve the 0.01 resolution data and take long time. + 该函数主要用于降水实况分析数据检索, 若需要温度、风的实况分析, 使用cmadaas_analysis_by_time. :param time_str: analysis time string, like "20171008000000", format: YYYYMMDDHHMISS :param limit: [min_lat, min_lon, max_lat, max_lon] @@ -1081,7 +1082,7 @@ def cmadaas_obs_grid_by_time(time_str, limit=None, data_code="SURF_CMPA_FAST_5KM "SURF_CMPA_FAST_5KM_DAY": CMPAS-V2.1融合降水分析快速数据日产品(GRIB,5km) "SURF_CMPA_FRT_5KM_DAY": CMPAS-V2.1融合降水分析实时数据日产品(GRIB,5km) :param fcst_ele: elements - :param zoom: the zoom out integer > 1, like 2. + :param zoom: the zoom out integer > 1, like 2. 像元缩小倍数, 数值大于1,1的整数倍. :param units: forecast element's units, defaults to retrieved units. :param scale_off: [scale, offset], return values = values*scale + offset. :param cache: cache retrieved data to local directory, default is True. @@ -1515,7 +1516,7 @@ def cmadaas_obs_by_years(data_code="SURF_CHN_YER_MMUT_19812010", def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_ASI_NC', levattrs={'long_name':'Height above Ground', 'units':'m'}, level_type='-', - fcst_level=0, fcst_ele="TMP", zoom=None, units=None, scale_off=None, + fcst_level=None, fcst_ele="TMP", zoom=None, units=None, scale_off=None, cache=True, cache_clear=True): """ Retrieve CLDAS analysis data from CMADaaS service. @@ -1523,20 +1524,26 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_ :param time_str: analysis time, like "20160817120000", format: YYYYMMDDHHMISS :param limit: [min_lat, min_lon, max_lat, max_lon] :param data_code: MUSIC data code, default is "NAFP_CLDAS2.0_NRT_ASI_NC" - CLDAS2.0近实时数据产品(NC)-亚洲区域. - :param fcst_level: vertical level, default is 2. - :param level_type: vertical level type, default is 999998 + CLDAS2.0近实时数据产品(NC)-亚洲区域. Others like: + NAFP_HRCLDAS_CHN_1KM_RT, HRCLDAS中国0.01°×0.01°逐小时实时融合实况分析产品 + :param fcst_level: vertical level, default is None. + :param level_type: vertical level type, default is -, 表示没有层次类型 :param fcst_ele: forecast element, default is 2m temperature "TAIR" - :param zoom: the zoom out integer > 1, like 2. + :param zoom: the zoom out integer > 1, like 2. 像元缩小倍数, 数值大于1,1的整数倍. :param units: forecast element's units, defaults to retrieved units. :param scale_off: [scale, offset], return values = values*scale + offset. :param cache: cache retrieved data to local directory, default is True. :return: xarray dataset. Examples: - >>> data = cmadaas_analysis_by_time("20210123000000", data_code="NAFP_CLDAS2.0_NRT_ASI_NC", - fcst_level=0, level_type='-', fcst_ele='TMP', units="C", - scale_off=[1.0, -273.15], cache=False) + >>> data = cmadaas_analysis_by_time( + "20210123000000", data_code="NAFP_CLDAS2.0_NRT_ASI_NC", + fcst_ele='TMP', units="C", scale_off=[1.0, -273.15], + cache=False) + >>> data = cmadaas_analysis_by_time( + "20210821020000", data_code="NAFP_HRCLDAS_CHN_1KM_RT", + fcst_ele='TAIR', units="C", + scale_off=[1.0, -273.15], cache=False) """ # retrieve data from cached file @@ -1556,7 +1563,6 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_ params = {'dataCode': data_code, 'time': time_str, 'levelType': str(level_type).strip(), - 'fcstLevel': '{:d}'.format(fcst_level), 'fcstEle': fcst_ele} if zoom is not None: params['zoomOut'] = str(zoom) interface_id = 'getNafpAnaEleGridByTimeAndLevel' @@ -1568,9 +1574,10 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_ "maxLat": '{:.10f}'.format(limit[2]), "maxLon": '{:.10f}'.format(limit[3]), 'levelType': str(level_type).strip(), - 'fcstLevel': '{:d}'.format(fcst_level), 'fcstEle': fcst_ele} interface_id = 'getNafpAnaEleGridInRectByTimeAndLevel' + if fcst_level is not None: + params['fcstLevel'] = '{:d}'.format(fcst_level) # retrieve data contents contents = get_rest_result(interface_id, params) @@ -1605,7 +1612,7 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_ lat_coord = ('lat', lat, { 'long_name':'latitude', 'units':'degrees_north', '_CoordinateAxisType':'Lat', 'axis': 'Y'}) - if fcst_level != 0: + if fcst_level is not None: level_coord = ('level', np.array([fcst_level]), levattrs) varname = fcst_ele varattrs = {'long_name': name, 'units': units} @@ -1614,7 +1621,7 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_ data = np.array(contents['DS'], dtype=np.float32) if scale_off is not None: data = data * scale_off[0] + scale_off[1] - if fcst_level == 0: + if fcst_level is None: data = data[np.newaxis, ...] data = xr.Dataset({ varname:(['time', 'lat', 'lon'], data, varattrs)}, diff --git a/nmc_met_io/util.py b/nmc_met_io/util.py index bd7adb8..8a2d63d 100644 --- a/nmc_met_io/util.py +++ b/nmc_met_io/util.py @@ -4,12 +4,163 @@ # Distributed under the terms of the GPL V3 License. +import os +import pathlib import warnings import re from datetime import datetime, timedelta import numpy as np +import pandas as pd +def product_filename(model=None, product=None, level=None, obs_time=None, + init_time=None, fhour=None, valid_time=None, + statistic=None, place=None, suffix=None, root_dir=None): + """ + Construct standard product file name, all parameters should not + include "_". + + :param model: model name. + :param product: product name. + :param level: vertical level. + :param obs_time: observation level. + :param init_time: model initial model time. + :param fhour: model forecast time. + :param valid_time: model forecast valid time. + :param statistic: statistic method name. + :param place: place or station name. + :param suffix: file suffix. + :param root_dir: file directory. + :return: product file name. + """ + + # define filename + filename = "" + + # model name + if model is not None: + filename = filename + "MD_" + str(model).strip().upper() + + # product name + if product is not None: + filename = filename + "_PD_" + str(product).strip() + + # vertical level + if level is not None: + filename = filename + "_LV_" + str(level).strip() + + # observation time + if obs_time is not None: + if isinstance(obs_time, datetime): + filename = filename + "_OT_" + obs_time.strftime("%Y%m%d%H") + elif isinstance(obs_time, np.datetime64): + filename = filename + "_OT_" + \ + pd.to_datetime(str(obs_time)).strftime("%Y%m%d%H") + else: + filename = filename + "_OT_" + str(obs_time).strip() + + # model initial time + if init_time is not None: + if isinstance(init_time, datetime): + filename = filename + "_IT_" + init_time.strftime("%Y%m%d%H") + elif isinstance(init_time, np.datetime64): + filename = filename + "_IT_" + \ + pd.to_datetime(str(init_time)).strftime("%Y%m%d%H") + else: + filename = filename + "_IT_" + str(init_time).strip() + + # model forecast hour + if fhour is not None: + filename = filename + "_FH_" + str(fhour).strip() + + # model valid time + if valid_time is not None: + if isinstance(valid_time, datetime): + filename = filename + "_VT_" + valid_time.strftime("%Y%m%d%H") + elif isinstance(valid_time, np.datetime64): + filename = filename + "_VT_" + \ + pd.to_datetime(str(valid_time)).strftime("%Y%m%d%H") + else: + filename = filename + "_VT_" + str(valid_time).strip() + + # statistic name + if statistic is not None: + filename = filename + "_SN_" + str(statistic).strip() + + # place name + if place is not None: + filename = filename + "_PN_" + str(place).strip() + + # remove the first "_" + if filename[0] == "_": + filename = filename[1:] + + # add suffix + if suffix is not None: + if suffix[0] == ".": + filename = filename + suffix + else: + filename = filename + "." + suffix + + # add root directory + if root_dir is not None: + filename = os.path.join(root_dir, filename) + + # return + return filename + + +def product_filename_retrieve(filename): + """ + Retrieve information from the standard product filename. + + :param filename: file name. + :return: filename information dictionary. + """ + + file_name = pathlib.PureWindowsPath(filename) + file_stem = file_name.stem.split("_") + return dict(zip(file_stem[0::2], file_stem[1::2])) + + +def get_fcst_times(validTime, initHours=[8, 20], format=None, + initStep=1, min_fhour=1, max_fhour=84): + """ + 获得某一预报时刻相应的不同起报时间, 预报时效序列. + + Args: + validTime (string or datetime): 预报时间. + initHours (int, optional): 模式的起报时刻, 例如 [8, 20]或[2,8,14,20] + format (string, optional): 预报时间为字符串时的格式. Defaults to None. + initStep (int, optional): 模式起报的时间间隔, 设置可以加快计算速度. Defaults to 1. + min_fhour (int, optional): minimum forecast hour. Defaults to 0. + max_fhour (int, optional): maximum forecast hour. Defaults to 84. + """ + + # convert to pd.Timestamp + if not isinstance(validTime, pd.Timestamp): + validTime = pd.to_datetime(validTime, format=format) + + # check initHour and find the first initial time + initHours = np.asarray(initHours) + if np.min(initHours) < 0 or np.max(initHours) > 23: + raise ValueError('initHours should be in 0-23') + fhours = [] + initTimes = [] + fhour = min_fhour + while fhour <= max_fhour: + initTime = validTime - pd.Timedelta(fhour, unit='hour') + if initTime.hour in initHours and fhour >= min_fhour: + fhours.append(fhour) + initTimes.append(initTime) + fhour += initStep + else: + fhour += 1 + + # return + return initTimes, fhours + + def get_filenames(initTime, fhours="0/72/3;", zeroFill=3): """ Construct filenames @@ -110,8 +261,9 @@ def get_initTime(iHours, delayHour=6, currentTime=None): break # return initial time - initTime = datetime(currentTime.year, currentTime.month, - currentTime.day, currentTime.hour) + initTime = datetime( + currentTime.year, currentTime.month, + currentTime.day, currentTime.hour) return initTime @@ -139,3 +291,49 @@ def get_sub_stations(data, limit=[70, 140, 8, 60], raise ValueError("'lon' or 'Lat' coordinates is not in data.") return subdata + +def get_sub_grid(gdata, glon, glat, limit, pad=0): + """ + Extract grid subset region. + + :param gdata: 2D grid data, [nlat, nlon] + :param glon: 1D or 2D array, longitude. + :param glat: 1D or 2D array, latitude. + :param limit: subset boundary, [lonmin, lonmax, latmin, latmax] + :param pad: pad for bounds. + :return: subset boundary index. + """ + + # add pad + limit = [limit[0]-pad, limit[1]+pad, + limit[2]-pad, limit[3]+pad] + + # convert to numpy array + lat = np.squeeze(np.asarray(glat)) + lon = np.squeeze(np.asarray(glon)) + gdata = np.squeeze(np.asarray(gdata)) + + # get 1D coordinates + if glon.ndim == 2: + lat = lat[:, 0] + lon = lon[0, :] + + # latitude lower and upper index + latli = np.argmin(np.abs(lat - limit[2])) + latui = np.argmin(np.abs(lat - limit[3])) + + # longitude lower and upper index + lonli = np.argmin(np.abs(lon - limit[0])) + lonui = np.argmin(np.abs(lon - limit[1])) + + # extract information + if glon.ndim == 2: + glon = glon[latli:latui+1, lonli:lonui+1] + glat = glat[latli:latui+1, lonli:lonui+1] + else: + glon = glon[lonli:lonui+1] + glat = glat[latli:latui+1] + gdata = gdata[latli:latui+1, lonli:lonui+1] + + # return subset boundary index + return gdata, glon, glat