From 4a5c7ac19e934d4010e66720b0c388dfadb605ac Mon Sep 17 00:00:00 2001
From: NMC-DAVE <daikan1998@163.com>
Date: Sat, 21 Aug 2021 23:23:02 +0800
Subject: [PATCH] Add get_filenames and get_sub_grid functions.

---
 examples/retrieve_mdfs_model.ipynb |  34 +++--
 nmc_met_io/__init__.py             |   2 +-
 nmc_met_io/retrieve_cmadaas.py     |  33 +++--
 nmc_met_io/util.py                 | 202 ++++++++++++++++++++++++++++-
 4 files changed, 245 insertions(+), 26 deletions(-)

diff --git a/examples/retrieve_mdfs_model.ipynb b/examples/retrieve_mdfs_model.ipynb
index c00f2bd..1042e32 100644
--- a/examples/retrieve_mdfs_model.ipynb
+++ b/examples/retrieve_mdfs_model.ipynb
@@ -1,6 +1,8 @@
 {
  "cells": [
   {
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "# 从MICAPS Cassandra Server数据库读取数值模式数据\n",
     "\n",
@@ -27,9 +29,7 @@
     "* https://github.com/nmcdev/nmc_met_io\n",
     "* http://www.micaps.cn/MifunForum/topic/list?fId=7\n",
     "---"
-   ],
-   "cell_type": "markdown",
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
@@ -63,14 +63,14 @@
    },
    "outputs": [
     {
-     "output_type": "execute_result",
      "data": {
       "text/plain": [
        "<xarray.core.options.set_options at 0x7fef3958ab10>"
       ]
      },
+     "execution_count": 2,
      "metadata": {},
-     "execution_count": 2
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -129,10 +129,23 @@
    "metadata": {},
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
+     "output_type": "stream",
      "text": [
-      "<xarray.Dataset>\nDimensions:                  (lat: 301, level: 1, lon: 651, time: 1)\nCoordinates:\n  * time                     (time) datetime64[ns] 2021-05-09T08:00:00\n  * level                    (level) float32 850.0\n  * lat                      (lat) float64 6.735e-06 0.2 0.4 ... 59.6 59.8 60.0\n  * lon                      (lon) float64 50.0 50.2 50.4 ... 179.6 179.8 180.0\n    forecast_reference_time  datetime64[ns] 2021-05-08T08:00:00\n    forecast_period          (time) float64 24.0\nData variables:\n    data                     (time, level, lat, lon) float32 19.18 ... -2.792\nAttributes:\n    Conventions:  CF-1.6\n    Origin:       MICAPS Cassandra Server\n"
+      "<xarray.Dataset>\n",
+      "Dimensions:                  (lat: 301, level: 1, lon: 651, time: 1)\n",
+      "Coordinates:\n",
+      "  * time                     (time) datetime64[ns] 2021-05-09T08:00:00\n",
+      "  * level                    (level) float32 850.0\n",
+      "  * lat                      (lat) float64 6.735e-06 0.2 0.4 ... 59.6 59.8 60.0\n",
+      "  * lon                      (lon) float64 50.0 50.2 50.4 ... 179.6 179.8 180.0\n",
+      "    forecast_reference_time  datetime64[ns] 2021-05-08T08:00:00\n",
+      "    forecast_period          (time) float64 24.0\n",
+      "Data variables:\n",
+      "    data                     (time, level, lat, lon) float32 19.18 ... -2.792\n",
+      "Attributes:\n",
+      "    Conventions:  CF-1.6\n",
+      "    Origin:       MICAPS Cassandra Server\n"
      ]
     }
    ],
@@ -526,8 +539,9 @@
   "celltoolbar": "Initialization Cell",
   "hide_input": false,
   "kernelspec": {
-   "name": "python3710jvsc74a57bd0ce674488fc7389ca027fd99dc2eb5f5e35c93a4190347346ae1bacb3bb4e56a8",
-   "display_name": "Python 3.7.10 64-bit ('MET': conda)"
+   "display_name": "Meteorology with Python",
+   "language": "python",
+   "name": "met"
   },
   "language_info": {
    "codemirror_mode": {
@@ -566,4 +580,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/nmc_met_io/__init__.py b/nmc_met_io/__init__.py
index 9724c20..62a7c0f 100644
--- a/nmc_met_io/__init__.py
+++ b/nmc_met_io/__init__.py
@@ -4,4 +4,4 @@ from various data sources.
 """
 
 __author__ = "The R & D Center for Weather Forecasting Technology in NMC, CMA"
-__version__ = '0.1.6'
+__version__ = '0.1.7'
diff --git a/nmc_met_io/retrieve_cmadaas.py b/nmc_met_io/retrieve_cmadaas.py
index ae574c9..929152a 100644
--- a/nmc_met_io/retrieve_cmadaas.py
+++ b/nmc_met_io/retrieve_cmadaas.py
@@ -1070,6 +1070,7 @@ def cmadaas_obs_grid_by_time(time_str, limit=None, data_code="SURF_CMPA_FAST_5KM
     """
     Retrieve surface analysis grid products, like CMPAS-V2.1融合降水分析实时数据产品（NC）.
     For SURF_CMPA_RT_NC, this function will retrieve the 0.01 resolution data and take long time.
+    该函数主要用于降水实况分析数据检索, 若需要温度、风的实况分析, 使用cmadaas_analysis_by_time.
 
     :param time_str: analysis time string, like "20171008000000", format: YYYYMMDDHHMISS
     :param limit: [min_lat, min_lon, max_lat, max_lon]
@@ -1081,7 +1082,7 @@ def cmadaas_obs_grid_by_time(time_str, limit=None, data_code="SURF_CMPA_FAST_5KM
         "SURF_CMPA_FAST_5KM_DAY": CMPAS-V2.1融合降水分析快速数据日产品（GRIB，5km）
         "SURF_CMPA_FRT_5KM_DAY": CMPAS-V2.1融合降水分析实时数据日产品（GRIB，5km）
     :param fcst_ele: elements
-    :param zoom: the zoom out integer > 1, like 2.
+    :param zoom: the zoom out integer > 1, like 2. 像元缩小倍数, 数值大于1，1的整数倍.
     :param units: forecast element's units, defaults to retrieved units.
     :param scale_off: [scale, offset], return values = values*scale + offset.
     :param cache: cache retrieved data to local directory, default is True.
@@ -1515,7 +1516,7 @@ def cmadaas_obs_by_years(data_code="SURF_CHN_YER_MMUT_19812010",
 
 def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_ASI_NC',
                              levattrs={'long_name':'Height above Ground', 'units':'m'}, level_type='-',
-                             fcst_level=0, fcst_ele="TMP", zoom=None, units=None, scale_off=None,
+                             fcst_level=None, fcst_ele="TMP", zoom=None, units=None, scale_off=None,
                              cache=True, cache_clear=True):
     """
     Retrieve CLDAS analysis data from CMADaaS service.
@@ -1523,20 +1524,26 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
     :param time_str: analysis time, like "20160817120000", format: YYYYMMDDHHMISS
     :param limit: [min_lat, min_lon, max_lat, max_lon]
     :param data_code: MUSIC data code, default is "NAFP_CLDAS2.0_NRT_ASI_NC"
-                      CLDAS2.0近实时数据产品（NC）-亚洲区域.
-    :param fcst_level: vertical level, default is 2.
-    :param level_type: vertical level type, default is 999998
+                      CLDAS2.0近实时数据产品（NC）-亚洲区域. Others like:
+                      NAFP_HRCLDAS_CHN_1KM_RT, HRCLDAS中国0.01°×0.01°逐小时实时融合实况分析产品
+    :param fcst_level: vertical level, default is None.
+    :param level_type: vertical level type, default is -, 表示没有层次类型
     :param fcst_ele: forecast element, default is 2m temperature "TAIR"
-    :param zoom: the zoom out integer > 1, like 2.
+    :param zoom: the zoom out integer > 1, like 2. 像元缩小倍数, 数值大于1，1的整数倍.
     :param units: forecast element's units, defaults to retrieved units.
     :param scale_off: [scale, offset], return values = values*scale + offset.
     :param cache: cache retrieved data to local directory, default is True.
     :return: xarray dataset.
 
     Examples:
-    >>> data = cmadaas_analysis_by_time("20210123000000", data_code="NAFP_CLDAS2.0_NRT_ASI_NC", 
-                                        fcst_level=0, level_type='-', fcst_ele='TMP', units="C",
-                                        scale_off=[1.0, -273.15], cache=False)
+    >>> data = cmadaas_analysis_by_time(
+            "20210123000000", data_code="NAFP_CLDAS2.0_NRT_ASI_NC", 
+            fcst_ele='TMP', units="C", scale_off=[1.0, -273.15],
+            cache=False)
+    >>> data = cmadaas_analysis_by_time(
+            "20210821020000", data_code="NAFP_HRCLDAS_CHN_1KM_RT", 
+            fcst_ele='TAIR', units="C",
+            scale_off=[1.0, -273.15], cache=False)
     """
 
     # retrieve data from cached file
@@ -1556,7 +1563,6 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
         params = {'dataCode': data_code,
                   'time': time_str,
                   'levelType': str(level_type).strip(),
-                  'fcstLevel': '{:d}'.format(fcst_level),
                   'fcstEle': fcst_ele}
         if zoom is not None: params['zoomOut'] = str(zoom)
         interface_id = 'getNafpAnaEleGridByTimeAndLevel'
@@ -1568,9 +1574,10 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
                   "maxLat": '{:.10f}'.format(limit[2]),
                   "maxLon": '{:.10f}'.format(limit[3]),
                   'levelType': str(level_type).strip(),
-                  'fcstLevel': '{:d}'.format(fcst_level),
                   'fcstEle': fcst_ele}
         interface_id = 'getNafpAnaEleGridInRectByTimeAndLevel'
+    if fcst_level is not None:
+        params['fcstLevel'] = '{:d}'.format(fcst_level)
 
     # retrieve data contents
     contents = get_rest_result(interface_id, params)
@@ -1605,7 +1612,7 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
     lat_coord = ('lat', lat, {
         'long_name':'latitude', 'units':'degrees_north',
         '_CoordinateAxisType':'Lat', 'axis': 'Y'})
-    if fcst_level != 0:
+    if fcst_level is not None:
         level_coord = ('level', np.array([fcst_level]), levattrs)
     varname = fcst_ele
     varattrs = {'long_name': name, 'units': units}
@@ -1614,7 +1621,7 @@ def cmadaas_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_NRT_
     data = np.array(contents['DS'], dtype=np.float32)
     if scale_off is not None:
         data = data * scale_off[0] + scale_off[1]
-    if fcst_level == 0:
+    if fcst_level is None:
         data = data[np.newaxis, ...]
         data = xr.Dataset({
             varname:(['time', 'lat', 'lon'], data, varattrs)},
diff --git a/nmc_met_io/util.py b/nmc_met_io/util.py
index bd7adb8..8a2d63d 100644
--- a/nmc_met_io/util.py
+++ b/nmc_met_io/util.py
@@ -4,12 +4,163 @@
 # Distributed under the terms of the GPL V3 License.
 
 
+import os
+import pathlib
 import warnings
 import re
 from datetime import datetime, timedelta
 import numpy as np
+import pandas as pd
 
 
+def product_filename(model=None, product=None, level=None, obs_time=None,
+                     init_time=None, fhour=None, valid_time=None,
+                     statistic=None, place=None, suffix=None, root_dir=None):
+    """
+    Construct standard product file name, all parameters should not
+    include "_".
+
+    :param model: model name.
+    :param product: product name.
+    :param level: vertical level.
+    :param obs_time: observation level.
+    :param init_time: model initial model time.
+    :param fhour: model forecast time.
+    :param valid_time: model forecast valid time.
+    :param statistic: statistic method name.
+    :param place: place or station name.
+    :param suffix: file suffix.
+    :param root_dir: file directory.
+    :return: product file name.
+    """
+
+    # define filename
+    filename = ""
+
+    # model name
+    if model is not None:
+        filename = filename + "MD_" + str(model).strip().upper()
+
+    # product name
+    if product is not None:
+        filename = filename + "_PD_" + str(product).strip()
+
+    # vertical level
+    if level is not None:
+        filename = filename + "_LV_" + str(level).strip()
+
+    # observation time
+    if obs_time is not None:
+        if isinstance(obs_time, datetime):
+            filename = filename + "_OT_" + obs_time.strftime("%Y%m%d%H")
+        elif isinstance(obs_time, np.datetime64):
+            filename = filename + "_OT_" + \
+                       pd.to_datetime(str(obs_time)).strftime("%Y%m%d%H")
+        else:
+            filename = filename + "_OT_" + str(obs_time).strip()
+
+    # model initial time
+    if init_time is not None:
+        if isinstance(init_time, datetime):
+            filename = filename + "_IT_" + init_time.strftime("%Y%m%d%H")
+        elif isinstance(init_time, np.datetime64):
+            filename = filename + "_IT_" + \
+                       pd.to_datetime(str(init_time)).strftime("%Y%m%d%H")
+        else:
+            filename = filename + "_IT_" + str(init_time).strip()
+
+    # model forecast hour
+    if fhour is not None:
+        filename = filename + "_FH_" + str(fhour).strip()
+
+    # model valid time
+    if valid_time is not None:
+        if isinstance(valid_time, datetime):
+            filename = filename + "_VT_" + valid_time.strftime("%Y%m%d%H")
+        elif isinstance(valid_time, np.datetime64):
+            filename = filename + "_VT_" + \
+                       pd.to_datetime(str(valid_time)).strftime("%Y%m%d%H")
+        else:
+            filename = filename + "_VT_" + str(valid_time).strip()
+
+    # statistic name
+    if statistic is not None:
+        filename = filename + "_SN_" + str(statistic).strip()
+
+    # place name
+    if place is not None:
+        filename = filename + "_PN_" + str(place).strip()
+
+    # remove the first "_"
+    if filename[0] == "_":
+        filename = filename[1:]
+
+    # add suffix
+    if suffix is not None:
+        if suffix[0] == ".":
+            filename = filename + suffix
+        else:
+            filename = filename + "." + suffix
+
+    # add root directory
+    if root_dir is not None:
+        filename = os.path.join(root_dir, filename)
+
+    # return
+    return filename
+
+
+def product_filename_retrieve(filename):
+    """
+    Retrieve information from the standard product filename.
+
+    :param filename: file name.
+    :return: filename information dictionary.
+    """
+
+    file_name = pathlib.PureWindowsPath(filename)
+    file_stem = file_name.stem.split("_")
+    return dict(zip(file_stem[0::2], file_stem[1::2]))
+
+
+def get_fcst_times(validTime, initHours=[8, 20], format=None,
+                   initStep=1, min_fhour=1, max_fhour=84):
+    """
+    获得某一预报时刻相应的不同起报时间, 预报时效序列.
+
+    Args:
+        validTime (string or datetime): 预报时间.
+        initHours (int, optional): 模式的起报时刻, 例如 [8, 20]或[2,8,14,20]
+        format (string, optional): 预报时间为字符串时的格式. Defaults to None.
+        initStep (int, optional): 模式起报的时间间隔, 设置可以加快计算速度. Defaults to 1.
+        min_fhour (int, optional): minimum forecast hour. Defaults to 0.
+        max_fhour (int, optional): maximum forecast hour. Defaults to 84.
+    """
+    
+    # convert to pd.Timestamp
+    if not isinstance(validTime, pd.Timestamp):
+        validTime = pd.to_datetime(validTime, format=format)
+        
+    # check initHour and find the first initial time
+    initHours = np.asarray(initHours)
+    if np.min(initHours) < 0 or np.max(initHours) > 23:
+        raise ValueError('initHours should be in 0-23')
+    fhours = []
+    initTimes = []
+    fhour = min_fhour
+    while fhour <= max_fhour:
+        initTime = validTime - pd.Timedelta(fhour, unit='hour')
+        if initTime.hour in initHours and fhour >= min_fhour:
+            fhours.append(fhour)
+            initTimes.append(initTime)
+            fhour += initStep
+        else:
+            fhour += 1
+    
+    # return
+    return initTimes, fhours
+    
+
 def get_filenames(initTime, fhours="0/72/3;", zeroFill=3):
     """
     Construct filenames 
@@ -110,8 +261,9 @@ def get_initTime(iHours, delayHour=6, currentTime=None):
             break
 
     # return initial time
-    initTime = datetime(currentTime.year, currentTime.month,
-                        currentTime.day, currentTime.hour)
+    initTime = datetime(
+        currentTime.year, currentTime.month,
+        currentTime.day, currentTime.hour)
     return initTime
 
 
@@ -139,3 +291,49 @@ def get_sub_stations(data, limit=[70, 140, 8, 60],
         raise ValueError("'lon' or 'Lat' coordinates is not in data.")
     return subdata
 
+
+def get_sub_grid(gdata, glon, glat, limit, pad=0):
+    """
+    Extract grid subset region.
+
+    :param gdata: 2D grid data, [nlat, nlon]
+    :param glon: 1D or 2D array, longitude.
+    :param glat: 1D or 2D array, latitude.
+    :param limit: subset boundary, [lonmin, lonmax, latmin, latmax]
+    :param pad: pad for bounds.
+    :return: subset boundary index.
+    """
+    
+    # add pad
+    limit = [limit[0]-pad, limit[1]+pad,
+             limit[2]-pad, limit[3]+pad]
+    
+    # convert to numpy array
+    lat = np.squeeze(np.asarray(glat))
+    lon = np.squeeze(np.asarray(glon))
+    gdata = np.squeeze(np.asarray(gdata))
+    
+    # get 1D coordinates
+    if glon.ndim == 2:
+        lat = lat[:, 0]
+        lon = lon[0, :]
+
+    # latitude lower and upper index
+    latli = np.argmin(np.abs(lat - limit[2]))
+    latui = np.argmin(np.abs(lat - limit[3]))
+
+    # longitude lower and upper index
+    lonli = np.argmin(np.abs(lon - limit[0]))
+    lonui = np.argmin(np.abs(lon - limit[1]))
+    
+    # extract information
+    if glon.ndim == 2:
+        glon = glon[latli:latui+1, lonli:lonui+1]
+        glat = glat[latli:latui+1, lonli:lonui+1]
+    else:
+        glon = glon[lonli:lonui+1]
+        glat = glat[latli:latui+1]
+    gdata = gdata[latli:latui+1, lonli:lonui+1]
+
+    # return subset boundary index
+    return gdata, glon, glat