Add support micaps 2, 3, 5, 7, 8 type file read.

2020-03-15 00:37:10 +08:00 · 2020-03-15 00:37:10 +08:00 · 4171a3bfa5
commit 4171a3bfa5
parent 6bac20f9d3
2 changed files with 459 additions and 5 deletions
--- a/nmc_met_io/read_micaps.py
+++ b/nmc_met_io/read_micaps.py
@ -14,6 +14,191 @@ import pandas as pd
 from datetime import datetime, timedelta


+def read_micaps_1(fname, limit=None):
+    """
+    Read Micaps 1 type fle, the surface observation data.
+
+    注：此类数据用于规范的地面填图,  数据：  
+        区站号（长整数）  经度  纬度  拔海高度（均为浮点数）站点级别（整数）  总云量  风向  风速  
+        海平面气压（或本站气压）  3小时变压  过去天气1  过去天气2  6小时降水 低云状  低云量  低云高  
+        露点  能见度  现在天气  温度 中云状  高云状  标志1  标志2（均为整数） 24小时变温  24小时变压
+    注：
+        缺值时用9999表示，以后相同。
+        站点级别表示站点的放大级别，即只有当图象放大到该级别时此站才被填图。以后相同。
+        当标志1为1，标志2为2时，说明后面有24小时变温变压。否则说明后面没有24小时变温变压。
+
+    Args:
+        fname (str): data filename.
+        limit (list): region limit, [min_lat, min_lon, max_lat, max_lon]
+
+    Returns:
+        pandas DataFrame
+
+    Examples:
+    >>> data = read_micaps_1("Z:/data/surface/plot/19010108.000")
+    """
+
+    # check file exist
+    if not os.path.isfile(fname):
+        return None
+
+    # read contents
+    encodings = ['utf-8', 'gb18030', 'GBK']
+    for encoding in encodings:
+        txt = None
+        try:
+            with open(fname, 'r', encoding=encoding) as f:
+                txt = f.read().replace('\n', ' ').split()
+        except Exception:
+            pass
+    if txt is None:
+        print("Micaps 1 file error: " + fname)
+        return None
+
+    # head information
+    head_info = txt[2]
+
+    # date and time
+    year = int(txt[3]) if len(txt[3]) == 4 else int(txt[3]) + 2000
+    month = int(txt[4])
+    day = int(txt[5])
+    hour = int(txt[6])
+    time = datetime(year, month, day, hour)
+
+    # number of stations
+    number = int(txt[7])
+
+    # set record column names
+    columns = [
+            'ID', 'lon', 'lat', 'alt', 'grade', 'total_cloud_cover', 'wind_angle', 'wind_speed', 
+            'MSLP', 'pressure_3h_trend', 'past_weather_1', 'past_weather_2', 'precipitation_6h',
+            'low_cloud_type', 'low_cloud_cover', 'low_cloud_base', 'dewpoint', 'visibility', 
+            'current_weather', 'temperature', 'middle_cloud_type', 'high_cloud_type', 'flag1', 'flag2']
+
+    # cut the data
+    txt = txt[8:]
+    if (len(txt) % 24) == 0:
+        txt = np.array(txt)
+        txt.shape = [number, 24]
+    else:
+        txt = np.array(txt)
+        txt.shape = [number, 26]
+        columns.extend(['temperature_24h_trend', 'pressure_24h_trend'])
+
+    # initial data
+    data = pd.DataFrame(txt, columns=columns)
+
+    # convert column type
+    for column in data.columns:
+        if column == 'ID':
+            continue
+        data[column] = pd.to_numeric(data[column])
+        data[column].mask(data[column] ==  9999.0, inplace=True)
+
+    # cut the region
+    if limit is not None:
+        data = data[(limit[0] <= data['lat']) & (data['lat'] <= limit[2]) &
+                    (limit[1] <= data['lon']) & (data['lon'] <= limit[3])]
+
+    # check records
+    if len(data) == 0:
+        return None
+
+    # decode the sea leavl pressure
+    data.loc[data['MSLP'] <= 600, 'MSLP'] = data.loc[data['MSLP'] <= 600, 'MSLP']/10. + 1000.
+    data.loc[data['MSLP'] >  600, 'MSLP'] = data.loc[data['MSLP'] >  600, 'MSLP']/10. +  900.
+
+    # add time
+    data['time'] = time
+
+    # return
+    return data
+
+
+def read_micaps_2(fname, limit=None):
+    """
+    Read Micaps 2 type file (high observation)
+    
+    Args:
+        fname (str): data filename.
+        limit (list): region limit, [min_lat, min_lon, max_lat, max_lon]
+
+    Returns:
+        pandas DataFrame
+
+    Examples:
+    >>> data = read_micaps_2("Z:/data/high/plot/500/19010108.000")
+    """
+
+    # check file exist
+    if not os.path.isfile(fname):
+        return None
+
+    # read contents
+    encodings = ['utf-8', 'gb18030', 'GBK']
+    for encoding in encodings:
+        txt = None
+        try:
+            with open(fname, 'r', encoding=encoding) as f:
+                txt = f.read().replace('\n', ' ').split()
+        except Exception:
+            pass
+    if txt is None:
+        print("Micaps 1 file error: " + fname)
+        return None
+
+    # head information
+    head_info = txt[2]
+
+    # date and time
+    year = int(txt[3]) if len(txt[3]) == 4 else int(txt[3]) + 2000
+    month = int(txt[4])
+    day = int(txt[5])
+    hour = int(txt[6])
+    time = datetime(year, month, day, hour)
+
+    # level
+    level = float(txt[7])
+
+    # number of stations
+    number = int(txt[8])
+
+    # set record column names
+    columns = [
+            'ID', 'lon', 'lat', 'alt', 'grade', 'height', 'temperature', 'dewpoint_depression',
+            'wind_angle', 'wind_speed']
+
+    # cut the data
+    txt = np.array(txt[9:])
+    txt.shape = [number, 10]
+
+    # initial data
+    data = pd.DataFrame(txt, columns=columns)
+
+    # convert column type
+    for column in data.columns:
+        if column == 'ID':
+            continue
+        data[column] = pd.to_numeric(data[column])
+        data[column].mask(data[column] ==  9999.0, inplace=True)
+
+    # cut the region
+    if limit is not None:
+        data = data[(limit[0] <= data['lat']) & (data['lat'] <= limit[2]) &
+                    (limit[1] <= data['lon']) & (data['lon'] <= limit[3])]
+
+    # check records
+    if len(data) == 0:
+        return None
+
+    # add time
+    data['time'] = time
+    data['level'] = level
+
+    # return
+    return data
+
+
 def read_micaps_3(fname, limit=None):
    """
    Read Micaps 3 type file (general scatter point)
@ -111,7 +296,11 @@ def read_micaps_3(fname, limit=None):
    data = pd.DataFrame(txt, columns=columns)

    # convert column type
-    data = data.iloc[:, 1:].apply(pd.to_numeric)
+    for column in data.columns:
+        if column == 'ID':
+            continue
+        data[column] = pd.to_numeric(data[column])
+        data[column].mask(data[column] ==  9999.0, inplace=True)

    # cut the region
    if limit is not None:
@ -256,6 +445,271 @@ def read_micaps_4(fname, limit=None, varname='data', varattrs={'units':''}, scal
    return data


+def read_micaps_5(fname, limit=None):
+    """
+    Read Micaps 5 type file (TLOGP observation)
+    
+    Args:
+        fname (str): data filename.
+        limit (list): region limit, [min_lat, min_lon, max_lat, max_lon]
+
+    Returns:
+        pandas DataFrame
+
+    Examples:
+    >>> data = read_micaps_5("Z:/data/high/tlogp/20031420.000")
+    """
+
+    # check file exist
+    if not os.path.isfile(fname):
+        return None
+
+    # read contents
+    encodings = ['utf-8', 'gb18030', 'GBK']
+    for encoding in encodings:
+        txt = None
+        try:
+            with open(fname, 'r', encoding=encoding) as f:
+                txt = f.read().replace('\n', ' ').split()
+        except Exception:
+            pass
+    if txt is None:
+        print("Micaps 1 file error: " + fname)
+        return None
+
+    # head information
+    head_info = txt[2]
+
+    # date and time
+    year = int(txt[3]) if len(txt[3]) == 4 else int(txt[3]) + 2000
+    month = int(txt[4])
+    day = int(txt[5])
+    hour = int(txt[6])
+    time = datetime(year, month, day, hour)
+
+    # number of stations
+    number = int(txt[7])
+
+    # set record column names
+    columns = ['ID', 'lon', 'lat', 'alt', 'pressure', 'height', 'temperature', 'dewpoint',
+               'wind_angle', 'wind_speed']
+
+    # loop every station
+    pid = 8
+    data = []
+    for istn in range(number):
+        ID = txt[pid]
+        lon = float(txt[pid+1])
+        lat = float(txt[pid+2])
+        alt = float(txt[pid+3])
+        length = int(txt[pid+4])
+        pid += 5
+
+        for irec in range(int(length/6)):
+            record = {'ID':ID, 'lon':lon, 'lat':lat, 'alt':alt}
+            record['pressure'] = float(txt[pid])
+            record['height'] = float(txt[pid+1])
+            record['temperature'] = float(txt[pid+2])
+            record['dewpoint'] = float(txt[pid+3])
+            record['wind_angle'] = float(txt[pid+4])
+            record['wind_speed'] = float(txt[pid+5])
+            pid += 6
+            data.append(record)
+
+
+    # initial data
+    data = pd.DataFrame(data)
+
+    # convert column type
+    for column in data.columns:
+        if column == 'ID':
+            continue
+        data[column].mask(data[column] ==  9999.0, inplace=True)
+
+    # cut the region
+    if limit is not None:
+        data = data[(limit[0] <= data['lat']) & (data['lat'] <= limit[2]) &
+                    (limit[1] <= data['lon']) & (data['lon'] <= limit[3])]
+
+    # check records
+    if len(data) == 0:
+        return None
+
+    # add time
+    data['time'] = time
+
+    # return
+    return data
+
+
+def read_micaps_7(fname):
+    """
+    Read Micaps 7 type file (typhoon track record)
+    
+    Args:
+        fname (str): data filename.
+
+    Returns:
+        pandas DataFrame
+
+    Examples:
+    >>> data = read_micaps_7("Z:/data/cyclone/babj/babj2028.dat")
+    """
+
+    # check file exist
+    if not os.path.isfile(fname):
+        return None
+
+    # read contents
+    encodings = ['utf-8', 'gb18030', 'GBK']
+    for encoding in encodings:
+        txt = None
+        try:
+            with open(fname, 'r', encoding=encoding) as f:
+                txt = f.read().replace('\n', ' ').split()
+        except Exception:
+            pass
+    if txt is None:
+        print("Micaps 1 file error: " + fname)
+        return None
+
+    # head information
+    head_info = txt[2]
+
+    # loop every 
+    pid = 3
+    data = []
+    while (pid < len(txt)):
+        # typhoon name
+        name = txt[pid]
+        ID = txt[pid+1]
+        origin = txt[pid+2]
+        number = int(txt[pid+3])
+        pid += 4
+
+        for irec in range(number):
+            record = {'name':name, 'ID':ID, 'origin':origin}
+            year= int(txt[pid])
+            month = int(txt[pid+1])
+            day = int(txt[pid+2])
+            hour = int(txt[pid+3])
+            record['time'] = datetime(year, month, day, hour)
+            record['fhour'] = int(txt[pid+4])
+            record['cent_lon'] = float(txt[pid+5])
+            record['cent_lat'] = float(txt[pid+6])
+            record['max_wind_speed'] = float(txt[pid+7])
+            record['min_pressure'] = float(txt[pid+8])
+            record['radius_wind_7'] = float(txt[pid+9])
+            record['radius_wind_10'] = float(txt[pid+10])
+            record['move_direction'] = float(txt[pid+11])
+            record['move_speed'] = float(txt[pid+12])
+            pid += 13
+            data.append(record)
+
+        # check the end
+        if pid >= len(txt):
+            break
+        else:
+            pid += 1
+
+    # initial data
+    data = pd.DataFrame(data)
+
+    # convert column type
+    for column in data.columns:
+        if column in ['name', 'ID', 'origin']:
+            continue
+        data[column].mask(data[column] ==  9999.0, inplace=True)
+
+    # return
+    return data
+
+
+def read_micaps_8(fname, limit=None):
+    """
+    Read Micaps 8 type file (city forecast)
+
+    数据：
+    区站号  经度  纬度  拔海高度  天气现象1 风向1 风速1  最低温度 最高温度 天气现象2 风向2 风速2
+    注：天气现象、风向、风速均可以有两个值，分别为前后两个预报时段的值。
+
+    Args:
+        fname (str): data filename.
+        limit (list): region limit, [min_lat, min_lon, max_lat, max_lon]
+
+    Returns:
+        pandas DataFrame
+
+    Examples:
+    >>> data = read_micaps_2("Z:/data/high/plot/500/19010108.000")
+    """
+
+    # check file exist
+    if not os.path.isfile(fname):
+        return None
+
+    # read contents
+    encodings = ['utf-8', 'gb18030', 'GBK']
+    for encoding in encodings:
+        txt = None
+        try:
+            with open(fname, 'r', encoding=encoding) as f:
+                txt = f.read().replace('\n', ' ').split()
+        except Exception:
+            pass
+    if txt is None:
+        print("Micaps 1 file error: " + fname)
+        return None
+
+    # head information
+    head_info = txt[2]
+
+    # date and time
+    year = int(txt[3]) if len(txt[3]) == 4 else int(txt[3]) + 2000
+    month = int(txt[4])
+    day = int(txt[5])
+    hour = int(txt[6])
+    time = datetime(year, month, day, hour)
+    fhour = int(txt[7])
+
+    # number of stations
+    number = int(txt[8])
+
+    # set record column names
+    columns = ['ID', 'lon', 'lat', 'alt', 'weather_code1', 'wind_angle1', 'wind_speed1',
+               'min_temperature', 'max_temperature', 'weather_code2', 'wind_angle2', 'wind_speed2',]
+
+    # cut the data
+    txt = np.array(txt[9:])
+    txt.shape = [number, 12]
+
+    # initial data
+    data = pd.DataFrame(txt, columns=columns)
+
+    # convert column type
+    for column in data.columns:
+        if column == 'ID':
+            continue
+        data[column] = pd.to_numeric(data[column])
+        data[column].mask(data[column] ==  9999.0, inplace=True)
+
+    # cut the region
+    if limit is not None:
+        data = data[(limit[0] <= data['lat']) & (data['lat'] <= limit[2]) &
+                    (limit[1] <= data['lon']) & (data['lon'] <= limit[3])]
+
+    # check records
+    if len(data) == 0:
+        return None
+
+    # add time
+    data['time'] = time
+    data['fhour'] = fhour
+
+    # return
+    return data
+
+
 def read_micaps_11(fname, limit=None, scale_off=None, no_level=False,
                   levattrs={'long_name':'pressure_level', 'units':'hPa', '_CoordinateAxisType':'Pressure'}):
    """
--- a/nmc_met_io/retrieve_cimiss_server.py
+++ b/nmc_met_io/retrieve_cimiss_server.py
@ -990,7 +990,7 @@ def cimiss_obs_grid_by_time(time_str, limit=None, data_code="SURF_CMPA_FRT_5KM",
        filename = time_str
        if limit is not None:
            filename = filename + '.' + str(limit)
-        cache_file = CONFIG.get_cache_file(directory, filename, name="CIMISS_DATA")
+        cache_file = CONFIG.get_cache_file(directory, filename, name="CIMISS")
        if cache_file.is_file():
            with open(cache_file, 'rb') as f:
                data = pickle.load(f)
@ -1168,7 +1168,7 @@ def cimiss_analysis_by_time(time_str, limit=None, data_code='NAFP_CLDAS2.0_RT_GR
        filename = time_str
        if limit is not None:
            filename = filename + '.' + str(limit)
-        cache_file = CONFIG.get_cache_file(directory, filename, name="CIMISS_DATA")
+        cache_file = CONFIG.get_cache_file(directory, filename, name="CIMISS")
        if cache_file.is_file():
            with open(cache_file, 'rb') as f:
                data = pickle.load(f)
@ -1327,7 +1327,7 @@ def cimiss_model_grid(data_code, init_time_str, valid_time, fcst_ele, fcst_level
    if cache:
        directory = os.path.join(data_code, fcst_ele, str(fcst_level))
        filename = init_time_str + '.' + str(valid_time).zfill(3)
-        cache_file = CONFIG.get_cache_file(directory, filename, name="CIMISS_DATA")
+        cache_file = CONFIG.get_cache_file(directory, filename, name="CIMISS")
        if cache_file.is_file():
            with open(cache_file, 'rb') as f:
                data = pickle.load(f)
@ -1628,7 +1628,7 @@ def cimiss_model_by_time(init_time_str, valid_time=0, limit=None,
        filename = init_time_str + '.' + str(valid_time)
        if limit is not None:
            filename = filename + '.' + str(limit)
-        cache_file = CONFIG.get_cache_file(directory, filename, name="CIMISS_DATA")
+        cache_file = CONFIG.get_cache_file(directory, filename, name="CIMISS")
        if cache_file.is_file():
            with open(cache_file, 'rb') as f:
                data = pickle.load(f)