Initializing nmc_met_io

This commit is contained in:
NMC-DAVE 2019-02-18 14:34:15 +08:00
parent fb5237cf58
commit 9f45fefbd2
13 changed files with 3089 additions and 1 deletions

6
MANIFEST.in Normal file

@@ -0,0 +1,6 @@
include README.md
include LICENSE
recursive-exclude * __pycache__
recursive-exclude * *.pyc
recursive-exclude * *.pyo
recursive-exclude * *.orig

@@ -1,2 +1,43 @@
# nmc_met_io
# A library for reading, writing, and accessing meteorological data
Provides reading and writing of MICAPS files, satellite imagery, weather radar data, and more, as well as access to the CIMISS and MICAPS CASSANDRA database services.
Only Python 3 is supported.
## Dependencies
Other required packages:
- numpy
- scipy
- xarray
- pandas
- pyproj
- protobuf
- urllib3
- python-dateutil
## Install
Use the following command to install the package:
```
pip install git+git://github.com/nmcdev/nmc_met_io.git
```
or download the package and install:
```
git clone --recursive https://github.com/nmcdev/nmc_met_io.git
cd nmc_met_io
python setup.py install
```
## Configure CIMISS and MICAPS server addresses and user information
In the user home directory (e.g., "C:\Users\&lt;username&gt;" on Windows), create a text file named config_met_io.ini with the following template:
```
[CIMISS]
DNS = xx.xx.xx.xx
USER_ID = xxxxxxxxx
PASSWORD = xxxxxxxx
[MICAPS]
GDS_IP = xx.xx.xx.xx
GDS_PORT = xxxx
```
Replace the xxxx placeholders with the appropriate addresses, ports, and user information.
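
After configuring the servers, a quick sanity check (a minimal sketch; it assumes the GDS server set above is reachable and serves an ECMWF_HR/TMP/850 directory):
```
from nmc_met_io.retrieve_micaps_server import get_model_grid

# retrieve the latest 850 hPa temperature grid from the MICAPS GDS server
data = get_model_grid("ECMWF_HR/TMP/850")
if data is not None:
    print(data)
```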

359
nmc_met_io/DataBlock_pb2.py Normal file

@@ -0,0 +1,359 @@
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: DataBlock.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='DataBlock.proto',
package='',
syntax='proto3',
serialized_pb=_b('\n\x0f\x44\x61taBlock.proto\"E\n\x0cStringResult\x12\x11\n\terrorCode\x18\x01 \x01(\x05\x12\x14\n\x0c\x65rrorMessage\x18\x02 \x01(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\"M\n\x0f\x42yteArrayResult\x12\x11\n\terrorCode\x18\x01 \x01(\x05\x12\x14\n\x0c\x65rrorMessage\x18\x02 \x01(\t\x12\x11\n\tbyteArray\x18\x03 \x01(\x0c\"h\n\x18StringAndByteArrayResult\x12\x11\n\terrorCode\x18\x01 \x01(\x05\x12\x14\n\x0c\x65rrorMessage\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61taName\x18\x03 \x01(\t\x12\x11\n\tbyteArray\x18\x04 \x01(\x0c\"[\n\x0e\x46ileInfoResult\x12\x11\n\terrorCode\x18\x01 \x01(\x05\x12\x14\n\x0c\x65rrorMessage\x18\x02 \x01(\t\x12\x0e\n\x06isFile\x18\x03 \x01(\x08\x12\x10\n\x08\x66ileSize\x18\x04 \x01(\x03\"\x94\x01\n\tMapResult\x12\x11\n\terrorCode\x18\x01 \x01(\x05\x12\x14\n\x0c\x65rrorMessage\x18\x02 \x01(\t\x12,\n\tresultMap\x18\x03 \x03(\x0b\x32\x19.MapResult.ResultMapEntry\x1a\x30\n\x0eResultMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42)\n\x1c\x63n.gov.cma.cimiss.gds.serverB\tDataBlockb\x06proto3')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
_STRINGRESULT = _descriptor.Descriptor(
name='StringResult',
full_name='StringResult',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='errorCode', full_name='StringResult.errorCode', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='errorMessage', full_name='StringResult.errorMessage', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='name', full_name='StringResult.name', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=19,
serialized_end=88,
)
_BYTEARRAYRESULT = _descriptor.Descriptor(
name='ByteArrayResult',
full_name='ByteArrayResult',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='errorCode', full_name='ByteArrayResult.errorCode', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='errorMessage', full_name='ByteArrayResult.errorMessage', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='byteArray', full_name='ByteArrayResult.byteArray', index=2,
number=3, type=12, cpp_type=9, label=1,
has_default_value=False, default_value=_b(""),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=90,
serialized_end=167,
)
_STRINGANDBYTEARRAYRESULT = _descriptor.Descriptor(
name='StringAndByteArrayResult',
full_name='StringAndByteArrayResult',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='errorCode', full_name='StringAndByteArrayResult.errorCode', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='errorMessage', full_name='StringAndByteArrayResult.errorMessage', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='dataName', full_name='StringAndByteArrayResult.dataName', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='byteArray', full_name='StringAndByteArrayResult.byteArray', index=3,
number=4, type=12, cpp_type=9, label=1,
has_default_value=False, default_value=_b(""),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=169,
serialized_end=273,
)
_FILEINFORESULT = _descriptor.Descriptor(
name='FileInfoResult',
full_name='FileInfoResult',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='errorCode', full_name='FileInfoResult.errorCode', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='errorMessage', full_name='FileInfoResult.errorMessage', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='isFile', full_name='FileInfoResult.isFile', index=2,
number=3, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='fileSize', full_name='FileInfoResult.fileSize', index=3,
number=4, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=275,
serialized_end=366,
)
_MAPRESULT_RESULTMAPENTRY = _descriptor.Descriptor(
name='ResultMapEntry',
full_name='MapResult.ResultMapEntry',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='key', full_name='MapResult.ResultMapEntry.key', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='value', full_name='MapResult.ResultMapEntry.value', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=469,
serialized_end=517,
)
_MAPRESULT = _descriptor.Descriptor(
name='MapResult',
full_name='MapResult',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='errorCode', full_name='MapResult.errorCode', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='errorMessage', full_name='MapResult.errorMessage', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='resultMap', full_name='MapResult.resultMap', index=2,
number=3, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[_MAPRESULT_RESULTMAPENTRY, ],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=369,
serialized_end=517,
)
_MAPRESULT_RESULTMAPENTRY.containing_type = _MAPRESULT
_MAPRESULT.fields_by_name['resultMap'].message_type = _MAPRESULT_RESULTMAPENTRY
DESCRIPTOR.message_types_by_name['StringResult'] = _STRINGRESULT
DESCRIPTOR.message_types_by_name['ByteArrayResult'] = _BYTEARRAYRESULT
DESCRIPTOR.message_types_by_name['StringAndByteArrayResult'] = _STRINGANDBYTEARRAYRESULT
DESCRIPTOR.message_types_by_name['FileInfoResult'] = _FILEINFORESULT
DESCRIPTOR.message_types_by_name['MapResult'] = _MAPRESULT
StringResult = _reflection.GeneratedProtocolMessageType('StringResult', (_message.Message,), dict(
DESCRIPTOR = _STRINGRESULT,
__module__ = 'DataBlock_pb2'
# @@protoc_insertion_point(class_scope:StringResult)
))
_sym_db.RegisterMessage(StringResult)
ByteArrayResult = _reflection.GeneratedProtocolMessageType('ByteArrayResult', (_message.Message,), dict(
DESCRIPTOR = _BYTEARRAYRESULT,
__module__ = 'DataBlock_pb2'
# @@protoc_insertion_point(class_scope:ByteArrayResult)
))
_sym_db.RegisterMessage(ByteArrayResult)
StringAndByteArrayResult = _reflection.GeneratedProtocolMessageType('StringAndByteArrayResult', (_message.Message,), dict(
DESCRIPTOR = _STRINGANDBYTEARRAYRESULT,
__module__ = 'DataBlock_pb2'
# @@protoc_insertion_point(class_scope:StringAndByteArrayResult)
))
_sym_db.RegisterMessage(StringAndByteArrayResult)
FileInfoResult = _reflection.GeneratedProtocolMessageType('FileInfoResult', (_message.Message,), dict(
DESCRIPTOR = _FILEINFORESULT,
__module__ = 'DataBlock_pb2'
# @@protoc_insertion_point(class_scope:FileInfoResult)
))
_sym_db.RegisterMessage(FileInfoResult)
MapResult = _reflection.GeneratedProtocolMessageType('MapResult', (_message.Message,), dict(
ResultMapEntry = _reflection.GeneratedProtocolMessageType('ResultMapEntry', (_message.Message,), dict(
DESCRIPTOR = _MAPRESULT_RESULTMAPENTRY,
__module__ = 'DataBlock_pb2'
# @@protoc_insertion_point(class_scope:MapResult.ResultMapEntry)
))
,
DESCRIPTOR = _MAPRESULT,
__module__ = 'DataBlock_pb2'
# @@protoc_insertion_point(class_scope:MapResult)
))
_sym_db.RegisterMessage(MapResult)
_sym_db.RegisterMessage(MapResult.ResultMapEntry)
DESCRIPTOR.has_options = True
DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\034cn.gov.cma.cimiss.gds.serverB\tDataBlock'))
_MAPRESULT_RESULTMAPENTRY.has_options = True
_MAPRESULT_RESULTMAPENTRY._options = _descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001'))
# @@protoc_insertion_point(module_scope)
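
For reference, the DataBlock.proto source that would generate the module above can be reconstructed from the descriptors it registers (field names, numbers, and types as given above; a sketch, not the original file):
```
syntax = "proto3";

option java_package = "cn.gov.cma.cimiss.gds.server";
option java_outer_classname = "DataBlock";

message StringResult {
    int32 errorCode = 1;
    string errorMessage = 2;
    string name = 3;
}

message ByteArrayResult {
    int32 errorCode = 1;
    string errorMessage = 2;
    bytes byteArray = 3;
}

message StringAndByteArrayResult {
    int32 errorCode = 1;
    string errorMessage = 2;
    string dataName = 3;
    bytes byteArray = 4;
}

message FileInfoResult {
    int32 errorCode = 1;
    string errorMessage = 2;
    bool isFile = 3;
    int64 fileSize = 4;
}

message MapResult {
    int32 errorCode = 1;
    string errorMessage = 2;
    map<string, string> resultMap = 3;
}
```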

7
nmc_met_io/__init__.py Normal file

@@ -0,0 +1,7 @@
"""
nmc_met_io is a package to read or retrieve meteorological data
from various data sources.
"""
__author__ = "The R & D Center for Weather Forecasting Technology in NMC, CMA"
__version__ = '0.1.0'

31
nmc_met_io/config.py Normal file

@@ -0,0 +1,31 @@
# _*_ coding: utf-8 _*_
# Copyright (c) 2019 NMC Developers.
# Distributed under the terms of the GPL V3 License.
"""
Read configure file.
"""
import os
import configparser
class ConfigFetchError(Exception):
    pass
def _get_config_from_rcfile():
"""
    Get configuration information from the config_met_io.ini file.
"""
rc = os.path.normpath(os.path.expanduser("~/config_met_io.ini"))
try:
config = configparser.ConfigParser()
config.read(rc)
except IOError as e:
raise ConfigFetchError(str(e))
except Exception as e:
raise ConfigFetchError(str(e))
return config
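
For reference, a minimal sketch of how the parsed configuration is consumed elsewhere in this package (section and key names follow the config_met_io.ini template in the README):
```
from nmc_met_io.config import _get_config_from_rcfile

config = _get_config_from_rcfile()
dns = config['CIMISS']['DNS']          # CIMISS REST service address
user_id = config['CIMISS']['USER_ID']
gds_ip = config['MICAPS']['GDS_IP']    # MICAPS Cassandra GDS server
gds_port = config['MICAPS']['GDS_PORT']
```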

66
nmc_met_io/read_grads.py Normal file

@@ -0,0 +1,66 @@
# _*_ coding: utf-8 _*_
# Copyright (c) 2019 NMC Developers.
# Distributed under the terms of the GPL V3 License.
"""
Read grads data file.
"""
import os
import re
from datetime import datetime
import numpy as np
def read_cmp_pre_hour_grid(files, start_lon=70.05, start_lat=15.05):
"""
Read SURF_CLI_CHN_MERGE_CMP_PRE_HOUR_GRID_0.1 data file
:param files: a single or multiple data filenames.
:param start_lon: region lower left corner longitude.
:param start_lat: region lower left corner latitude.
:return: data and time
:Examples:
>>> files = ("F:/201607/SURF_CLI_CHN_MERGE_CMP_"
"PRE_HOUR_GRID_0.10-2016070100.grd")
>>> data, time, lon, lat = read_cmp_pre_hour_grid(files)
"""
# sort and count data files
if isinstance(files, str):
files = np.array([files])
else:
files = np.array(files)
# define coordinates
lon = np.arange(700) * 0.1 + start_lon
lat = np.arange(440) * 0.1 + start_lat
# define variables
data = np.full((len(files), lat.size, lon.size), np.nan)
time = []
# loop every data file
for i, f in enumerate(files):
# check file exist
if not os.path.isfile(f):
return None, None, None, None
# extract time information
        ttime = re.search(r'\d{10}', os.path.basename(f))
time.append(datetime.strptime(ttime.group(0), "%Y%m%d%H"))
# read data
try:
tdata = np.fromfile(
f, dtype=np.dtype('float32')).reshape(2, len(lat), len(lon))
tdata[tdata == -999.0] = np.nan # missing value
data[i, :, :] = tdata[0, :, :]
except IOError:
print("Can not read data from "+f)
continue
# return value
return data, time, lon, lat
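
A short usage sketch of the reader above (hypothetical local file, following the SURF_CLI_CHN_MERGE_CMP_PRE_HOUR_GRID_0.1 naming convention):
```
from nmc_met_io.read_grads import read_cmp_pre_hour_grid

files = "SURF_CLI_CHN_MERGE_CMP_PRE_HOUR_GRID_0.10-2016070100.grd"
data, time, lon, lat = read_cmp_pre_hour_grid(files)
if data is not None:
    print(data.shape)   # (1, 440, 700): one time, 440 latitudes, 700 longitudes
    print(time[0])      # datetime parsed from the file name
```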

847
nmc_met_io/read_micaps.py Normal file

@@ -0,0 +1,847 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2019 NMC Developers.
# Distributed under the terms of the GPL V3 License.
"""
Read micaps data file.
"""
import os.path
import numpy as np
import xarray as xr
import pandas as pd
from datetime import datetime
def read_micaps_3(fname, limit=None):
"""
Read Micaps 3 type file (general scatter point)
    1. This type of data is mainly used to plot non-standard station
       observations; currently only a single element is plotted.
    2. Besides station plotting, contour lines can be drawn directly from
       the station data with a finite element method (as long as the number
       of contours is greater than 0). The contour values are given by
       contour value 1, contour value 2, ... in the file header, and one of
       them may be chosen as the bold-line value.
    3. Contours can be restricted to a clipping region, which is defined by
       a closed polyline forming the region boundary; the polyline is given
       by the number of points on the boundary and the longitude/latitude
       of each point.
    4. When surface elements are plotted, the level field in the file
       header becomes a flag controlling the plotting format:
       -1: plot 6-hour precipitation; plot "T" when the amount is 0.0 mm,
           one decimal place for 0.1-0.9 mm, and integers only above 1 mm.
       -2: plot 24-hour precipitation; omit amounts below 1 mm, integers
           only for 1 mm or more.
       -3: plot temperature, integers only.
    Note: in the MICAPS 3.2 extended data format, 0.0 in the 6-hour
    rainfall means trace precipitation rather than no precipitation, and
    the decimal-digit setting in the plotting attributes has no effect for
    the above data types. To match the data formats used in operations,
    this is changed so that 0.0 means no precipitation and values greater
    than 0 but less than 0.1 mean trace precipitation. Using arbitrary
    negative values or 9999 to represent zero precipitation may cause
    abnormal results in data analysis.
:param fname: micaps file name.
:param limit: region limit, [min_lat, min_lon, max_lat, max_lon]
:return: data, pandas type
:Examples:
>>> data = read_micaps_3('Z:/data/surface/jiany_rr/r20/17032908.000')
"""
# check file exist
if not os.path.isfile(fname):
return None
# read contents
try:
        with open(fname, 'r', encoding='GBK') as f:
            txt = f.read().replace('\n', ' ').split()
except IOError as err:
print("Micaps 3 file error: " + str(err))
return None
# head information
head_info = txt[2]
# date and time
year = int(txt[3]) if len(txt[3]) == 4 else int(txt[3]) + 2000
month = int(txt[4])
day = int(txt[5])
hour = int(txt[6])
time = datetime(year, month, day, hour)
# level
level = int(txt[7])
# contour information
n_contour = int(txt[8])
pt = 9
if n_contour > 0:
        contours = np.array(txt[pt:(pt + n_contour)])
pt += n_contour
# smooth and bold value
smoothCeof = float(txt[pt])
pt += 1
boldCeof = float(txt[pt])
pt += 1
# boundary
n_bound = int(txt[pt])
pt += 1
if n_bound > 0:
        bound = np.array(txt[pt:(pt + 2 * n_bound)])
pt += 2 * n_bound
# number of elements and data
n_elements = int(txt[pt])
pt += 1
number = int(txt[pt])
pt += 1
# cut data
txt = np.array(txt[pt:])
txt.shape = [number, n_elements + 4]
# initial data
columns = list(['ID', 'lon', 'lat', 'alt'])
columns.extend(['V%s' % x for x in range(n_elements)])
data = pd.DataFrame(txt, columns=columns)
    # convert column types (keep the station ID as a string)
    data[columns[1:]] = data[columns[1:]].apply(pd.to_numeric)
# cut the region
if limit is not None:
data = data[(limit[0] <= data['lat']) & (data['lat'] <= limit[2]) &
(limit[1] <= data['lon']) & (data['lon'] <= limit[3])]
# check records
if len(data) == 0:
return None
# add time
data['time'] = time
data['level'] = level
# return
return data
def read_micaps_4(fname, limit=None):
"""
Read Micaps 4 type file (grid)
:param fname: micaps file name.
:param limit: region limit, [min_lat, min_lon, max_lat, max_lon]
:return: data, xarray type
:Examples:
>>> data = read_micaps_4('Z:/data/newecmwf_grib/pressure/17032008.006')
"""
# check file exist
if not os.path.isfile(fname):
return None
# read contents
try:
        with open(fname, 'r', encoding='GBK') as f:
            txt = f.read().replace('\n', ' ').split()
except IOError as err:
print("Micaps 4 file error: " + str(err))
return None
# head information
head_info = txt[2]
# date and time
year = int(txt[3]) if len(txt[3]) == 4 else int(txt[3]) + 2000
month = int(txt[4])
day = int(txt[5])
hour = int(txt[6])
time = datetime(year, month, day, hour)
fhour = int(txt[7])
# vertical level
level = int(txt[8])
# grid information
xint = float(txt[9])
yint = float(txt[10])
slon = float(txt[11])
slat = float(txt[13])
nlon = int(txt[15])
nlat = int(txt[16])
lon = slon + np.arange(nlon) * xint
lat = slat + np.arange(nlat) * yint
# contour information
cnInterval = float(txt[17])
cnStart = float(txt[18])
cnEnd = float(txt[19])
# smooth and bold value
smoothCeof = float(txt[20])
boldCeof = float(txt[21])
# extract data
data = (np.array(txt[22:])).astype(np.float)
data.shape = [nlat, nlon]
# check latitude order
if lat[0] > lat[1]:
lat = lat[::-1]
data = data[::-1, :]
# create xarray data
data = xr.DataArray(data, coords=[lat, lon], dims=['lat', 'lon'])
# subset data
if limit is not None:
lat_bnds, lon_bnds = [limit[0], limit[2]], [limit[1], limit[3]]
data = data.sel(lat=slice(*lat_bnds), lon=slice(*lon_bnds))
# add attributes
data.attrs['time'] = time
data.attrs['fhour'] = fhour
data.attrs['level'] = level
data.attrs['head_info'] = head_info
data.attrs['cnInterval'] = cnInterval
data.attrs['cnStart'] = cnStart
data.attrs['cnEnd'] = cnEnd
data.attrs['smoothCeof'] = smoothCeof
data.attrs['boldCeof'] = boldCeof
# return
return data
def read_micaps_14(fname):
"""
Read micaps 14 file (编辑图象的图元数据, 即交互操作结果数据).
:param fname: micaps 14 filename.
:return: data dictionary
:Examples:
>>> data = read_micaps_14("Z:/diamond/update/rr082008.024")
"""
# check file exist
if not os.path.isfile(fname):
return None
# read contents
try:
with open(fname, 'r') as f:
txt = f.read().replace('\n', ' ').split()
except IOError as err:
print("Micaps 14 file error: " + str(err))
return None
# head information
head_info = txt[2]
# date and time
year = int(txt[3]) if len(txt[3]) == 4 else int(txt[3]) + 2000
month = int(txt[4])
day = int(txt[5])
hour = int(txt[6])
time = datetime(year, month, day, hour)
fhour = int(txt[7])
# ======================================================
# read lines
# ======================================================
if 'LINES:' in txt:
# get the start position
idx = txt.index('LINES:')
# number of lines
number = int(txt[idx+1])
idx += 2
# loop every line
if number > 0:
# define data
line_width = []
line_xyz_num = []
line_xyz = []
line_label_num = []
line_label = []
line_label_xyz = []
for i in range(number):
# line width
width = float(txt[idx])
line_width.append(width)
idx += 1
# line xyz point number
xyz_num = int(txt[idx])
line_xyz_num.append(xyz_num)
idx += 1
# line xyz
xyz = np.array(txt[idx:(idx + 3*xyz_num)]).astype(np.float)
xyz.shape = [xyz_num, 3]
line_xyz.append(xyz)
idx += xyz_num * 3
# line label
label = txt[idx]
line_label.append(label)
idx += 1
# line label number
label_num = int(txt[idx])
line_label_num.append(label_num)
idx += 1
# label xyz
if label_num > 0:
label_xyz = np.array(
txt[idx:(idx + 3*label_num)]).astype(np.float)
label_xyz.shape = [label_num, 3]
line_label_xyz.append(label_xyz)
idx += label_num * 3
else:
line_label_xyz.append([])
# construct line data type
lines = {
"line_width": line_width, "line_xyz_num": line_xyz_num,
"line_xyz": line_xyz, "line_label_num": line_label_num,
"line_label": line_label, "line_label_xyz": line_label_xyz}
else:
lines = {}
else:
lines = {}
# ======================================================
# read line symbols
# ======================================================
if 'LINES_SYMBOL:' in txt:
# get the start position
idx = txt.index('LINES_SYMBOL:')
# number of line symbols
number = int(txt[idx + 1])
idx += 2
# loop every line symbol
if number > 0:
# define data
linesym_code = []
linesym_width = []
linesym_xyz_num = []
linesym_xyz = []
for i in range(number):
# line symbol code
code = int(txt[idx])
linesym_code.append(code)
idx += 1
# line width
width = float(txt[idx])
linesym_width.append(width)
idx += 1
# line symbol xyz point number
xyz_num = int(txt[idx])
linesym_xyz_num.append(xyz_num)
idx += 1
# line symbol xyz
xyz = np.array(txt[idx:(idx + 3*xyz_num)]).astype(np.float)
xyz.shape = [xyz_num, 3]
linesym_xyz.append(xyz)
idx += xyz_num * 3
# line symbol label
label = txt[idx]
idx += 1
# line symbol label number
label_num = int(txt[idx])
idx += label_num * 3 + 1
lines_symbol = {"linesym_code": linesym_code,
"linesym_width": linesym_width,
"linesym_xyz_num": linesym_xyz_num,
"linesym_xyz": linesym_xyz}
else:
lines_symbol = {}
else:
lines_symbol = {}
# ======================================================
# read symbol
# ======================================================
if "SYMBOLS:" in txt:
# start position of symbols
idx = txt.index("SYMBOLS:")
# number of lines
number = int(txt[idx + 1])
idx += 2
# loop every symbol
if number > 0:
# define data
symbol_code = []
symbol_xyz = []
symbol_value = []
for i in range(number):
# symbol code
code = int(txt[idx])
symbol_code.append(code)
idx += 1
# symbol xyz
xyz = np.array(txt[idx:(idx + 3)]).astype(np.float)
symbol_xyz.append(xyz)
idx += 3
# symbol value
value = txt[idx]
symbol_value.append(value)
idx += 1
symbols = {"symbol_code": symbol_code,
"symbol_xyz": symbol_xyz,
"symbol_value": symbol_value}
else:
symbols = {}
else:
symbols = {}
# ======================================================
# read closed contours
# ======================================================
if "CLOSED_CONTOURS:" in txt:
# get the start position
idx = txt.index('CLOSED_CONTOURS:')
# number of lines
number = int(txt[idx + 1])
idx += 2
# loop every closed contour
if number > 0:
# define data
cn_width = []
cn_xyz_num = []
cn_xyz = []
cn_label_num = []
cn_label = []
cn_label_xyz = []
for i in range(number):
# line width
width = float(txt[idx])
cn_width.append(width)
idx += 1
# line xyz point number
xyz_num = int(txt[idx])
cn_xyz_num.append(xyz_num)
idx += 1
# line xyz
xyz = np.array(txt[idx:(idx + 3 * xyz_num)]).astype(np.float)
xyz.shape = [xyz_num, 3]
cn_xyz.append(xyz)
idx += 3 * xyz_num
# line label
label = txt[idx]
cn_label.append(label)
idx += 1
# line label number
label_num = int(txt[idx])
cn_label_num.append(label_num)
idx += 1
# label xyz
if label_num > 0:
label_xyz = np.array(
txt[idx:(idx + 3 * label_num)]).astype(np.float)
                    label_xyz.shape = [label_num, 3]
cn_label_xyz.append(label_xyz)
idx += label_num * 3
else:
cn_label_xyz.append([])
closed_contours = {
"cn_width": cn_width, "cn_xyz_num": cn_xyz_num,
"cn_xyz": cn_xyz, "cn_label": cn_label,
"cn_label_num": cn_label_num, "cn_label_xyz": cn_label_xyz}
else:
closed_contours = {}
else:
closed_contours = {}
# ======================================================
# read station situation
# ======================================================
if "STATION_SITUATION" in txt:
# get the start position
idx = txt.index('STATION_SITUATION')
# find data subscript
end_idx = idx + 1
        while end_idx < len(txt) and txt[end_idx].isdigit():
end_idx += 1
if end_idx > idx + 1:
stations = np.array(txt[(idx+1):(end_idx)])
stations.shape = [len(stations)//2, 2]
else:
stations = None
else:
stations = None
# ======================================================
# read weather regions
# ======================================================
if "WEATHER_REGION:" in txt:
# get the start position
idx = txt.index('WEATHER_REGION:')
# number of regions
number = int(txt[idx + 1])
idx += 2
# loop every region
if number > 0:
# define data
weather_region_code = []
weather_region_xyz_num = []
weather_region_xyz = []
for i in range(number):
# region code
code = int(txt[idx])
weather_region_code.append(code)
idx += 1
# region xyz point number
xyz_num = int(txt[idx])
weather_region_xyz_num.append(xyz_num)
idx += 1
# region xyz point
xyz = np.array(
txt[idx:(idx + 3*xyz_num)]).astype(np.float)
xyz.shape = [xyz_num, 3]
weather_region_xyz.append(xyz)
idx += 3 * xyz_num
weather_region = {
"weather_region_code": weather_region_code,
"weather_region_xyz_num": weather_region_xyz_num,
"weather_region_xyz": weather_region_xyz}
else:
weather_region = {}
else:
weather_region = {}
# ======================================================
# read fill area
# ======================================================
if "FILLAREA:" in txt:
# get the start position
idx = txt.index('FILLAREA:')
# number of regions
number = int(txt[idx + 1])
idx += 2
# loop every fill area
if number > 0:
# define data
fillarea_code = []
fillarea_num = []
fillarea_xyz = []
fillarea_type = []
fillarea_color = []
fillarea_frontcolor = []
fillarea_backcolor = []
fillarea_gradient_angle = []
fillarea_graphics_type = []
fillarea_frame = []
for i in range(number):
# code
code = int(txt[idx])
fillarea_code.append(code)
idx += 1
# xyz point number
xyz_num = int(txt[idx])
fillarea_num.append(xyz_num)
idx += 1
# xyz point
xyz = np.array(
txt[idx:(idx + 3 * xyz_num)]).astype(np.float)
xyz.shape = [xyz_num, 3]
fillarea_xyz.append(xyz)
idx += 3 * xyz_num
# fill type
ftype = int(txt[idx])
fillarea_type.append(ftype)
idx += 1
# line color
color = np.array(txt[idx:(idx + 4)]).astype(np.int)
fillarea_color.append(color)
idx += 4
# front color
front_color = np.array(txt[idx:(idx + 4)]).astype(np.int)
fillarea_frontcolor.append(front_color)
idx += 4
# background color
back_color = np.array(txt[idx:(idx + 4)]).astype(np.int)
fillarea_backcolor.append(back_color)
idx += 4
# color gradient angle
gradient_angle = float(txt[idx])
fillarea_gradient_angle.append(gradient_angle)
idx += 1
# graphics type
graphics_type = int(txt[idx])
fillarea_graphics_type.append(graphics_type)
idx += 1
# draw frame or not
frame = int(txt[idx])
fillarea_frame.append(frame)
idx += 1
fill_area = {
"fillarea_code": fillarea_code, "fillarea_num": fillarea_num,
"fillarea_xyz": fillarea_xyz, "fillarea_type": fillarea_type,
"fillarea_color": fillarea_color,
"fillarea_frontcolor": fillarea_frontcolor,
"fillarea_backcolor": fillarea_backcolor,
"fillarea_gradient_angle": fillarea_gradient_angle,
"fillarea_graphics_type": fillarea_graphics_type,
"fillarea_frame": fillarea_frame}
else:
fill_area = {}
else:
fill_area = {}
# ======================================================
# read notes symbol
# ======================================================
if "NOTES_SYMBOL:" in txt:
# get the start position
idx = txt.index('NOTES_SYMBOL:')
# number of regions
number = int(txt[idx + 1])
idx += 2
# loop every notes symbol
if number > 0:
# define data
nsymbol_code = []
nsymbol_xyz = []
nsymbol_charLen = []
nsymbol_char = []
nsymbol_angle = []
nsymbol_fontLen = []
nsymbol_fontName = []
nsymbol_fontSize = []
nsymbol_fontType = []
nsymbol_color = []
for i in range(number):
# code
code = int(txt[idx])
nsymbol_code.append(code)
idx += 1
# xyz
xyz = np.array(txt[idx:(idx + 3)]).astype(np.float)
nsymbol_xyz.append([xyz])
idx += 3
# character length
char_len = int(txt[idx])
nsymbol_charLen.append(char_len)
idx += 1
# characters
char = txt[idx]
nsymbol_char.append(char)
idx += 1
# character angle
angle = txt[idx]
nsymbol_angle.append(angle)
idx += 1
# font length
font_len = txt[idx]
nsymbol_fontLen.append(font_len)
idx += 1
# font name
font_name = txt[idx]
nsymbol_fontName.append(font_name)
idx += 1
# font size
font_size = txt[idx]
nsymbol_fontSize.append(font_size)
idx += 1
# font type
font_type = txt[idx]
nsymbol_fontType.append(font_type)
idx += 1
# color
color = np.array(txt[idx:(idx + 4)]).astype(np.int)
nsymbol_color.append(color)
idx += 4
notes_symbol = {
"nsymbol_code": nsymbol_code,
"nsymbol_xyz": nsymbol_xyz,
"nsymbol_charLen": nsymbol_charLen,
"nsymbol_char": nsymbol_char,
"nsymbol_angle": nsymbol_angle,
"nsymbol_fontLen": nsymbol_fontLen,
"nsymbol_fontName": nsymbol_fontName,
"nsymbol_fontSize": nsymbol_fontSize,
"nsymbol_fontType": nsymbol_fontType,
"nsymbol_color": nsymbol_color}
else:
notes_symbol = {}
else:
notes_symbol = {}
# ======================================================
# read lines symbols with property
# ======================================================
if "WITHPROP_LINESYMBOLS:" in txt:
# get the start position
idx = txt.index('WITHPROP_LINESYMBOLS:')
# number of regions
number = int(txt[idx + 1])
idx += 2
# loop every line symbol
if number > 0:
# define data
plinesym_code = []
plinesym_width = []
plinesym_color = []
plinesym_type = []
plinesym_shadow = []
plinesym_xyz_num = []
plinesym_xyz = []
plinesym_label = []
plinesym_label_num = []
plinesym_label_xyz = []
for i in range(number):
# line symbol code
code = int(txt[idx])
plinesym_code.append(code)
idx += 1
# line width
width = float(txt[idx])
plinesym_width.append(width)
idx += 1
# line color
color = np.array(txt[idx:(idx + 3)]).astype(np.int)
plinesym_color.append([color])
idx += 3
# line type
ltype = int(txt[idx])
plinesym_type.append(ltype)
idx += 1
# line shadow
shadow = int(txt[idx])
plinesym_shadow.append(shadow)
idx += 1
# line symbol xyz point number
xyz_num = int(txt[idx])
plinesym_xyz_num.append(xyz_num)
idx += 1
# line symbol xyz
xyz = np.array(txt[idx:(idx + 3 * xyz_num)]).astype(np.float)
xyz.shape = [xyz_num, 3]
plinesym_xyz.append(xyz)
idx += 3 * xyz_num
# line symbol label
label = txt[idx]
plinesym_label.append(label)
idx += 1
# line label number
label_num = int(txt[idx])
plinesym_label_num.append(label_num)
idx += 1
# label xyz
if label_num > 0:
label_xyz = np.array(
txt[idx:(idx + 3 * label_num)]).astype(np.float)
label_xyz.shape = [label_num, 3]
plinesym_label_xyz.append(label_xyz)
idx += label_num * 3
else:
plinesym_label_xyz.append([])
plines_symbol = {
"plinesym_code": plinesym_code,
"plinesym_width": plinesym_width,
"plinesym_color": plinesym_color,
"plinesym_type": plinesym_type,
"plinesym_shadow": plinesym_shadow,
"plinesym_xyz_num": plinesym_xyz_num,
"plinesym_xyz": plinesym_xyz,
"plinesym_label": plinesym_label,
"plinesym_label_num": plinesym_label_num,
"plinesym_label_xyz": plinesym_label_xyz}
else:
plines_symbol = {}
else:
plines_symbol = {}
# return data contents
return {"file_type": 14,
"time": time,
"fhour": fhour,
"lines": lines,
"lines_symbol": lines_symbol,
"symbols": symbols,
"closed_contours": closed_contours,
"stations": stations,
"weather_region": weather_region,
"fill_area": fill_area,
"notes_symbol": notes_symbol,
"plines_symbol": plines_symbol}

114
nmc_met_io/read_radar.py Normal file

@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2019 NMC Developers.
# Distributed under the terms of the GPL V3 License.
"""
Read CMA radar format file.
"""
from array import array
import numpy as np
def read_cma_sa_radar(fname):
"""
Read CMA SA radar format file.
refer to
https://github.com/smft/CMA_SA_RADAR_READER/blob/master/read_RADAR.py
https://github.com/smft/CMA_SA_RADAR_READER/blob/master/barb_plot.py
    for map plotting.
:param fname: file path name.
:return: data dictionary.
"""
    # read binary data
    with open(fname, "rb") as f:
        data = np.asarray(array("B", f.read()))
    data = data.reshape([len(data) // 2432, 2432])
    # find elevation angles by VCP mode
    if data[0, 72] == 11:
        phi = [0.50, 0.50, 1.45, 1.45, 2.40, 3.35, 4.30,
               5.25, 6.2, 7.5, 8.7, 10, 12, 14, 16.7, 19.5]
    elif data[0, 72] == 21:
        phi = [0.50, 0.50, 1.45, 1.45, 2.40, 3.35, 4.30,
               6.00, 9.00, 14.6, 19.5]
    elif data[0, 72] == 31:
        phi = [0.50, 0.50, 1.50, 1.50, 2.50, 2.50, 3.50, 4.50]
    elif data[0, 72] == 32:
        phi = [0.50, 0.50, 2.50, 3.50, 4.50]
    else:
        # guard against unknown VCP modes (phi would otherwise be undefined)
        raise ValueError("unsupported radar VCP mode: %d" % data[0, 72])
# define data
g1 = np.zeros([len(data), 460])
h1 = np.zeros([len(data), 460])
i1 = np.zeros([len(data), 460])
j1 = np.zeros([len(data), 460])
# process data
count = 0
while count < len(data):
print("径向数据编号 : ", count)
b1 = data[count, 44] + 256 * data[count, 45] # 仰角序数
c1 = ((data[count, 36] + 256 * data[count, 37]) /
8 * 180 / 4096) # 方位角
d1 = data[count, 54] + 256 * data[count, 55] # 径向库
print("仰角序数,方位角,径向库 : ", b1, c1, d1)
if d1 == 0:
count += 1
continue
else:
count += 1
i = 0
while i < 460:
                g1[count - 1, i] = phi[b1 - 1]  # elevation angle
                h1[count - 1, i] = c1  # azimuth angle
                i1[count - 1, i] = 0.5 + i - 1  # radial distance
                if i > d1:  # beyond the last valid gate
                    j1[count - 1, i] = 0
                else:
                    if data[count - 1, 128 + i] == 0:  # no data
                        j1[count - 1, i] = 0
                    else:
                        if data[count - 1, 128 + i] == 1:  # range folded
                            j1[count - 1, i] = 0
                        else:  # valid data: decode reflectivity
                            j1[count - 1, i] = ((data[count - 1, 128 + i] - 2) /
                                                2 - 32)
i += 1
# calculate angle index
n = 3
    a2 = 0  # elevation number index
while a2 < len(data):
if data[a2, 44] > (n - 1):
break
a2 += 1
a3 = a2
while a3 < len(data):
if data[a3, 44] > n:
break
a3 += 1
# put data
    yj = g1[a2:a3, :]   # elevation angle
    fwj = h1[a2:a3, :]  # azimuth angle
    jx = i1[a2:a3, :]   # radial distance
    fsl = j1[a2:a3, :]  # reflectivity
# return data
return yj, fwj, jx, fsl
def sph2cart(elevation, azimuth, r):
"""
Convert spherical coordinates to cartesian.
"""
ele, a = np.deg2rad([elevation, azimuth])
x = r * np.cos(ele) * np.cos(a)
y = r * np.cos(ele) * np.sin(a)
z = r * np.sin(ele)
return x, y, z
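
For orientation, a sketch combining the two functions above to obtain radar-relative Cartesian coordinates for one sweep (hypothetical local file name):
```
from nmc_met_io.read_radar import read_cma_sa_radar, sph2cart

# hypothetical SA radar base-data file
yj, fwj, jx, fsl = read_cma_sa_radar("Z_RADR_I_9210_base_data.bin")

# convert (elevation, azimuth, range) to Cartesian x/y/z around the radar
x, y, z = sph2cart(yj, fwj, jx)
print(x.shape, fsl.min(), fsl.max())   # sweep geometry and reflectivity range
```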

@@ -0,0 +1,179 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2019 NMC Developers.
# Distributed under the terms of the GPL V3 License.
"""
Read FY satellite awx format file.
"""
import numpy as np
def read_awx_cloud(fname):
"""
Read satellite awx format file.
:param fname: file pathname.
:return: data list
"""
# read part of binary
# refer to
# https://stackoverflow.com/questions/14245094/how-to-read-part-of-binary-file-with-numpy
pass
def read_himawari(fname, resolution):
"""
    Read the Japan Himawari satellite standard data file.
    refer to
https://github.com/smft/Read_Himawari_binary_data/blob/master/read_Himawari.py
:param fname: data file pathname.
:param resolution: data resolution.
:return: data list.
"""
# define resolution
if resolution == 1:
res = 12100000
nlin = 1100
ncol = 11000
elif resolution == 2:
res = 3025000
nlin = 550
ncol = 5500
else:
res = 48400000
nlin = 2200
ncol = 22000
# define binary file format
formation = [('bn', 'i1', 1),
('bl', 'i2', 1),
('thb', 'i2', 1),
('bo', 'i1', 1),
('sn', 'S1', 16),
('pcn', 'S1', 16),
('oa', 'S1', 4),
('obf', 'S1', 2),
('ot', 'i2', 1),
('ost', 'float64', 1),
('oet', 'float64', 1),
('fct', 'float64', 1),
('thl', 'i4', 1),
('tdl', 'i4', 1),
('qf1', 'i1', 1),
('qf2', 'i1', 1),
('qf3', 'i1', 1),
('qf4', 'i1', 1),
('ffv', 'S1', 32),
('fn', 'S1', 128),
('null1', 'S1', 40),
('bn2', 'i1', 1),
('bl2', 'i2', 1),
('nbpp', 'i2', 1),
('noc', 'i2', 1),
('nol', 'i2', 1),
('cffdb', 'i1', 1),
('null2', 'S1', 40),
('bn3', 'i1', 1),
('bl3', 'i2', 1),
('sl', 'float64', 1),
('CFAC', 'i4', 1),
('LFAC', 'i4', 1),
('COFF', 'float32', 1),
('LOFF', 'float32', 1),
('dfectvs', 'float64', 1),
('eer', 'float64', 1),
('epr', 'float64', 1),
('var1', 'float64', 1),
('var2', 'float64', 1),
('var3', 'float64', 1),
('cfsd', 'float64', 1),
('rt', 'i2', 1),
('rs', 'i2', 1),
('null3', 'S1', 40),
('bn4', 'i1', 1),
('bl4', 'i2', 1),
('ni', 'float64', 1),
('ssplon', 'float64', 1),
('ssplat', 'float64', 1),
('dfects4', 'float64', 1),
('nlat', 'float64', 1),
('nlon', 'float64', 1),
('sp', 'float64', 3),
('mp', 'float64', 3),
('null4', 'S1', 40),
('bn5', 'i1', 1),
('bl5', 'i2', 1),
('bdn', 'i2', 1),
('cwl', 'float64', 1),
('vnobpp', 'i2', 1),
('cvoep', 'uint16', 1),
('cvoposa', 'uint16', 1),
('gfcce', 'float64', 1),
('cfcce', 'float64', 1),
('c0', 'float64', 1),
('c1', 'float64', 1),
('c2', 'float64', 1),
('C0', 'float64', 1),
('C1', 'float64', 1),
('C2', 'float64', 1),
('sol', 'float64', 1),
('pc', 'float64', 1),
('bc', 'float64', 1),
('null5', 'S1', 40),
('b06n01', 'i1', 1),
('b06n02', 'i2', 1),
('b06n03', 'float64', 1),
('b06n04', 'float64', 1),
('b06n05', 'float64', 1),
('b06n06', 'float64', 1),
('b06n07', 'float64', 1),
('b06n08', 'float64', 1),
('b06n09', 'float64', 1),
('b06n10', 'float64', 1),
('b06n11', 'float32', 1),
('b06n12', 'float32', 1),
('b06n13', 'S1', 128),
('b06n14', 'S1', 56),
('b07n01', 'i1', 1),
('b07n02', 'i2', 1),
('b07n03', 'i1', 1),
('b07n04', 'i1', 1),
('b07n05', 'i2', 1),
('b07n06', 'S1', 40),
('b08n01', 'i1', 1),
('b08n02', 'i2', 1),
('b08n03', 'float32', 1),
('b08n04', 'float32', 1),
('b08n05', 'float64', 1),
('b08n06', 'i2', 1),
('b08n07', 'i2', 1),
('b08n08', 'float32', 1),
('b08n09', 'float32', 1),
('b08n10', 'S1', 50),
('b09n01', 'i1', 1),
('b09n02', 'i2', 1),
('b09n03', 'i2', 1),
('b09n04', 'i2', 1),
('b09n05', 'float64', 1),
('b09n06', 'S1', 70),
('b10n01', 'i1', 1),
('b10n02', 'i4', 1),
('b10n03', 'i2', 1),
('b10n04', 'i2', 1),
('b10n05', 'i2', 1),
('b10n06', 'S1', 36),
('b11n01', 'i1', 1),
('b11n02', 'i2', 1),
('b11n03', 'S1', 256),
('b12n01', 'i2', res)]
data = np.fromfile(fname, dtype=formation)['b12n01'].reshape(nlin, ncol)
return list(data)
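
The Himawari reader relies on a single structured NumPy dtype that mirrors the binary header blocks, so one np.fromfile call parses the whole file and the image block is picked out by field name. A self-contained sketch of the same technique on a toy record layout:
```
import numpy as np

# toy layout: a 2-byte int id, one float64 timestamp, four float32 samples
rec = np.dtype([('id', 'i2'), ('t', 'float64'), ('samples', 'float32', 4)])
raw = np.zeros(3, dtype=rec)            # stand-in for bytes read from a file
raw['id'] = [1, 2, 3]
parsed = np.frombuffer(raw.tobytes(), dtype=rec)
print(parsed['samples'].shape)          # (3, 4): one field across all records
```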

@@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2019 NMC Developers.
# Distributed under the terms of the GPL V3 License.
"""
Retrieve historical data from CIMISS service.
"""
import os
import calendar
import urllib.request
import numpy as np
from nmc_met_io.retrieve_cimiss_server import cimiss_obs_by_time_range
from nmc_met_io.retrieve_cimiss_server import cimiss_obs_in_rect_by_time_range
from nmc_met_io.retrieve_cimiss_server import cimiss_obs_file_by_time_range
def get_day_hist_obs(years=np.arange(2000, 2011, 1),
month_range=(1, 12),
elements=None,
sta_levels=None,
outfname='day_rain_obs',
outdir='.'):
"""
Download historical daily observations and write to data files,
each month a file.
:param years: years for historical data
:param month_range: month range each year, like (1, 12)
:param elements: elements for retrieve, 'ele1, ele2, ...'
:param sta_levels: station levels
:param outfname: output file name + '_year' + '_month'
:param outdir: output file directory
:return: output file names.
:Example:
>>> get_day_hist_obs(years=np.arange(2000, 2016, 1), outdir="D:/")
"""
# check elements
if elements is None:
elements = "Station_Id_C,Station_Name,Datetime,Lat,Lon,PRE_Time_0808"
# check output directory
if not os.path.exists(outdir):
os.makedirs(outdir)
# define months
months = np.arange(1, 13, 1)
    # Because of the CIMISS data amount limit,
    # loop over every year to download the data.
out_files = []
for iy in years:
if calendar.isleap(iy):
last_day = ['31', '29', '31', '30', '31', '30',
'31', '31', '30', '31', '30', '31']
else:
last_day = ['31', '28', '31', '30', '31', '30',
'31', '31', '30', '31', '30', '31']
for i, im in enumerate(months):
# check month range
if not (month_range[0] <= im <= month_range[1]):
continue
month = '%02d' % im
start_time = str(iy) + month + '01' + '000000'
end_time = str(iy) + month + last_day[i] + '000000'
time_range = "[" + start_time + "," + end_time + "]"
# retrieve observations from CIMISS server
data = cimiss_obs_by_time_range(
time_range, sta_levels=sta_levels,
data_code="SURF_CHN_MUL_DAY", elements=elements)
if data is None:
continue
# save observation data to file
out_files.append(os.path.join(
outdir, outfname + "_" + str(iy) + "_" + month + ".pkl"))
data.to_pickle(out_files[-1])
return out_files
def get_mon_hist_obs(years=np.arange(2000, 2011, 1),
limit=(3, 73, 54, 136),
elements=None,
outfname='mon_surface_obs',
outdir='.'):
"""
Download historical monthly observations and write to data files,
each year a file.
:param years: years for historical data
:param limit: spatial limit [min_lat, min_lon, max_lat, max_lon]
:param elements: elements for retrieve, 'ele1, ele2, ...'
:param outfname: output file name + 'year'
:param outdir: output file directory
:return: Output filenames
"""
# check elements
if elements is None:
elements = ("Station_Id_C,Station_Name,Year,"
"Mon,Lat,Lon,Alti,PRE_Time_0808")
# check output directory
if not os.path.exists(outdir):
os.makedirs(outdir)
# Loop every year to download the data.
out_files = []
for iy in years:
# check out file
outfile = os.path.join(outdir, outfname + "_" + str(iy) + ".pkl")
if os.path.isfile(outfile):
continue
# set time range
start_time = str(iy) + '0101' + '000000'
end_time = str(iy) + '1201' + '000000'
time_range = "[" + start_time + "," + end_time + "]"
# retrieve observations from CIMISS server
data = cimiss_obs_in_rect_by_time_range(
time_range, limit, data_code='SURF_CHN_MUL_MON',
elements=elements)
if data is None:
continue
# save observation data to file
out_files.append(outfile)
data.to_pickle(out_files[-1])
return out_files
def get_cmpas_hist_files(time_range, outdir='.'):
"""
    Download CMPAS QPE gridded data files.
Arguments:
time_range {string} -- time range for retrieve,
"[YYYYMMDDHHMISS,YYYYMMDDHHMISS]"
outdir {string} -- output directory.
    :Examples:
>>> time_range = "[20180101000000,20180331230000]"
>>> get_cmpas_hist_files(time_range, outdir='G:/CMAPS')
"""
# check output directory
if not os.path.exists(outdir):
os.makedirs(outdir)
files = cimiss_obs_file_by_time_range(
time_range, data_code="SURF_CMPA_NRT_NC")
filenames = files['DS']
for file in filenames:
outfile = os.path.join(outdir, file['FILE_NAME'])
if not os.path.isfile(outfile):
urllib.request.urlretrieve(file['FILE_URL'], outfile)
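
The download helpers above write observations as pandas pickle files, one per month or year; a minimal sketch of reading one back (hypothetical output path, using the default elements of get_day_hist_obs):
```
import pandas as pd

# one month written by get_day_hist_obs(..., outdir="D:/")
obs = pd.read_pickle("D:/day_rain_obs_2000_01.pkl")
print(obs.columns)  # Station_Id_C, Station_Name, Datetime, Lat, Lon, PRE_Time_0808
```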

@@ -0,0 +1,434 @@
# _*_ coding: utf-8 _*_
# Copyright (c) 2019 NMC Developers.
# Distributed under the terms of the GPL V3 License.
"""
Retrieve the CIMISS data using REST API with pure python code.
refer to:
http://10.20.76.55/cimissapiweb/MethodData_list.action
https://github.com/babybearming/CIMISSDataGet/blob/master/cimissRead_v0.1.py
"""
import json
from datetime import datetime, timedelta
import urllib3
import numpy as np
import pandas as pd
import xarray as xr
from nmc_met_io.config import _get_config_from_rcfile
def get_http_result(interface_id, params, data_format='json'):
"""
Get the http result from CIMISS REST api service.
:param interface_id: MUSIC interface id.
:param params: dictionary for MUSIC parameters.
:param data_format: MUSIC server data format.
:return:
"""
# set MUSIC server dns and user information
config = _get_config_from_rcfile()
dns = config['CIMISS']['DNS']
user_id = config['CIMISS']['USER_ID']
pwd = config['CIMISS']['PASSWORD']
# construct url
url = 'http://' + dns + '/cimiss-web/api?userId=' + user_id + \
'&pwd=' + pwd + '&interfaceId=' + interface_id
# params
for key in params:
url += '&' + key + '=' + params[key]
# data format
url += '&dataFormat=' + data_format
# request http contents
http = urllib3.PoolManager()
req = http.request('GET', url)
if req.status != 200:
print('Can not access the url: ' + url)
return None
return req.data
def cimiss_obs_by_time_range(time_range, sta_levels=None,
data_code="SURF_CHN_MUL_HOR_N",
elements="Station_Id_C,Datetime,Lat,Lon,TEM"):
"""
Retrieve station records from CIMISS by time and station ID.
:param time_range: time range for retrieve,
"[YYYYMMDDHHMISS, YYYYMMDDHHMISS]",
like"[201509010000,20150903060000]"
:param sta_levels: station levels, like "011,012,013" for standard,
base and general stations.
:param data_code: dataset code, like "SURF_CHN_MUL_HOR",
"SURF_CHN_MUL_HOR_N", and so on.
:param elements: elements for retrieve, 'ele1,ele2,...'
:return: observation records, pandas DataFrame type
:Example:
>>> time_range = "[20180219000000,20180221000000]"
>>> sta_levels = "011,012,013"
>>> data_code = "SURF_CHN_MUL_DAY"
>>> elements = "Station_Id_C,Station_Name,Datetime,Lat,Lon,PRE_Time_0808"
>>> data = cimiss_obs_by_time_range(time_range, sta_levels=sta_levels,
data_code=data_code, elements=elements)
>>> print "retrieve successfully" if data is not None else "failed"
retrieve successfully
"""
# set retrieve parameters
params = {'dataCode': data_code,
'elements': elements,
'timeRange': time_range}
if sta_levels is not None:
params['staLevels'] = sta_levels
# interface id
interface_id = "getSurfEleByTimeRange"
# retrieve data contents
contents = get_http_result(interface_id, params)
if contents is None:
return None
contents = json.loads(contents.decode('utf-8'))
if contents['returnCode'] != '0':
return None
# construct pandas DataFrame
data = pd.DataFrame(contents['DS'])
# return
return data
def cimiss_obs_by_time_and_id(times, data_code="SURF_CHN_MUL_HOR_N",
elements="Station_Id_C,Datetime,TEM",
sta_ids="54511"):
"""
Retrieve station records from CIMISS by time and station ID
:param times: time for retrieve, 'YYYYMMDDHHMISS,YYYYMMDDHHMISS,...'
:param data_code: dataset code, like "SURF_CHN_MUL_HOR",
"SURF_CHN_MUL_HOR_N", and so on.
:param elements: elements for retrieve, 'ele1,ele2,...'
:param sta_ids: station ids, 'xxxxx,xxxxx,...'
:return: observation records, pandas DataFrame type
:Example:
>>> data = cimiss_obs_by_time_and_id('20170318000000')
"""
# set retrieve parameters
params = {'dataCode': data_code,
'elements': elements,
'times': times,
'staIds': sta_ids,
'orderby': "Datetime:ASC"}
# interface id
interface_id = "getSurfEleByTimeAndStaID"
# retrieve data contents
contents = get_http_result(interface_id, params)
if contents is None:
return None
contents = json.loads(contents.decode('utf-8'))
if contents['returnCode'] != '0':
return None
# construct pandas DataFrame
data = pd.DataFrame(contents['DS'])
# return
return data
def cimiss_obs_in_rect_by_time(times, limit, data_code="SURF_CHN_MUL_HOR_N",
elements="Station_Id_C,Datetime,Lat,Lon,TEM"):
"""
Retrieve station records from CIMISS in region by time.
:param times: times for retrieve, 'YYYYMMDDHHMISS,YYYYMMDDHHMISS,...'
:param limit: [min_lat, min_lon, max_lat, max_lon]
:param data_code: dataset code, like "SURF_CHN_MUL_HOR",
"SURF_CHN_MUL_HOR_N", and so on
:param elements: elements for retrieve, 'ele1,ele2,...'
:return: observation records, pandas DataFrame type
:Example:
>>> data = cimiss_obs_in_rect_by_time('20170320000000', [35, 110, 45, 120])
"""
# set retrieve parameters
params = {'dataCode': data_code,
'elements': elements,
'times': times,
'minLat': '{:.10f}'.format(limit[0]),
'minLon': '{:.10f}'.format(limit[1]),
'maxLat': '{:.10f}'.format(limit[2]),
'maxLon': '{:.10f}'.format(limit[3]),
'orderby': "Datetime:ASC"}
# interface id
interface_id = "getSurfEleInRectByTime"
# retrieve data contents
contents = get_http_result(interface_id, params)
if contents is None:
return None
contents = json.loads(contents.decode('utf-8'))
if contents['returnCode'] != '0':
return None
# construct pandas DataFrame
data = pd.DataFrame(contents['DS'])
# return
return data
def cimiss_obs_in_rect_by_time_range(
time_range, limit,
data_code="SURF_CHN_MUL_HOR_N",
elements="Station_Id_C,Datetime,Lat,Lon,TEM"):
"""
Retrieve observation records from CIMISS by rect and time range.
:param time_range: time range for retrieve,
"[YYYYMMDDHHMISS,YYYYMMDDHHMISS]"
:param limit: (min_lat, min_lon, max_lat, max_lon)
:param data_code: dataset code, like "SURF_CHN_MUL_HOR",
"SURF_CHN_MUL_HOR_N", and so on.
:param elements: elements for retrieve, 'ele1,ele2,...'
:return: observation records, pandas DataFrame type
:Example:
>>> elements = ("Station_Id_C,Station_Id_d,Station_Name,"
"Station_levl,Datetime,Lat,Lon,PRE_Time_0808")
>>> time_range = "[20160801000000,20160801000000]"
>>> data_code = "SURF_CHN_MUL_DAY"
>>> data = cimiss_obs_in_rect_by_time_range(
time_range,[35,110,45,120], data_code=data_code,
elements=elements)
"""
# set retrieve parameters
params = {'dataCode': data_code,
'elements': elements,
'timeRange': time_range,
'minLat': '{:.10f}'.format(limit[0]),
'minLon': '{:.10f}'.format(limit[1]),
'maxLat': '{:.10f}'.format(limit[2]),
'maxLon': '{:.10f}'.format(limit[3]),
'orderby': "Datetime:ASC"}
# interface id
interface_id = "getSurfEleInRectByTimeRange"
# retrieve data contents
contents = get_http_result(interface_id, params)
if contents is None:
return None
contents = json.loads(contents.decode('utf-8'))
if contents['returnCode'] != '0':
return None
# construct pandas DataFrame
data = pd.DataFrame(contents['DS'])
# return
return data
def cimiss_obs_grid_by_time(
time_str, data_code="SURF_CMPA_RT_NC", fcst_ele="PRE"):
"""
Retrieve surface analysis grid products,
    like the CMPAS-V2.1 real-time merged precipitation analysis product (NC).
    For SURF_CMPA_RT_NC, this function retrieves the 0.01-degree
    resolution data and can take a long time.
:param time_str: analysis time string, like "2017100800"
:param data_code: data code
:param fcst_ele: elements
:return: data, xarray type
:Example:
>>> time_str = "2017110612"
>>> data_code = "SURF_CMPA_RT_NC"
>>> data = cimiss_obs_grid_by_time(time_str, data_code=data_code,
fcst_ele="PRE")
"""
# set retrieve parameters
params = {'dataCode': data_code,
'time': time_str + "0000",
'fcstEle': fcst_ele}
# set interface id
interface_id = "getSurfEleGridByTime"
# retrieve data contents
contents = get_http_result(interface_id, params)
if contents is None:
return None
contents = json.loads(contents.decode('utf-8'))
if contents['returnCode'] != '0':
return None
# get time information
time = datetime.strptime(time_str, '%Y%m%d%H')
# extract coordinates and data
start_lat = float(contents['startLat'])
start_lon = float(contents['startLon'])
nlon = int(contents['lonCount'])
nlat = int(contents['latCount'])
dlon = float(contents['lonStep'])
dlat = float(contents['latStep'])
lon = start_lon + np.arange(nlon) * dlon
lat = start_lat + np.arange(nlat) * dlat
name = contents['fieldNames']
units = contents['fieldUnits']
# construct xarray
data = np.array(contents['DS'])
data = data[np.newaxis, ...]
    data = xr.DataArray(data, coords=[[time], lat, lon],
                        dims=['time', 'lat', 'lon'], name=name)
# add attributes
data.attrs['units'] = units
data.attrs['organization'] = 'Created by NMC.'
# return data
return data
def cimiss_obs_file_by_time_range(time_range,
data_code="SURF_CMPA_RT_NC"):
"""
Retrieve CIMISS data file information.
:param time_range: time range for retrieve,
"[YYYYMMDDHHMISS,YYYYMMDDHHMISS]"
:param data_code: data code
:return: dictionary
:Examples:
>>> time_range = "[20180401000000,20180402000000]"
>>> files = cimiss_obs_file_by_time_range(time_range)
>>> filenames = files['DS']
>>> print(files['DS'][0]['FILE_URL'])
"""
# set retrieve parameters
params = {'dataCode': data_code,
'timeRange': time_range}
# set interface id
interface_id = "getSurfFileByTimeRange"
# retrieve data contents
contents = get_http_result(interface_id, params)
if contents is None:
return None
contents = json.loads(contents.decode('utf-8'))
if contents['returnCode'] != '0':
return None
# return
return contents
def cimiss_model_by_time(init_time_str, limit=None,
data_code='NAFP_FOR_FTM_HIGH_EC_GLB',
fcst_level=0, valid_time=0, fcst_ele="TEF2"):
"""
Retrieve grid data from CIMISS service.
:param init_time_str: model run time, like "2016081712"
:param limit: [min_lat, min_lon, max_lat, max_lon]
:param data_code: MUSIC data code, default is "NAFP_FOR_FTM_HIGH_EC_GLB"
:param fcst_level: vertical level, default is 0.
    :param valid_time: forecast hour, default is 0.
    :param fcst_ele: forecast element, default is 2m temperature "TEF2".
    :return: data, xarray type
"""
# set retrieve parameters
if limit is None:
params = {'dataCode': data_code,
'time': init_time_str + '0000',
'fcstLevel': '{:d}'.format(fcst_level),
'validTime': '{:d}'.format(valid_time),
'fcstEle': fcst_ele}
interface_id = 'getNafpEleGridByTimeAndLevelAndValidtime'
else:
params = {'dataCode': data_code,
'time': init_time_str + '0000',
'minLat': '{:.10f}'.format(limit[0]),
"minLon": '{:.10f}'.format(limit[1]),
"maxLat": '{:.10f}'.format(limit[2]),
"maxLon": '{:.10f}'.format(limit[3]),
'fcstLevel': '{:d}'.format(fcst_level),
'validTime': '{:d}'.format(valid_time),
'fcstEle': fcst_ele}
interface_id = 'getNafpEleGridInRectByTimeAndLevelAndValidtime'
# retrieve data contents
contents = get_http_result(interface_id, params)
if contents is None:
return None
contents = json.loads(contents.decode('utf-8'))
if contents['returnCode'] != '0':
return None
# get time information
init_time = datetime.strptime(init_time_str, '%Y%m%d%H')
fhour = valid_time
time = init_time + timedelta(hours=fhour)
# extract coordinates and data
start_lat = float(contents['startLat'])
start_lon = float(contents['startLon'])
nlon = int(contents['lonCount'])
nlat = int(contents['latCount'])
dlon = float(contents['lonStep'])
dlat = float(contents['latStep'])
lon = start_lon + np.arange(nlon)*dlon
lat = start_lat + np.arange(nlat)*dlat
name = contents['fieldNames']
units = contents['fieldUnits']
# construct xarray
data = np.array(contents['DS'])
if fcst_level == 0:
data = data[np.newaxis, ...]
        data = xr.DataArray(data, coords=[[time], lat, lon],
                            dims=['time', 'lat', 'lon'], name=name)
    else:
        data = data[np.newaxis, np.newaxis, ...]
        data = xr.DataArray(data, coords=[[time], [fcst_level], lat, lon],
                            dims=['time', 'level', 'lat', 'lon'], name=name)
# add time coordinates
    data.coords['init_time'] = ('time', [init_time])
    data.coords['fhour'] = ('time', [fhour])
# add attributes
data.attrs['units'] = units
data.attrs['organization'] = 'Created by NMC.'
# return data
return data
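
An end-to-end sketch of the wrappers above (it assumes valid CIMISS credentials in config_met_io.ini; data codes and arguments follow the docstring examples):
```
from nmc_met_io.retrieve_cimiss_server import (
    cimiss_obs_by_time_and_id, cimiss_model_by_time)

# hourly surface observations at station 54511 (Beijing)
obs = cimiss_obs_by_time_and_id('20170318000000')

# ECMWF 2 m temperature cropped to [min_lat, min_lon, max_lat, max_lon]
grid = cimiss_model_by_time('2016081712', limit=[20, 100, 50, 120])
print("retrieve successfully" if grid is not None else "failed")
```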

@@ -0,0 +1,775 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2019 NMC Developers.
# Distributed under the terms of the GPL V3 License.
"""
This is the retrieval module, which gets data from the MICAPS Cassandra
service with a Python API.
An example URL for checking the service:
http://10.32.8.164:8080/DataService?requestType=getLatestDataName&directory=ECMWF_HR/TMP/850&fileName=&filter=*.024
"""
import re
import http.client
from datetime import datetime, timedelta
import numpy as np
import xarray as xr
import pandas as pd
from nmc_met_io import DataBlock_pb2
from nmc_met_io.config import _get_config_from_rcfile
def get_http_result(host, port, url):
"""
Get the http contents.
"""
http_client = None
try:
http_client = http.client.HTTPConnection(host, port, timeout=120)
http_client.request('GET', url)
response = http_client.getresponse()
return response.status, response.read()
except Exception as e:
print(e)
return 0,
finally:
if http_client:
http_client.close()
class GDSDataService:
def __init__(self):
        # set MICAPS GDS server address and port
config = _get_config_from_rcfile()
self.gdsIp = config['MICAPS']['GDS_IP']
self.gdsPort = config['MICAPS']['GDS_PORT']
def getLatestDataName(self, directory, filter):
return get_http_result(
self.gdsIp, self.gdsPort, "/DataService" +
self.get_concate_url("getLatestDataName", directory, "", filter))
def getData(self, directory, fileName):
return get_http_result(
self.gdsIp, self.gdsPort, "/DataService" +
self.get_concate_url("getData", directory, fileName, ""))
    # concatenate request parameters into the URL
def get_concate_url(self, requestType, directory, fileName, filter):
url = ""
url += "?requestType=" + requestType
url += "&directory=" + directory
url += "&fileName=" + fileName
url += "&filter=" + filter
return url
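
The GDS protocol is plain HTTP carrying protobuf-encoded replies; a minimal sketch of querying the latest file name by hand, mirroring what get_model_grid below does internally:
```
from nmc_met_io import DataBlock_pb2
from nmc_met_io.retrieve_micaps_server import GDSDataService

service = GDSDataService()
status, response = service.getLatestDataName("ECMWF_HR/TMP/850", "*.024")
if status == 200:
    result = DataBlock_pb2.StringResult()
    result.ParseFromString(response)   # protobuf reply from the GDS server
    print(result.name)                 # latest file matching *.024
```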
def get_model_grid(directory, filename=None, suffix="*.024"):
"""
Retrieve a numerical model grid forecast from the MICAPS cassandra service.
Ensemble member forecasts are also supported.
:param directory: the data directory on the service.
:param filename: the data filename; if None, the latest file is used.
:param suffix: the filename filter pattern used to
find the specified file.
:return: data, xarray type.
:Examples:
>>> data = get_model_grid("ECMWF_HR/TMP/850")
>>> data_ens = get_model_grid("ECMWF_ENSEMBLE/RAW/HGT/500",
filename='18021708.024')
"""
# connect to data service
service = GDSDataService()
# get data file name
if filename is None:
try:
status, response = service.getLatestDataName(directory, suffix)
except ValueError:
print('Cannot retrieve data from ' + directory)
return None
StringResult = DataBlock_pb2.StringResult()
if status == 200:
StringResult.ParseFromString(response)
if StringResult is not None:
filename = StringResult.name
if filename == '':
return None
else:
return None
# get data contents
try:
status, response = service.getData(directory, filename)
except ValueError:
print('Cannot retrieve data ' + filename + ' from ' + directory)
return None
ByteArrayResult = DataBlock_pb2.ByteArrayResult()
if status == 200:
ByteArrayResult.ParseFromString(response)
if ByteArrayResult is not None:
byteArray = ByteArrayResult.byteArray
# define head information structure (278 bytes)
head_dtype = [('discriminator', 'S4'), ('type', 'i2'),
('modelName', 'S20'), ('element', 'S50'),
('description', 'S30'), ('level', 'f4'),
('year', 'i4'), ('month', 'i4'), ('day', 'i4'),
('hour', 'i4'), ('timezone', 'i4'),
('period', 'i4'), ('startLongitude', 'f4'),
('endLongitude', 'f4'), ('longitudeGridSpace', 'f4'),
('longitudeGridNumber', 'i4'),
('startLatitude', 'f4'), ('endLatitude', 'f4'),
('latitudeGridSpace', 'f4'),
('latitudeGridNumber', 'i4'),
('isolineStartValue', 'f4'),
('isolineEndValue', 'f4'),
('isolineSpace', 'f4'),
('perturbationNumber', 'i2'),
('ensembleTotalNumber', 'i2'),
('minute', 'i2'), ('second', 'i2'),
('Extent', 'S92')]
# read head information
head_info = np.fromstring(byteArray[0:278], dtype=head_dtype)
# get required grid information
data_type = head_info['type'][0]
nlon = head_info['longitudeGridNumber'][0]
nlat = head_info['latitudeGridNumber'][0]
nmem = head_info['ensembleTotalNumber'][0]
# define data structure
if data_type == 4:
data_dtype = [('data', 'f4', (nlat, nlon))]
data_len = nlat * nlon * 4
elif data_type == 11:
data_dtype = [('data', 'f4', (2, nlat, nlon))]
data_len = 2 * nlat * nlon * 4
else:
raise Exception("Data type is not supported")
# read data
if nmem == 0:
data = np.fromstring(byteArray[278:], dtype=data_dtype)
data = np.squeeze(data['data'])
else:
if data_type == 4:
data = np.full((nmem, nlat, nlon), np.nan)
else:
data = np.full((2, nmem, nlat, nlon), np.nan)
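# each ensemble member is stored as its own 278-byte head followed by
# a data block; 'perturbationNumber' in the member head gives the
# member's position within the ensemble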
ind = 0
for imem in range(nmem):
head_info_mem = np.fromstring(
byteArray[ind:(ind+278)], dtype=head_dtype)
ind += 278
data_mem = np.fromstring(
byteArray[ind:(ind+data_len)], dtype=data_dtype)
ind += data_len
number = head_info_mem['perturbationNumber'][0]
if data_type == 4:
data[number, :, :] = np.squeeze(data_mem['data'])
else:
data[:, number, :, :] = np.squeeze(data_mem['data'])
# construct longitude and latitude coordinates
slon = head_info['startLongitude'][0]
dlon = head_info['longitudeGridSpace'][0]
slat = head_info['startLatitude'][0]
dlat = head_info['latitudeGridSpace'][0]
lon = np.arange(nlon) * dlon + slon
lat = np.arange(nlat) * dlat + slat
level = np.array([head_info['level'][0]])
# construct initial time and forecast hour
init_time = datetime(head_info['year'][0], head_info['month'][0],
head_info['day'][0], head_info['hour'][0])
fhour = np.array([head_info['period'][0]], dtype=np.float64)
time = init_time + timedelta(hours=fhour[0])
init_time = np.array([init_time], dtype='datetime64[m]')
time = np.array([time], dtype='datetime64[m]')
# construct ensemble number
if nmem != 0:
number = np.arange(nmem)
# create to xarray
if data_type == 4:
if nmem == 0:
if level[0] == 0:
data = data[np.newaxis, ...]
data = xr.DataArray(
data, coords=[time, lat, lon],
dims=['time', 'lat', 'lon'], name="data")
else:
data = data[np.newaxis, np.newaxis, ...]
data = xr.DataArray(
data, coords=[time, level, lat, lon],
dims=['time', 'level', 'lat', 'lon'],
name="data")
else:
if level[0] == 0:
data = data[np.newaxis, ...]
data = xr.DataArray(
data, coords=[time, number, lat, lon],
dims=['time', 'number', 'lat', 'lon'],
name="data")
else:
data = data[np.newaxis, :, np.newaxis, ...]
data = xr.DataArray(
data, coords=[time, number, level, lat, lon],
dims=['time', 'number', 'level', 'lat', 'lon'],
name="data")
elif data_type == 11:
if nmem == 0:
speed = np.squeeze(data[0, :, :])
angle = np.squeeze(data[1, :, :])
if level[0] == 0:
speed = speed[np.newaxis, ...]
angle = angle[np.newaxis, ...]
data = xr.Dataset({
'speed': (['time', 'lat', 'lon'], speed),
'angle': (['time', 'lat', 'lon'], angle)},
coords={'lon': lon, 'lat': lat, 'time': time})
else:
speed = speed[np.newaxis, np.newaxis, ...]
angle = angle[np.newaxis, np.newaxis, ...]
data = xr.Dataset({
'speed': (['time', 'level', 'lat', 'lon'], speed),
'angle': (['time', 'level', 'lat', 'lon'], angle)},
coords={'lon': lon, 'lat': lat, 'level': level,
'time': time})
else:
speed = np.squeeze(data[0, :, :, :])
angle = np.squeeze(data[1, :, :, :])
if level[0] == 0:
speed = speed[np.newaxis, ...]
angle = angle[np.newaxis, ...]
data = xr.Dataset({
'speed': (
['time', 'number', 'lat', 'lon'], speed),
'angle': (
['time', 'number', 'lat', 'lon'], angle)},
coords={
'lon': lon, 'lat': lat, 'number': number,
'time': time})
else:
speed = speed[np.newaxis, :, np.newaxis, ...]
angle = angle[np.newaxis, :, np.newaxis, ...]
data = xr.Dataset({
'speed': (
['time', 'number', 'level', 'lat', 'lon'],
speed),
'angle': (
['time', 'number', 'level', 'lat', 'lon'],
angle)},
coords={
'lon': lon, 'lat': lat, 'level': level,
'number': number, 'time': time})
# add time coordinates
data.coords['init_time'] = ('time', init_time)
data.coords['fhour'] = ('time', fhour)
# add attributes
data.attrs['data_directory'] = directory
data.attrs['data_filename'] = filename
data.attrs['organization'] = 'Created by NMC.'
# return data
return data
else:
return None
else:
return None
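# A minimal usage sketch (not part of the original module), assuming the GDS
# server in config_met_io.ini is reachable and the docstring's example
# directory exists on it:
def _example_get_model_grid():
    data = get_model_grid("ECMWF_HR/TMP/850")
    if data is not None:
        print(data.dims)
        print(data.coords['init_time'].values, data.coords['fhour'].values)
    return data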
def get_station_data(directory, filename=None, suffix="*.000"):
"""
Retrieve station observations from the MICAPS cassandra service.
:param directory: the data directory on the service.
:param filename: the data filename; if None, the latest file is used.
:param suffix: the filename filter pattern used
to find the specified file.
:return: pandas DataFrame.
:Example:
>>> data = get_station_data("SURFACE/PLOT_10MIN")
"""
# connect to data service
service = GDSDataService()
# get data file name
if filename is None:
try:
status, response = service.getLatestDataName(directory, suffix)
except ValueError:
print('Cannot retrieve data from ' + directory)
return None
StringResult = DataBlock_pb2.StringResult()
if status == 200:
StringResult.ParseFromString(response)
if StringResult is not None:
filename = StringResult.name
if filename == '':
return None
else:
return None
# get data contents
try:
status, response = service.getData(directory, filename)
except ValueError:
print('Cannot retrieve data ' + filename + ' from ' + directory)
return None
ByteArrayResult = DataBlock_pb2.ByteArrayResult()
if status == 200:
ByteArrayResult.ParseFromString(response)
if ByteArrayResult is not None:
byteArray = ByteArrayResult.byteArray
# define head structure
head_dtype = [('discriminator', 'S4'), ('type', 'i2'),
('description', 'S100'),
('level', 'f4'), ('levelDescription', 'S50'),
('year', 'i4'), ('month', 'i4'), ('day', 'i4'),
('hour', 'i4'), ('minute', 'i4'), ('second', 'i4'),
('Timezone', 'i4'), ('extent', 'S100')]
# read head information
head_info = np.fromstring(byteArray[0:288], dtype=head_dtype)
ind = 288
# read the number of stations
station_number = np.fromstring(
byteArray[ind:(ind+4)], dtype='i4')[0]
ind += 4
# read the number of elements
element_number = np.fromstring(
byteArray[ind:(ind+2)], dtype='i2')[0]
ind += 2
# construct record structure
element_type_map = {
1: 'b1', 2: 'i2', 3: 'i4', 4: 'i8', 5: 'f4', 6: 'f8', 7: 'S1'}
element_map = {}
for i in range(element_number):
element_id = str(
np.fromstring(byteArray[ind:(ind+2)], dtype='i2')[0])
ind += 2
element_type = np.fromstring(
byteArray[ind:(ind+2)], dtype='i2')[0]
ind += 2
element_map[element_id] = element_type_map[element_type]
# loop every station to retrieve record
record_head_dtype = [
('ID', 'i4'), ('lon', 'f4'), ('lat', 'f4'), ('numb', 'i2')]
records = []
for i in range(station_number):
record_head = np.fromstring(
byteArray[ind:(ind+14)], dtype=record_head_dtype)
ind += 14
record = {
'ID': record_head['ID'][0], 'lon': record_head['lon'][0],
'lat': record_head['lat'][0]}
for j in range(record_head['numb'][0]):
element_id = str(
np.fromstring(byteArray[ind:(ind + 2)], dtype='i2')[0])
ind += 2
element_type = element_map[element_id]
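# the byte length is the digit in the dtype string, e.g. 'f4' -> 4
# (every dtype in element_type_map is one letter plus one digit)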
element_len = int(element_type[1])
record[element_id] = np.fromstring(
byteArray[ind:(ind + element_len)],
dtype=element_type)[0]
ind += element_len
records += [record]
# convert to pandas data frame
records = pd.DataFrame(records)
records.set_index('ID', inplace=True)
# get time
time = datetime(
head_info['year'][0], head_info['month'][0],
head_info['day'][0], head_info['hour'][0],
head_info['minute'][0], head_info['second'][0])
records['time'] = time
# change column name for common observation
records.rename(columns={
'1001': 'precipitation', '1201': 'visibility_1min'},
inplace=True)
# return
return records
else:
return None
else:
return None
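# A minimal usage sketch (not part of the original module): fetch the latest
# 10-minute surface observations and look at the renamed precipitation
# column, assuming the GDS server is reachable and the directory exists.
def _example_get_station_data():
    obs = get_station_data("SURFACE/PLOT_10MIN")
    if obs is not None and 'precipitation' in obs.columns:
        print(obs[['lon', 'lat', 'time', 'precipitation']].head())
    return obs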
def get_fy_awx(directory, filename=None, suffix="*.AWX"):
"""
Retrieve an FY geostationary satellite image file in AWX format.
:param directory: the data directory on the service.
:param filename: the data filename; if None, the latest file is used.
:param suffix: the filename filter pattern used to
find the specified file.
:return: satellite header information and image data.
:Examples:
>>> directory = "SATELLITE/FY2E/L1/IR1/EQUAL"
>>> data = get_fy_awx(directory)
"""
# connect to data service
service = GDSDataService()
# get data file name
if filename is None:
try:
status, response = service.getLatestDataName(directory, suffix)
except ValueError:
print('Cannot retrieve data from ' + directory)
return None
StringResult = DataBlock_pb2.StringResult()
if status == 200:
StringResult.ParseFromString(response)
if StringResult is not None:
filename = StringResult.name
if filename == '':
return None
else:
return None
# get data contents
try:
status, response = service.getData(directory, filename)
except ValueError:
print('Cannot retrieve data ' + filename + ' from ' + directory)
return None
ByteArrayResult = DataBlock_pb2.ByteArrayResult()
if status == 200:
ByteArrayResult.ParseFromString(response)
if ByteArrayResult is not None:
byteArray = ByteArrayResult.byteArray
# define head structure
head_dtype = [
('SAT96', 'S12'), # SAT96 filename
('byteSequence', 'i2'), # integer number byte sequence
('firstClassHeadLength', 'i2'),
('secondClassHeadLength', 'i2'),
('padDataLength', 'i2'),
('recordLength', 'i2'),
('headRecordNumber', 'i2'),
('dataRecordNumber', 'i2'),
('productCategory', 'i2'),
('compressMethod', 'i2'),
('formatString', 'S8'),
('qualityFlag', 'i2'),
('satelliteName', 'S8'),
('year', 'i2'), ('month', 'i2'),
('day', 'i2'), ('hour', 'i2'),
('minute', 'i2'),
('channel', 'i2'),
('flagOfProjection', 'i2'),
('widthOfImage', 'i2'),
('heightOfImage', 'i2'),
('scanLineNumberOfImageTopLeft', 'i2'),
('pixelNumberOfImageTopLeft', 'i2'),
('sampleRatio', 'i2'),
('latitudeOfNorth', 'i2'),
('latitudeOfSouth', 'i2'),
('longitudeOfWest', 'i2'),
('longitudeOfEast', 'i2'),
('centerLatitudeOfProjection', 'i2'),
('centerLongitudeOfProjection', 'i2'),
('standardLatitude1', 'i2'),
('standardLatitude2', 'i2'),
('horizontalResolution', 'i2'),
('verticalResolution', 'i2'),
('overlapFlagGeoGrid', 'i2'),
('overlapValueGeoGrid', 'i2'),
('dataLengthOfColorTable', 'i2'),
('dataLengthOfCalibration', 'i2'),
('dataLengthOfGeolocation', 'i2'),
('reserved', 'i2')]
head_info = np.fromstring(byteArray[0:104], dtype=head_dtype)
ind = 104
# head rest information
head_rest_len = (head_info['recordLength'][0].astype(np.int64) *
head_info['headRecordNumber'][0] - ind)
head_rest = np.fromstring(
byteArray[ind:(ind + head_rest_len)],
dtype='u1', count=head_rest_len)
ind += head_rest_len
# retrieve data records
data_len = (head_info['recordLength'][0].astype(np.int64) *
head_info['dataRecordNumber'][0])
data = np.fromstring(
byteArray[ind:(ind + data_len)], dtype='u1',
count=data_len)
data.shape = (head_info['dataRecordNumber'][0],
head_info['recordLength'][0])
# return
return head_info, data
else:
return None
else:
return None
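# A minimal usage sketch (not part of the original module): the function
# returns a (head_info, data) tuple, and image metadata can be read from the
# header, assuming the GDS server is reachable and the directory exists.
def _example_get_fy_awx():
    result = get_fy_awx("SATELLITE/FY2E/L1/IR1/EQUAL")
    if result is not None:
        head_info, data = result
        print(head_info['satelliteName'][0], data.shape)
    return result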
def get_radar_mosaic(directory, filename=None, suffix="*.LATLON"):
"""
Retrieve national radar mosaic products (e.g., composite reflectivity)
generated by the CMA CRaMS system.
:param directory: the data directory on the service.
:param filename: the data filename; if None, the latest file is used.
:param suffix: the filename filter pattern used to
find the specified file.
:return: xarray object.
:Example:
>>> data = get_radar_mosaic("RADARMOSAIC/CREF/")
"""
# connect to data service
service = GDSDataService()
# get data file name
if filename is None:
try:
status, response = service.getLatestDataName(directory, suffix)
except ValueError:
print('Cannot retrieve data from ' + directory)
return None
StringResult = DataBlock_pb2.StringResult()
if status == 200:
StringResult.ParseFromString(response)
if StringResult is not None:
filename = StringResult.name
if filename == '':
return None
else:
return None
# get data contents
try:
status, response = service.getData(directory, filename)
except ValueError:
print('Cannot retrieve data ' + filename + ' from ' + directory)
return None
ByteArrayResult = DataBlock_pb2.ByteArrayResult()
if status == 200:
ByteArrayResult.ParseFromString(response)
if ByteArrayResult is not None:
byteArray = ByteArrayResult.byteArray
# define head structure
head_dtype = [
('description', 'S128'),
# product name: QREF=base reflectivity, CREF=composite
# reflectivity, VIL=vertically integrated liquid,
# OHP=one-hour precipitation
('name', 'S32'),
('organization', 'S16'),
('grid_flag', 'u2'),  # lat/lon grid flag, fixed value 19532
('data_byte', 'i2'),  # bytes per data element, fixed value 2
('slat', 'f4'),  # southern latitude of the data region (deg)
('wlon', 'f4'),  # western longitude of the data region (deg)
('nlat', 'f4'),  # northern latitude of the data region (deg)
('elon', 'f4'),  # eastern longitude of the data region (deg)
('clat', 'f4'),  # center latitude of the data region (deg)
('clon', 'f4'),  # center longitude of the data region (deg)
('rows', 'i4'),  # number of rows in the data region
('cols', 'i4'),  # number of columns in each row
('dlat', 'f4'),  # latitudinal resolution (deg)
('dlon', 'f4'),  # longitudinal resolution (deg)
('nodata', 'f4'),  # encoded value for missing data
('levelbybtes', 'i4'),  # bytes per data level
('levelnum', 'i2'),  # number of data levels
('amp', 'i2'),  # amplification (scaling) factor
('compmode', 'i2'),  # 1 if the data are compressed, otherwise 0
('dates', 'u2'),  # observation date, days since 1970-01-01
('seconds', 'i4'),  # observation time, seconds of the day
('min_value', 'i2'),  # minimum value after amplification
('max_value', 'i2'),  # maximum value after amplification
('reserved', 'i2', 6)  # reserved bytes
]
# read head information
head_info = np.fromstring(byteArray[0:256], dtype=head_dtype)
ind = 256
# define data variable
rows = head_info['rows'][0]
cols = head_info['cols'][0]
dlat = head_info['dlat'][0]
dlon = head_info['dlon'][0]
data = np.full(rows*cols, -9999, dtype=np.int32)
# put data into array
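# the data area is stored sparsely as consecutive chunks of
# (row, column, count, count int16 values); row/column indices are
# 1-based and a (-1, -1) pair marks the end of the records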
while ind < len(byteArray):
irow = np.fromstring(byteArray[ind:(ind + 2)], dtype='i2')[0]
ind += 2
icol = np.fromstring(byteArray[ind:(ind + 2)], dtype='i2')[0]
ind += 2
if irow == -1 or icol == -1:
break
nrec = np.fromstring(byteArray[ind:(ind + 2)], dtype='i2')[0]
ind += 2
recd = np.fromstring(
byteArray[ind:(ind + 2*nrec)], dtype='i2', count=nrec)
ind += 2*nrec
position = (irow-1)*cols+icol-1
data[position:(position+nrec)] = recd
# reshape data
data.shape = (rows, cols)
# set longitude and latitude coordinates
lats = head_info['nlat'][0] - np.arange(rows)*dlat - dlat/2.0
lons = head_info['wlon'][0] + np.arange(cols)*dlon + dlon/2.0
# reverse latitude axis
data = np.flip(data, 0)
lats = lats[::-1]
# set time coordinates
time = datetime(1970, 1, 1) + timedelta(
days=head_info['dates'][0].astype(np.float64),
seconds=head_info['seconds'][0].astype(np.float64))
time = np.array([time], dtype='datetime64[m]')
data = np.expand_dims(data, axis=0)
# create xarray
data = xr.DataArray(
data, coords=[time, lats, lons],
dims=['time', 'latitude', 'longitude'],
name="radar_mosaic")
# return
return data
else:
return None
else:
return None
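# A minimal usage sketch (not part of the original module): fetch the latest
# composite reflectivity mosaic, assuming the GDS server is reachable and
# the directory exists.
def _example_get_radar_mosaic():
    data = get_radar_mosaic("RADARMOSAIC/CREF/")
    if data is not None:
        print(data.time.values[0], data.shape)  # shape is (1, rows, cols)
    return data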
def get_tlogp(directory, filename=None, suffix="*.000"):
"""
Retrieve TLOGP sounding data from the MICAPS cassandra service; the file
format is the same as the MICAPS type-5 format.
:param directory: the data directory on the service.
:param filename: the data filename; if None, the latest file is used.
:param suffix: the filename filter pattern used to
find the specified file.
:return: pandas DataFrame object.
:Example:
>>> data = get_tlogp("UPPER_AIR/TLOGP/")
"""
# connect to data service
service = GDSDataService()
# get data file name
if filename is None:
try:
status, response = service.getLatestDataName(directory, suffix)
except ValueError:
print('Cannot retrieve data from ' + directory)
return None
StringResult = DataBlock_pb2.StringResult()
if status == 200:
StringResult.ParseFromString(response)
if StringResult is not None:
filename = StringResult.name
if filename == '':
return None
else:
return None
# get data contents
try:
status, response = service.getData(directory, filename)
except ValueError:
print('Cannot retrieve data ' + filename + ' from ' + directory)
return None
ByteArrayResult = DataBlock_pb2.ByteArrayResult()
if status == 200:
ByteArrayResult.ParseFromString(response)
if ByteArrayResult is not None:
byteArray = ByteArrayResult.byteArray
# decode bytes to string
txt = byteArray.decode("utf-8")
txt = list(filter(None, re.split(' |\n', txt)))
# observation date and time
if len(txt[3]) < 4:
year = int(txt[3]) + 2000
else:
year = int(txt[3])
month = int(txt[4])
day = int(txt[5])
hour = int(txt[6])
time = datetime(year, month, day, hour)
# the number of records
number = int(txt[7])
if number < 1:
return None
# cut the data
txt = txt[8:]
# put the data into dictionary
index = 0
records = []
while index < len(txt):
# get the record information
ID = txt[index].strip()
lon = float(txt[index+1])
lat = float(txt[index+2])
alt = float(txt[index+3])
number = int(txt[index+4]) // 6
index += 5
# get the sounding records
for i in range(number):
record = {
'ID': ID, 'lon': lon, 'lat': lat, 'alt': alt,
'time': time,
'p': float(txt[index]), 'h': float(txt[index+1]),
't': float(txt[index+2]), 'td': float(txt[index+3]),
'wind_angle': float(txt[index+4]),
'wind_speed': float(txt[index+5])}
records.append(record)
index += 6
# transform to pandas data frame
records = pd.DataFrame(records)
records.set_index('ID', inplace=True)
# return
return records
else:
return None
else:
return None
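# A minimal usage sketch (not part of the original module): fetch the latest
# TLOGP soundings and extract one station's profile, assuming the GDS server
# is reachable; the station ID below is only an illustration.
def _example_get_tlogp():
    records = get_tlogp("UPPER_AIR/TLOGP/")
    if records is not None and '54511' in records.index:
        profile = records.loc['54511']
        print(profile[['p', 'h', 't', 'td']])
    return records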

63
setup.py Normal file

@ -0,0 +1,63 @@
# _*_ coding: utf-8 _*_
from os import path
from setuptools import find_packages, setup
from codecs import open
name = 'nmc_met_io'
author = __import__(name).__author__
version = __import__(name).__version__
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()
setup(
name=name,
version=version,
description=("Collection of tools for I/O or"
"accessing meteorological data."),
long_description=long_description,
# author
author=author,
author_email='kan.dai@foxmail.com',
# LICENSE
license='GPL3',
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
'Programming Language :: Python :: 3',
],
packages=find_packages(exclude=['docs', 'tests', 'build', 'dist']),
include_package_data=True,
exclude_package_data={'': ['.gitignore']},
install_requires=['numpy>=1.12.1',
'scipy>=0.19.0',
'xarray>=0.9.6',
'pandas>=0.20.0',
'pyproj>=1.9.5.1',
'protobuf>=3.5.0',
'urllib3>=1.20',
'python-dateutil']
)
# development mode (DOS command):
# python setup.py develop
# python setup.py develop --uninstall
# build mode:
# python setup.py build --build-base=D:/test/python/build
# distribution mode:
# python setup.py sdist # create source tar.gz file in /dist
# python setup.py bdist_wheel # create wheel binary in /dist