使用python将时间序列数据从netCDF文件提取到.csv文件中_Python_Pandas_Indexoutofboundsexception_Netcdf_Netcdf4

使用python将时间序列数据从netCDF文件提取到.csv文件中

python pandas

使用python将时间序列数据从netCDF文件提取到.csv文件中,python,pandas,indexoutofboundsexception,netcdf,netcdf4,Python,Pandas,Indexoutofboundsexception,Netcdf,Netcdf4,我希望在这里能找到帮助。我试图在netcdf文件中提取温度变量的时间序列，以便将数据帧写入.csv。在代码末尾，我出现了以下错误：IndexError:Index18224超出了大小为1的轴1的界限，您能帮我解决这个问题吗？下面是我的代码、注释和一些打印（）供您更好地理解我的问题。谢谢大家! import netCDF4 from netCDF4 import Dataset import numpy as np import pandas as pd data = Dataset(r

我希望能在这里得到帮助。我想从 netCDF 文件中提取温度变量的时间序列，并把得到的 DataFrame 写入 .csv 文件。在代码末尾出现了如下错误：IndexError: index 18224 is out of bounds for axis 1 with size 1。您能帮我解决这个问题吗？下面是我的代码、注释以及一些 print() 的输出，方便您更好地理解我的问题。谢谢大家！

import netCDF4
from netCDF4 import Dataset
import numpy as np
import pandas as pd


data = Dataset(r'/gpfs/home/UDCPP/barrier_c/Test_NCO/temp_Corse_10m_201704.nc', 'r')

lat = data.variables['latitude'][:]
>>> print(lat)
[[41.123375  41.123375  41.123375  ... 41.123375  41.123375  41.123375 ]
 [41.1341975 41.1341975 41.1341975 ... 41.1341975 41.1341975 41.1341975]
 [41.14502   41.14502   41.14502   ... 41.14502   41.14502   41.14502  ]
 ...
 [43.26623   43.26623   43.26623   ... 43.26623   43.26623   43.26623  ]
 [43.2770525 43.2770525 43.2770525 ... 43.2770525 43.2770525 43.2770525]
 [43.287875  43.287875  43.287875  ... 43.287875  43.287875  43.287875 ]]

lon = data.variables['longitude'][:]
>>> print(lon)
[[ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
 [ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
 [ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
 ...
 [ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
 [ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
 [ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]]

## Target point (Calvi is just an example location).
lat_calvi =  42.57
lon_calvi =  8.75

## Element-wise squared distance of every grid cell from the target,
## computed separately for latitude and longitude. Both `lat` and `lon`
## print as 2-D arrays above, so these results are 2-D as well.
sq_diff_lat = (lat - lat_calvi)**2
sq_diff_lon = (lon - lon_calvi)**2

## Index of the smallest squared difference in each grid.
## NOTE(review): argmin() called without `axis` on a 2-D array returns an
## index into the *flattened* array, not a (row, col) pair. That is how a
## value as large as 18224 can appear in the IndexError reported below.
## Converting with np.unravel_index(flat_index, lat.shape) would yield the
## 2-D position instead.
min_index_lat = sq_diff_lat.argmin()
min_index_lon = sq_diff_lon.argmin()

temp = data.variables['TEMP'][:]
>>> print(temp)
[[[[14.295403480529785 14.60593032836914 15.037308692932129 ...
    13.44691276550293 13.448591232299805 13.447751998901367]
   [14.130069732666016 14.316385269165039 14.63278579711914 ...
    13.44691276550293 13.448591232299805 13.447751998901367]
   [14.061250686645508 14.13510513305664 14.323938369750977 ...
    13.44691276550293 13.448591232299805 13.447751998901367]
   ...
## Preparation for the empty result table: build the first day of the range.
## Character 7 of the time variable's `units` string is prepended to the
## date; the value echoed below shows that this character is a space.
starting_date = data.variables['time'].units[7] + '2017-04-01'
>>> starting_date
' 2017-04-01'

# Last day of the range, built the same way: character 7 of the time
# variable's `units` string (a space, per the echoed value) plus the date.
ending_date = data.variables['time'].units[7] + '2017-04-30'
>>> ending_date
' 2017-04-30'

date_range = pd.date_range(start = starting_date, end = ending_date)
>>> date_range
DatetimeIndex(['2017-04-01', '2017-04-02', '2017-04-03', '2017-04-04',
               '2017-04-05', '2017-04-06', '2017-04-07', '2017-04-08',
               '2017-04-09', '2017-04-10', '2017-04-11', '2017-04-12',
               '2017-04-13', '2017-04-14', '2017-04-15', '2017-04-16',
               '2017-04-17', '2017-04-18', '2017-04-19', '2017-04-20',
               '2017-04-21', '2017-04-22', '2017-04-23', '2017-04-24',
               '2017-04-25', '2017-04-26', '2017-04-27', '2017-04-28',
               '2017-04-29', '2017-04-30'],
              dtype='datetime64[ns]', freq='D')


df = pd.DataFrame(0, columns = ['temp'], index = date_range)
>>> df
                   temp
2017-04-01            0
2017-04-02            0
2017-04-03            0
2017-04-04            0
2017-04-05            0
2017-04-06            0
2017-04-07            0
2017-04-08            0
2017-04-09            0
2017-04-10            0
2017-04-11            0
2017-04-12            0
2017-04-13            0
2017-04-14            0
2017-04-15            0
2017-04-16            0
2017-04-17            0
2017-04-18            0
2017-04-19            0
2017-04-20            0
2017-04-21            0
2017-04-22            0
2017-04-23            0
2017-04-24            0
2017-04-25            0
2017-04-26            0
2017-04-27            0
2017-04-28            0
2017-04-29            0
2017-04-30            0



# One integer position per entry of the file's time variable.
dt = np.arange(0, data.variables['time'].size)

# Copy the temperature at the selected grid cell into the table, one time
# step per row.
# NOTE(review): `temp` prints with four nested brackets above, i.e. it is
# 4-D, but only three indices are given here, so `min_index_lat` is applied
# to axis 1. The traceback reports that axis 1 has size 1, and
# `min_index_lat` is a flattened-grid index, hence the IndexError.
# Presumably the axes are (time, depth, lat, lon) - confirm with
# data.variables['TEMP'].dimensions. If so, the lookup would need the form
# temp[time_index, 0, row, col], with (row, col) obtained from
# np.unravel_index on the flattened argmin.
for time_index in dt:
    df.iloc[time_index] = temp[time_index,min_index_lat ,min_index_lon]
... 
Traceback (most recent call last):
  File "<stdin>", line 2, in <module>
  File "/gpfs/apps/miniconda3/lib/python3.7/site-packages/numpy/ma/core.py", line 3188, in __getitem__
    dout = self.data[indx]
IndexError: index 18224 is out of bounds for axis 1 with size 1


##save time serie into a .csv
df.to_csv('temp_test.csv')

import netCDF4
from netCDF4 import Dataset
import numpy as np
import pandas as pd
data = Dataset(r'/gpfs/home/UDCPP/barrier_c/Test_NCO/temp_Corse_10m_201704.nc', 'r')
lat = data.variables['latitude'][:]
>>> print(lat)
[[41.123375  41.123375  41.123375  ... 41.123375  41.123375  41.123375 ]
[41.1341975 41.1341975 41.1341975 ... 41.1341975 41.1341975 41.1341975]
[41.14502   41.14502   41.14502   ... 41.14502   41.14502   41.14502  ]
...
[43.26623   43.26623   43.26623   ... 43.26623   43.26623   43.26623  ]
[43.2770525 43.2770525 43.2770525 ... 43.2770525 43.2770525 43.2770525]
[43.287875  43.287875  43.287875  ... 43.287875  43.287875  43.287875 ]]
lon = data.variables['longitude'][:]
>>> print(lon)
[[ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
[ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
[ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
...
[ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
[ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]
[ 8.218151   8.2326964  8.2472418 ... 10.1526892 10.1672346 10.18178  ]]
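##Calvi is just an example
lat_calvi =  42.57
lon_calvi =  8.75
##Squared difference of lat and lon
sq_diff_lat = (lat - lat_calvi)**2
sq_diff_lon = (lon - lon_calvi)**2
##Identifying the index of the minimum value for lat and lon
min_index_lat = sq_diff_lat.argmin()
min_index_lon = sq_diff_lon.argmin()
temp = data.variables['TEMP'][:]
>>> print(temp)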
[[[[14.295403480529785 14.60593032836914 15.037308692932129 ...
13.44691276550293 13.448591232299805 13.447751998901367]
[14.130069732666016 14.316385269165039 14.63278579711914 ...
13.44691276550293 13.448591232299805 13.447751998901367]
[14.061250686645508 14.13510513305664 14.323938369750977 ...
13.44691276550293 13.448591232299805 13.447751998901367]
...
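##create an empty table
##starting date index 7 + the date
starting_date = data.variables['time'].units[7] + '2017-04-01'
>>> starting_date
' 2017-04-01'
#ending date index 7 + the date
ending_date = data.variables['time'].units[7] + '2017-04-30'
>>> ending_date
' 2017-04-30'
date_range = pd.date_range(start = starting_date, end = ending_date)
>>> date_range
DatetimeIndex(['2017-04-01', '2017-04-02', '2017-04-03', '2017-04-04',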
'2017-04-05', '2017-04-06', '2017-04-07', '2017-04-08',
'2017-04-09', '2017-04-10', '2017-04-11', '2017-04-12',
'2017-04-13', '2017-04-14', '2017-04-15', '2017-04-16',
'2017-04-17', '2017-04-18', '2017-04-19', '2017-04-20',
'2017-04-21', '2017-04-22', '2017-04-23', '2017-04-24',
'2017-04-25', '2017-04-26', '2017-04-27', '2017-04-28',
'2017-04-29', '2017-04-30'],
              dtype='datetime64[ns]', freq='D')
df = pd.DataFrame(0, columns = ['temp'], index = date_range)
>>> df
                   temp
2017-04-01            0
2017-04-02            0
2017-04-03            0
2017-04-04            0
2017-04-05            0
2017-04-06            0
2017-04-07            0
2017-04-08            0
2017-04-09            0
2017-04-10            0
2017-04-11            0
2017-04-12            0
2017-04-13            0
2017-04-14            0
2017-04-15            0
2017-04-16            0
2017-04-17            0
2017-04-18            0
2017-04-19            0
2017-04-20            0
2017-04-21            0
2017-04-22            0
2017-04-23            0
2017-04-24            0
2017-04-25            0
2017-04-26            0
2017-04-27            0
2017-04-28            0
2017-04-29            0
2017-04-30            0
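dt = np.arange(0, data.variables['time'].size)
for time_index in dt:
    df.iloc[time_index] = temp[time_index,min_index_lat ,min_index_lon]
... 
Traceback (most recent call last):
  File "<stdin>", line 2, in <module>
  File "/gpfs/apps/miniconda3/lib/python3.7/site-packages/numpy/ma/core.py", line 3188, in __getitem__
    dout = self.data[indx]
IndexError: index 18224 is out of bounds for axis 1 with size 1
##save time serie into a .csv
df.to_csv('temp_test.csv')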

编辑：

下面是df.to_csv的全部输出：

>>> df.to_csv('temp_test.csv')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/gpfs/apps/miniconda3/lib/python3.7/site-packages/pandas/core/generic.py", line 3204, in to_csv
    formatter.save()
  File "/gpfs/apps/miniconda3/lib/python3.7/site-packages/pandas/io/formats/csvs.py", line 188, in save
    compression=dict(self.compression_args, method=self.compression),
  File "/gpfs/apps/miniconda3/lib/python3.7/site-packages/pandas/io/common.py", line 455, in get_handle
    f = open(path_or_buf, mode, encoding=encoding, newline="")
PermissionError: [Errno 13] Permission denied: 'temp_test.csv'

这可以使用xarray完成：

import xarray as xr
import pandas as pd


# Dump every variable of the netCDF file into one CSV table.
#
# open_dataset() takes no file-mode argument: the file is opened read-only by
# default. Its second positional parameter is not a mode (older xarray
# releases interpret it as the netCDF `group` to open, newer ones reject extra
# positional arguments), so the stray 'r' is dropped.
# The `with` block closes the file handle once the data has been copied into
# the DataFrame.
with xr.open_dataset(r'/gpfs/home/UDCPP/barrier_c/Test_NCO/temp_Corse_10m_201704.nc') as data:
    # to_dataframe() indexes rows by the dataset's dimensions; reset_index()
    # turns those index levels into ordinary columns.
    df = data.to_dataframe().reset_index()
df.to_csv('temp_test.csv')

并检查 df 的内容，因为其中可能包含 netCDF 文件中的边界变量（bnds）。

如果这不是完整的回溯信息，请把它完整地贴出来。
谢谢 @Robert Wilson，事实上我也尝试过 xarray，它可以运行，但最后一个函数 df.to_csv('temp_test.csv') 除外。我得到以下错误：PermissionError: [Errno 13] Permission denied: 'temp_test.csv'。我还不知道问题出在哪里。感谢您的帮助！
这可能意味着 temp_test.csv 已被另一个应用程序打开，或者您的 Python 脚本已被另一个应用程序打开。
谢谢您的快速回复。确实，我看到过这可能是原因，但在我的情况下，该文件并没有被任何应用程序打开

import xarray as xr
import pandas as pd


# Dump every variable of the netCDF file into one CSV table.
#
# open_dataset() takes no file-mode argument: the file is opened read-only by
# default. Its second positional parameter is not a mode (older xarray
# releases interpret it as the netCDF `group` to open, newer ones reject extra
# positional arguments), so the stray 'r' is dropped.
# The `with` block closes the file handle once the data has been copied into
# the DataFrame.
with xr.open_dataset(r'/gpfs/home/UDCPP/barrier_c/Test_NCO/temp_Corse_10m_201704.nc') as data:
    # to_dataframe() indexes rows by the dataset's dimensions; reset_index()
    # turns those index levels into ordinary columns.
    df = data.to_dataframe().reset_index()
df.to_csv('temp_test.csv')