Python Pandas read_sas 错误:'ascii' 编解码器无法解码位置 0 处的字节 0xd8:序号不在范围内 (128)
我正在使用Pandas 0.18打开一个Python Pandas read_sas 错误:'ascii' 编解码器无法解码位置 0 处的字节 0xd8:序号不在范围内 (128),python,pandas,sas,Python,Pandas,Sas,我正在使用Pandas 0.18打开一个sas7bdat数据集 我只是使用: df=pd.read_sas('P:/myfile.sas7bdat') 我得到以下错误 buf[0:text_block_size].rstrip(b"\x00 ").decode()) UnicodeDecodeError: 'ascii' codec can't decode byte 0xd8 in position 0: ordinal not in range(128) 如果我使用 import
sas7bdat
数据集
我只是使用:
df=pd.read_sas('P:/myfile.sas7bdat')
我得到以下错误
buf[0:text_block_size].rstrip(b"\x00 ").decode())
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd8 in position 0: ordinal not in range(128)
如果我使用
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
我得到:
Pandas可以很好地处理我文件夹中的其他sas7bdat
文件
当我在SAS中打开该文件时,我看到列名非常长,跨越了几行,但在其他方面,文件看起来很好
read_sas
中没有太多可用的选项……我该怎么办?
非常感谢! 您可能必须将编码设置为UTF-8。类似于这样(根据报告):
我也有同样的问题 问题是我有
encoding='utf-8'
我仍然得到以下错误:
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-20-5deb45266124> in <module>
----> 1 df = pd.read_sas("/workspace/em_data1/dev/sas_data/bureau/data_validation/dnb/freq_202008/_freq_2138_201503_202009.sas7bdat",encoding='utf-8')
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sasreader.py in read_sas(filepath_or_buffer, format, index, encoding, chunksize, iterator)
121
122 reader = SAS7BDATReader(
--> 123 filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
124 )
125 else:
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in __init__(self, path_or_buf, index, convert_dates, blank_missing, chunksize, encoding, convert_text, convert_header_text)
144
145 self._get_properties()
--> 146 self._parse_metadata()
147
148 def column_data_lengths(self):
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _parse_metadata(self)
349 self.close()
350 raise ValueError("Failed to read a meta data page from the SAS file.")
--> 351 done = self._process_page_meta()
352
353 def _process_page_meta(self):
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _process_page_meta(self)
355 pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
356 if self._current_page_type in pt:
--> 357 self._process_page_metadata()
358 is_data_page = self._current_page_type & const.page_data_type
359 is_mix_page = self._current_page_type in const.page_mix_types
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _process_page_metadata(self)
390 subheader_signature, pointer.compression, pointer.ptype
391 )
--> 392 self._process_subheader(subheader_index, pointer)
393
394 def _get_subheader_index(self, signature, compression, ptype):
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _process_subheader(self, subheader_index, pointer)
458 raise ValueError("unknown subheader index")
459
--> 460 processor(offset, length)
461
462 def _process_rowsize_subheader(self, offset, length):
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _process_columntext_subheader(self, offset, length)
512 cname = cname_raw
513 if self.convert_header_text:
--> 514 cname = cname.decode(self.encoding or self.default_encoding)
515 self.column_names_strings.append(cname)
516
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 0: invalid continuation byte
@你能发布一个数据集的样本吗?不幸的是,不是真的。。但是SAS的第一行在我看来很好。这个错误意味着什么?数据中有一个奇怪的字符?这意味着数据的编码不是ascii,可能尝试不同的编码?我正试图找到确切的答案,但github现在关闭了,所以可能几分钟后…这应该是正确的解决方案,但只适用于以后版本的Pandas。
df=pd.read_sas('P:/myfile.sas7bdat', encoding='utf-8')
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-20-5deb45266124> in <module>
----> 1 df = pd.read_sas("/workspace/em_data1/dev/sas_data/bureau/data_validation/dnb/freq_202008/_freq_2138_201503_202009.sas7bdat",encoding='utf-8')
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sasreader.py in read_sas(filepath_or_buffer, format, index, encoding, chunksize, iterator)
121
122 reader = SAS7BDATReader(
--> 123 filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
124 )
125 else:
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in __init__(self, path_or_buf, index, convert_dates, blank_missing, chunksize, encoding, convert_text, convert_header_text)
144
145 self._get_properties()
--> 146 self._parse_metadata()
147
148 def column_data_lengths(self):
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _parse_metadata(self)
349 self.close()
350 raise ValueError("Failed to read a meta data page from the SAS file.")
--> 351 done = self._process_page_meta()
352
353 def _process_page_meta(self):
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _process_page_meta(self)
355 pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
356 if self._current_page_type in pt:
--> 357 self._process_page_metadata()
358 is_data_page = self._current_page_type & const.page_data_type
359 is_mix_page = self._current_page_type in const.page_mix_types
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _process_page_metadata(self)
390 subheader_signature, pointer.compression, pointer.ptype
391 )
--> 392 self._process_subheader(subheader_index, pointer)
393
394 def _get_subheader_index(self, signature, compression, ptype):
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _process_subheader(self, subheader_index, pointer)
458 raise ValueError("unknown subheader index")
459
--> 460 processor(offset, length)
461
462 def _process_rowsize_subheader(self, offset, length):
/opt/Anaconda/2018.12/lib/python3.7/site-packages/pandas/io/sas/sas7bdat.py in _process_columntext_subheader(self, offset, length)
512 cname = cname_raw
513 if self.convert_header_text:
--> 514 cname = cname.decode(self.encoding or self.default_encoding)
515 self.column_names_strings.append(cname)
516
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 0: invalid continuation byte
echo $LANG
en_US.UTF-8