Matplotlib 错误导致无法在 Databricks 上运行 pandas-profiling

Matplotlib 错误导致无法在 Databricks 上运行 pandas-profiling,matplotlib,databricks,pandas-profiling,Matplotlib,Databricks,Pandas Profiling,我正试图在 Databricks 环境中对一个示例 DataFrame 运行 pandas-profiling。出现了与 matplotlib 相关的错误,不确定该问题是由 Matplotlib 还是 pandas-profiling 引起的。任何帮助都将不胜感激 Databricks运行时配置: 7.4 ML(包括Apache Spark 3.0.1、Scala 2.12) 安装方式 !pip install pandas-profiling[notebook] 代码 import numpy as np import pandas as pd from pandas_profiling import ProfileRep

我正试图在 Databricks 环境中对一个示例 DataFrame 运行 pandas-profiling。出现了与 matplotlib 相关的错误,不确定该问题是由 Matplotlib 还是 pandas-profiling 引起的。任何帮助都将不胜感激。

Databricks 运行时配置: 7.4 ML(包括 Apache Spark 3.0.1、Scala 2.12)

安装方式如下:
!pip install pandas-profiling[notebook]

代码

import numpy as np  # NOTE(review): `np` is not used anywhere in this snippet
import pandas as pd  # NOTE(review): `pd` is not used anywhere in this snippet
from pandas_profiling import ProfileReport
# Build a profiling report for `df` with the title 'EDA Report' and
# explorative mode enabled. `df` is not defined in this snippet; it must
# already exist in the notebook session.
profile = ProfileReport(df, title='EDA Report', explorative=True)
# Write the report as an HTML file to the given /dbfs path. This is the call
# during which the error shown in the output below is raised.
profile.to_file("/dbfs/mnt/sb2/EDA_Reports/EDA.html")
Summarize dataset:  93%|█████████▎| 106/114 [11:30<07:27, 55.91s/it, Calculate cramers correlation]/databricks/python/lib/python3.7/site-packages/pandas_profiling/model/correlations.py:139: UserWarning: There was an attempt to calculate the cramers correlation, but this failed.
To hide this warning, disable the calculation
(using `df.profile_report(correlations={"cramers": {"calculate": False}})`
If this is problematic for your use case, please report this as an issue:
https://github.com/pandas-profiling/pandas-profiling/issues
(include the error message: 'No data; `observed` has size 0.')
  (include the error message: '{error}')"""
Summarize dataset:  94%|█████████▍| 107/114 [11:56<00:46,  6.69s/it, Get scatter matrix]           
RuntimeError: "/databricks/python/lib/python3.7/site-packages/matplotlib/mpl-data" should be a path but it does not exist
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/context.py in manage_matplotlib_context()
     79         sns.set_style(style="white")
---> 80         yield
     81     finally:

/databricks/python/lib/python3.7/contextlib.py in inner(*args, **kwds)
     73             with self._recreate_cm():
---> 74                 return func(*args, **kwds)
     75         return inner

/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/plot.py in scatter_pairwise(series1, series2, x_label, y_label)
    276         plt.scatter(series1, series2, color=color)
--> 277     return plot_360_n0sc0pe(plt)
    278 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/utils.py in plot_360_n0sc0pe(plt, image_format, attempts)
     67                 image_str = StringIO()
---> 68                 plt.savefig(image_str, format=image_format)
     69                 image_str.seek(0)

/databricks/python/lib/python3.7/site-packages/matplotlib/pyplot.py in savefig(*args, **kwargs)

/databricks/python/lib/python3.7/site-packages/matplotlib/figure.py in savefig(self, fname, transparent, **kwargs)

/databricks/python/lib/python3.7/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, **kwargs)

/databricks/python/lib/python3.7/site-packages/matplotlib/backend_bases.py in _get_output_canvas(self, fmt)

/databricks/python/lib/python3.7/site-packages/matplotlib/backend_bases.py in get_registered_canvas_class(format)

/databricks/python/lib/python3.7/importlib/__init__.py in import_module(name, package)
    126             level += 1
--> 127     return _bootstrap._gcd_import(name[level:], package, level)
    128 

/databricks/python/lib/python3.7/importlib/_bootstrap.py in _gcd_import(name, package, level)

/databricks/python/lib/python3.7/importlib/_bootstrap.py in _find_and_load(name, import_)

/databricks/python/lib/python3.7/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)

/databricks/python/lib/python3.7/importlib/_bootstrap.py in _find_spec(name, path, target)

/databricks/python/lib/python3.7/importlib/_bootstrap_external.py in find_spec(cls, fullname, path, target)

/databricks/python/lib/python3.7/importlib/_bootstrap_external.py in _get_spec(cls, fullname, path, target)

/databricks/python/lib/python3.7/importlib/_bootstrap_external.py in find_spec(self, fullname, target)

/databricks/python/lib/python3.7/importlib/_bootstrap_external.py in _fill_cache(self)

OSError: [Errno 116] Stale file handle: '/databricks/python/lib/python3.7/site-packages/matplotlib/backends'

During handling of the above exception, another exception occurred:

RuntimeError                              Traceback (most recent call last)
<command-3404575914441933> in <module>
      1 profile = ProfileReport(df, title='EDA Report', explorative=True)
----> 2 profile.to_file("/dbfs/mnt/sb2/naga/dataset/EDA_Reports/Digital_HO_New_Features_EDA.html")

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in to_file(self, output_file, silent)
    272                 create_html_assets(output_file)
    273 
--> 274             data = self.to_html()
    275 
    276             if output_file.suffix != ".html":

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in to_html(self)
    376 
    377         """
--> 378         return self.html
    379 
    380     def to_json(self) -> str:

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in html(self)
    195     def html(self):
    196         if self._html is None:
--> 197             self._html = self._render_html()
    198         return self._html
    199 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in _render_html(self)
    302         from pandas_profiling.report.presentation.flavours import HTMLReport
    303 
--> 304         report = self.report
    305 
    306         disable_progress_bar = not config["progress_bar"].get(bool)

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in report(self)
    189     def report(self):
    190         if self._report is None:
--> 191             self._report = get_report_structure(self.description_set)
    192         return self._report
    193 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in description_set(self)
    169         if self._description_set is None:
    170             self._description_set = describe_df(
--> 171                 self.title, self.df, self.summarizer, self.typeset, self._sample
    172             )
    173         return self._description_set

/databricks/python/lib/python3.7/site-packages/pandas_profiling/model/describe.py in describe(title, df, summarizer, typeset, sample)
    105         # Scatter matrix
    106         pbar.set_postfix_str("Get scatter matrix")
--> 107         scatter_matrix = get_scatter_matrix(df, interval_columns)
    108         pbar.update()
    109 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/model/summary.py in get_scatter_matrix(df, continuous_variables)
    283                     df_temp = df[[x, y]].dropna()
    284                     scatter_matrix[x][y] = scatter_pairwise(
--> 285                         df_temp[x], df_temp[y], x, y
    286                     )
    287     else:

/databricks/python/lib/python3.7/contextlib.py in inner(*args, **kwds)
     72         def inner(*args, **kwds):
     73             with self._recreate_cm():
---> 74                 return func(*args, **kwds)
     75         return inner
     76 

/databricks/python/lib/python3.7/contextlib.py in __exit__(self, type, value, traceback)
    128                 value = type()
    129             try:
--> 130                 self.gen.throw(type, value, traceback)
    131             except StopIteration as exc:
    132                 # Suppress StopIteration *unless* it's the same exception that

/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/context.py in manage_matplotlib_context()
     83         with warnings.catch_warnings():
     84             warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
---> 85             matplotlib.rcParams.update(originalRcParams)  # revert to original rcParams

/databricks/python/lib/python3.7/_collections_abc.py in update(*args, **kwds)
    839             if isinstance(other, Mapping):
    840                 for key in other:
--> 841                     self[key] = other[key]
    842             elif hasattr(other, "keys"):
    843                 for key in other.keys():

/databricks/python/lib/python3.7/site-packages/matplotlib/__init__.py in __setitem__(self, key, val)

/databricks/python/lib/python3.7/site-packages/matplotlib/rcsetup.py in validate_path_exists(s)

RuntimeError: "/databricks/python/lib/python3.7/site-packages/matplotlib/mpl-data" should be a path but it does not exist
错误日志跟踪

import numpy as np  # NOTE(review): `np` is not used anywhere in this snippet
import pandas as pd  # NOTE(review): `pd` is not used anywhere in this snippet
from pandas_profiling import ProfileReport
# Build a profiling report for `df` with the title 'EDA Report' and
# explorative mode enabled. `df` is not defined in this snippet; it must
# already exist in the notebook session.
profile = ProfileReport(df, title='EDA Report', explorative=True)
# Write the report as an HTML file to the given /dbfs path. This is the call
# during which the error shown in the output below is raised.
profile.to_file("/dbfs/mnt/sb2/EDA_Reports/EDA.html")
Summarize dataset:  93%|█████████▎| 106/114 [11:30<07:27, 55.91s/it, Calculate cramers correlation]/databricks/python/lib/python3.7/site-packages/pandas_profiling/model/correlations.py:139: UserWarning: There was an attempt to calculate the cramers correlation, but this failed.
To hide this warning, disable the calculation
(using `df.profile_report(correlations={"cramers": {"calculate": False}})`
If this is problematic for your use case, please report this as an issue:
https://github.com/pandas-profiling/pandas-profiling/issues
(include the error message: 'No data; `observed` has size 0.')
  (include the error message: '{error}')"""
Summarize dataset:  94%|█████████▍| 107/114 [11:56<00:46,  6.69s/it, Get scatter matrix]           
RuntimeError: "/databricks/python/lib/python3.7/site-packages/matplotlib/mpl-data" should be a path but it does not exist
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/context.py in manage_matplotlib_context()
     79         sns.set_style(style="white")
---> 80         yield
     81     finally:

/databricks/python/lib/python3.7/contextlib.py in inner(*args, **kwds)
     73             with self._recreate_cm():
---> 74                 return func(*args, **kwds)
     75         return inner

/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/plot.py in scatter_pairwise(series1, series2, x_label, y_label)
    276         plt.scatter(series1, series2, color=color)
--> 277     return plot_360_n0sc0pe(plt)
    278 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/utils.py in plot_360_n0sc0pe(plt, image_format, attempts)
     67                 image_str = StringIO()
---> 68                 plt.savefig(image_str, format=image_format)
     69                 image_str.seek(0)

/databricks/python/lib/python3.7/site-packages/matplotlib/pyplot.py in savefig(*args, **kwargs)

/databricks/python/lib/python3.7/site-packages/matplotlib/figure.py in savefig(self, fname, transparent, **kwargs)

/databricks/python/lib/python3.7/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, **kwargs)

/databricks/python/lib/python3.7/site-packages/matplotlib/backend_bases.py in _get_output_canvas(self, fmt)

/databricks/python/lib/python3.7/site-packages/matplotlib/backend_bases.py in get_registered_canvas_class(format)

/databricks/python/lib/python3.7/importlib/__init__.py in import_module(name, package)
    126             level += 1
--> 127     return _bootstrap._gcd_import(name[level:], package, level)
    128 

/databricks/python/lib/python3.7/importlib/_bootstrap.py in _gcd_import(name, package, level)

/databricks/python/lib/python3.7/importlib/_bootstrap.py in _find_and_load(name, import_)

/databricks/python/lib/python3.7/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)

/databricks/python/lib/python3.7/importlib/_bootstrap.py in _find_spec(name, path, target)

/databricks/python/lib/python3.7/importlib/_bootstrap_external.py in find_spec(cls, fullname, path, target)

/databricks/python/lib/python3.7/importlib/_bootstrap_external.py in _get_spec(cls, fullname, path, target)

/databricks/python/lib/python3.7/importlib/_bootstrap_external.py in find_spec(self, fullname, target)

/databricks/python/lib/python3.7/importlib/_bootstrap_external.py in _fill_cache(self)

OSError: [Errno 116] Stale file handle: '/databricks/python/lib/python3.7/site-packages/matplotlib/backends'

During handling of the above exception, another exception occurred:

RuntimeError                              Traceback (most recent call last)
<command-3404575914441933> in <module>
      1 profile = ProfileReport(df, title='EDA Report', explorative=True)
----> 2 profile.to_file("/dbfs/mnt/sb2/naga/dataset/EDA_Reports/Digital_HO_New_Features_EDA.html")

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in to_file(self, output_file, silent)
    272                 create_html_assets(output_file)
    273 
--> 274             data = self.to_html()
    275 
    276             if output_file.suffix != ".html":

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in to_html(self)
    376 
    377         """
--> 378         return self.html
    379 
    380     def to_json(self) -> str:

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in html(self)
    195     def html(self):
    196         if self._html is None:
--> 197             self._html = self._render_html()
    198         return self._html
    199 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in _render_html(self)
    302         from pandas_profiling.report.presentation.flavours import HTMLReport
    303 
--> 304         report = self.report
    305 
    306         disable_progress_bar = not config["progress_bar"].get(bool)

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in report(self)
    189     def report(self):
    190         if self._report is None:
--> 191             self._report = get_report_structure(self.description_set)
    192         return self._report
    193 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in description_set(self)
    169         if self._description_set is None:
    170             self._description_set = describe_df(
--> 171                 self.title, self.df, self.summarizer, self.typeset, self._sample
    172             )
    173         return self._description_set

/databricks/python/lib/python3.7/site-packages/pandas_profiling/model/describe.py in describe(title, df, summarizer, typeset, sample)
    105         # Scatter matrix
    106         pbar.set_postfix_str("Get scatter matrix")
--> 107         scatter_matrix = get_scatter_matrix(df, interval_columns)
    108         pbar.update()
    109 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/model/summary.py in get_scatter_matrix(df, continuous_variables)
    283                     df_temp = df[[x, y]].dropna()
    284                     scatter_matrix[x][y] = scatter_pairwise(
--> 285                         df_temp[x], df_temp[y], x, y
    286                     )
    287     else:

/databricks/python/lib/python3.7/contextlib.py in inner(*args, **kwds)
     72         def inner(*args, **kwds):
     73             with self._recreate_cm():
---> 74                 return func(*args, **kwds)
     75         return inner
     76 

/databricks/python/lib/python3.7/contextlib.py in __exit__(self, type, value, traceback)
    128                 value = type()
    129             try:
--> 130                 self.gen.throw(type, value, traceback)
    131             except StopIteration as exc:
    132                 # Suppress StopIteration *unless* it's the same exception that

/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/context.py in manage_matplotlib_context()
     83         with warnings.catch_warnings():
     84             warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
---> 85             matplotlib.rcParams.update(originalRcParams)  # revert to original rcParams

/databricks/python/lib/python3.7/_collections_abc.py in update(*args, **kwds)
    839             if isinstance(other, Mapping):
    840                 for key in other:
--> 841                     self[key] = other[key]
    842             elif hasattr(other, "keys"):
    843                 for key in other.keys():

/databricks/python/lib/python3.7/site-packages/matplotlib/__init__.py in __setitem__(self, key, val)

/databricks/python/lib/python3.7/site-packages/matplotlib/rcsetup.py in validate_path_exists(s)

RuntimeError: "/databricks/python/lib/python3.7/site-packages/matplotlib/mpl-data" should be a path but it does not exist
Summarize dataset:  93%|█████████▎| 106/114 [11:30<07:27, 55.91s/it, Calculate cramers correlation]
...
--> 378         return self.html
    379 
    380     def to_json(self) -> str:

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in html(self)
    195     def html(self):
    196         if self._html is None:
--> 197             self._html = self._render_html()
    198         return self._html
    199 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in _render_html(self)
    302         from pandas_profiling.report.presentation.flavours import HTMLReport
    303 
--> 304         report = self.report
    305 
    306         disable_progress_bar = not config["progress_bar"].get(bool)

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in report(self)
    189     def report(self):
    190         if self._report is None:
--> 191             self._report = get_report_structure(self.description_set)
    192         return self._report
    193 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/profile_report.py in description_set(self)
    169         if self._description_set is None:
    170             self._description_set = describe_df(
--> 171                 self.title, self.df, self.summarizer, self.typeset, self._sample
    172             )
    173         return self._description_set

/databricks/python/lib/python3.7/site-packages/pandas_profiling/model/describe.py in describe(title, df, summarizer, typeset, sample)
    105         # Scatter matrix
    106         pbar.set_postfix_str("Get scatter matrix")
--> 107         scatter_matrix = get_scatter_matrix(df, interval_columns)
    108         pbar.update()
    109 

/databricks/python/lib/python3.7/site-packages/pandas_profiling/model/summary.py in get_scatter_matrix(df, continuous_variables)
    283                     df_temp = df[[x, y]].dropna()
    284                     scatter_matrix[x][y] = scatter_pairwise(
--> 285                         df_temp[x], df_temp[y], x, y
    286                     )
    287     else:

/databricks/python/lib/python3.7/contextlib.py in inner(*args, **kwds)
     72         def inner(*args, **kwds):
     73             with self._recreate_cm():
---> 74                 return func(*args, **kwds)
     75         return inner
     76 

/databricks/python/lib/python3.7/contextlib.py in __exit__(self, type, value, traceback)
    128                 value = type()
    129             try:
--> 130                 self.gen.throw(type, value, traceback)
    131             except StopIteration as exc:
    132                 # Suppress StopIteration *unless* it's the same exception that

/databricks/python/lib/python3.7/site-packages/pandas_profiling/visualisation/context.py in manage_matplotlib_context()
     83         with warnings.catch_warnings():
     84             warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
---> 85             matplotlib.rcParams.update(originalRcParams)  # revert to original rcParams

/databricks/python/lib/python3.7/_collections_abc.py in update(*args, **kwds)
    839             if isinstance(other, Mapping):
    840                 for key in other:
--> 841                     self[key] = other[key]
    842             elif hasattr(other, "keys"):
    843                 for key in other.keys():

/databricks/python/lib/python3.7/site-packages/matplotlib/__init__.py in __setitem__(self, key, val)

/databricks/python/lib/python3.7/site-packages/matplotlib/rcsetup.py in validate_path_exists(s)

RuntimeError: "/databricks/python/lib/python3.7/site-packages/matplotlib/mpl-data" should be a path but it does not exist

以下代码应该可以在 Databricks 中正常运行:

# Example: build a pandas-profiling report for a small sample DataFrame and
# render it inline in a Databricks notebook.
#
# Install the package first, in its own notebook cell:
#   %pip install pandas-profiling

# importing packages
import pandas as pd
import pandas_profiling

from pandas_profiling import ProfileReport


# Sample data, keyed by column name and then by row index.
dct = {
    'ID': {0: 23, 1: 43, 2: 12, 3: 13,
           4: 67, 5: 89, 6: 90, 7: 56,
           8: 34},
    'Name': {0: 'Ram', 1: 'Deep', 2: 'Yash',
             3: 'Aman', 4: 'Arjun', 5: 'Aditya',
             6: 'Divya', 7: 'Chalsea',
             8: 'Akash'},
    'Marks': {0: 89, 1: 97, 2: 45, 3: 78,
              4: 56, 5: 76, 6: 100, 7: 87,
              8: 81},
    'Grade': {0: 'B', 1: 'A', 2: 'F', 3: 'C',
              4: 'E', 5: 'C', 6: 'A', 7: 'B',
              8: 'B'},
}


# forming dataframe and printing
data = pd.DataFrame(dct)
print(data)

# forming ProfileReport
profile = ProfileReport(data)

# Optional alternative: save the report as an HTML file instead of rendering
# it inline.
# profile.to_file("/dbfs/tmp/output.html")

# Render the report to an HTML string and show it in the notebook output.
# `displayHTML` is not imported here; it is expected to be provided by the
# Databricks notebook environment.
p = profile.to_html()
displayHTML(p)

有什么进展吗?我也遇到了同样的错误。我尝试修复过,但没有找到答案;不过您可以尝试使用 Koalas,并配合 Databricks 的 display()。您使用的是社区版吗?