Python TypeError:无法为列生成TypeSpec_Python_Python 3.x_Tensorflow_Machine Learning

Python TypeError:无法为列生成TypeSpec

python python-3.x tensorflow machine-learning

Python TypeError:无法为列生成TypeSpec,python,python-3.x,tensorflow,machine-learning,Python,Python 3.x,Tensorflow,Machine Learning,我试图从以下数据集中的“名称”、“平台”、“流派”、“出版商”和“年份”值预测全球销售额：这是我培训模型的代码： from __future__ import absolute_import, division, print_function, unicode_literals import numpy as np import pandas as pd import matplotlib.pyplot as plt from IPython.display import clear_out

我试图从以下数据集中的“名称”、“平台”、“流派”、“出版商”和“年份”值预测全球销售额：

这是我训练模型的代码：

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow as tf

# Load the two CSV splits.
# NOTE(review): the training frame is read from 'vgsales_eval.csv' and the
# evaluation frame from 'vgsales_train.csv' - the file names look swapped;
# confirm against the actual files.
dftrain = pd.read_csv('./vgsales_eval.csv')
dfeval = pd.read_csv('./vgsales_train.csv')

# Print the training rows that have at least one missing value. The rows are
# only displayed; nothing in this snippet drops or fills them.
print(dftrain[dftrain.isnull().any(axis=1)])

# Remove the label column from each frame and keep it as a separate Series.
y_train = dftrain.pop('Global_Sales')
y_eval = dfeval.pop('Global_Sales')

# Names of the feature columns, grouped by how they are encoded below.
CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
  # The vocabulary is every distinct value of the column in the training frame.
  vocabulary = dftrain[feature_name].unique()  # gets a list of all unique values from given feature column
  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
  # Declared as int64; presumably the column in the frame must have a matching
  # integer dtype - TODO confirm.
  feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int64))

print(feature_columns)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
  """Return a zero-argument function that builds the input tf.data.Dataset.

  Args:
    data_df: feature table; presumably a pandas DataFrame (the callers in this
      snippet pass dftrain / dfeval). It is converted with dict(data_df).
    label_df: labels paired element-wise with the features.
    num_epochs: number of times the batched dataset is repeated.
    shuffle: when true, shuffle with a buffer of 1000 elements before batching.
    batch_size: number of elements per batch.

  Returns:
    The inner input_function; calling it creates and returns the dataset.
  """
  def input_function():
    # Every column is converted to a tensor here. The traceback reported with
    # this snippet is raised from this call: a string column containing NaN
    # cannot be converted ("Expected binary or unicode string, got nan").
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      ds = ds.shuffle(1000)
    # Batch first, then repeat the batched dataset num_epochs times.
    ds = ds.batch(batch_size).repeat(num_epochs)
    return ds
  return input_function

# Training input uses the make_input_fn defaults (10 epochs, shuffling, batches of 32).
train_input_fn = make_input_fn(dftrain, y_train)
# Evaluation input: a single pass over the data, in order.
eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)

# NOTE(review): Global_Sales looks like a continuous target, while
# LinearClassifier is a classification estimator - tf.estimator.LinearRegressor
# may be what is intended; confirm.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
# train() invokes train_input_fn; the reported TypeError surfaces from this call.
linear_est.train(train_input_fn)

我得到以下错误：

Traceback (most recent call last):
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 93, in normalize_element
    spec = type_spec_from_value(t, use_fallback=False)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 466, in type_spec_from_value
    (element, type(element).__name__))
TypeError: Could not build a TypeSpec for 0                 Tecmo Koei
1       Nippon Ichi Software
2                    Ubisoft
3                 Activision
4                      Atari
                ...
6594                   Kemco
6595              Infogrames
6596              Activision
6597                7G//AMES
6598                 Wanadoo
Name: Publisher, Length: 6599, dtype: object with type Series

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 45, in <module>
    linear_est.train(train_input_fn)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 349, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1175, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1201, in _train_model_default
    self._get_features_and_labels_from_input_fn(input_fn, ModeKeys.TRAIN))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1037, in _get_features_and_labels_from_input_fn
    self._call_input_fn(input_fn, mode))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1130, in _call_input_fn
    return input_fn(**kwargs)
  File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 34, in input_function
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 682, in from_tensor_slices
    return TensorSliceDataset(tensors)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 3001, in __init__
    element = structure.normalize_element(element)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 98, in normalize_element
    ops.convert_to_tensor(t, name="component_%d" % i))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1499, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 338, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
    allow_broadcast=True)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 282, in _constant_impl
    allow_broadcast=allow_broadcast))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 563, in make_tensor_proto
    append_fn(tensor_proto, proto_values)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in SlowAppendObjectArrayToTensorProto
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in <listcomp>
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\compat.py", line 87, in as_bytes
    (bytes_or_text,))
TypeError: Expected binary or unicode string, got nan

回溯（最近一次呼叫最后一次）：
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\util\structure.py”，第93行，在normalize\u元素中
spec=类型\u spec\u from\u value（t，使用\u fallback=False）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\util\structure.py”，第466行，类型为\u spec\u from\u value
（元素，类型（元素）。_名称_;））
TypeError:无法为0 Tecmo Koei生成TypeSpec
1日本一软件公司
2育碧软件
3动视
4雅达利
...
6594凯姆科
6595份资讯报
6596活动
6597 7G//AMES
6598瓦纳多
名称：Publisher，长度：6599，数据类型：类型为Series的对象
在处理上述异常期间，发生了另一个异常：
回溯（最近一次呼叫最后一次）：
文件“c:\Users\kuhn-\Documents\Github\Tensorflow\u Test\videogamessales\u Test\main.py”，第45行，在
线性测试序列（序列输入）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator.py”，第349行，列车中
损失=自我训练模型（输入、挂钩、保存侦听器）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator.py”，第1175行，列车模型
返回self.\u train\u model\u default（输入\u fn、挂钩、保存\u侦听器）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator.py”，第1201行，默认为列车模型
self.\u从\u input\u fn（input\u fn，ModeKeys.TRAIN）获取\u功能\u和\u标签\u
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator\estimator.py”，第1037行，位于从输入获取特征和标签中
自。\调用\输入\ fn（输入\ fn，模式））
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator.py”，第1130行，输入
返回输入_fn（**kwargs）
文件“c:\Users\kuhn-\Documents\Github\Tensorflow\u Test\videogamessales\u Test\main.py”，第34行，在输入函数中
ds=tf.data.Dataset.from_tensor_切片（（dict（data_df），label_df））
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\ops\dataset\u ops.py”，第682行，在from\u tensor\u切片中
返回张量数据集（张量）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\ops\dataset\u ops.py”，第3001行，在\uuu init中__
元素=结构。规范化元素（元素）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\util\structure.py”，第98行，在normalize\u元素中
运算。将\u转换为\u张量（t，name=“组件%d”%i））
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\ops.py”，第1499行，在convert\u-tensor中
ret=conversion\u func（值，dtype=dtype，name=name，as\u ref=as\u ref）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\constant\u op.py”，第338行，在常量张量转换函数中
返回常量（v，dtype=dtype，name=name）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\constant\u op.py”，第264行，常量
允许（广播=真）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\constant\u op.py”，第282行，在常量\u impl中
允许广播=允许广播）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\tensor\u util.py”，第563行，在make\u tensor\u proto中
附加_fn（张量_原型，原型值）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\tensor\u util.py”，第155行，位于SlowAppendObjectArrayToTensorProto中
tensor_proto.string_val.extend（[compat.as_bytes（x）表示proto_值中的x]）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\tensor\u util.py”，第155行，在
tensor_proto.string_val.extend（[compat.as_bytes（x）表示proto_值中的x]）
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\util\compat.py”，第87行，以字节为单位
（字节或文本）
TypeError:应为二进制或unicode字符串，未找到nan

我做错了什么？这是数据集的问题还是我必须以不同的方式读取值？

这基本上与您获取的数据中存在的

null

值有关，您需要在加载数据时对其进行处理

我做了一些改变

我使用 df.dropna 删除了含有空值的记录；您也可以根据具体的列和需要填充的值（注意数据类型）执行

df.fillna

我已将列

Year

数据类型从

float

更改为

int

。因为这会导致张量切片的另一个问题

下面是修改后的代码，与您获取的数据相同

df = pd.read_csv('/content/vgsales.csv')

# Show the rows that contain at least one missing value before cleaning.
print(df[df.isnull().any(axis=1)])

# Drop every row with a missing value: a NaN inside a string column is what
# makes tf.data.Dataset.from_tensor_slices fail with
# "Expected binary or unicode string, got nan".
# Alternative: df.fillna(...) with a per-column fill value of the right dtype.
df.dropna(how="any", inplace=True)

# Year is loaded as float; cast it to int so it matches the tf.int64 numeric
# feature column declared below.
df['Year'] = df['Year'].astype(int)

# Split the label off only after cleaning, so features and labels stay aligned
# row for row. Without this, y_train is undefined when the input function is
# built from this frame.
y_train = df.pop('Global_Sales')

CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

# Categorical features: the vocabulary is every distinct value of the column.
feature_columns = [
  tf.feature_column.categorical_column_with_vocabulary_list(
      feature_name, df[feature_name].unique())
  for feature_name in CATEGORICAL_COLUMNS
]
# Numeric features are passed through as int64 values.
feature_columns.extend(
  tf.feature_column.numeric_column(feature_name, dtype=tf.int64)
  for feature_name in NUMERIC_COLUMNS
)

print(feature_columns)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
  """Build an input function for a tf.estimator.

  Args:
    data_df: feature table, converted to a mapping with dict(data_df).
    label_df: labels paired element-wise with the features.
    num_epochs: number of times the batched dataset is repeated.
    shuffle: when true, shuffle with a buffer of 1000 elements before batching.
    batch_size: number of elements per batch.

  Returns:
    A zero-argument callable that creates and returns the tf.data.Dataset.
  """
  def input_function():
    features = dict(data_df)
    dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
    # Shuffling happens before batching, so individual rows are shuffled.
    dataset = dataset.shuffle(1000) if shuffle else dataset
    return dataset.batch(batch_size).repeat(num_epochs)

  return input_function

train_input_fn = make_input_fn(df, y_train)
# Global_Sales is a continuous quantity, so this is a regression problem.
# LinearClassifier treats labels as class ids; LinearRegressor fits a
# real-valued target.
linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns)

您确定要在Tensorflow 2.x中使用

估计器吗？TensorFlow 2.x 的大部分功能都已集中在 Keras（tf.keras）中。