Python TypeError:无法为列生成TypeSpec

Python TypeError:无法为列生成TypeSpec,python,python-3.x,tensorflow,machine-learning,Python,Python 3.x,Tensorflow,Machine Learning,我试图从以下数据集中的“名称”、“平台”、“流派”、“出版商”和“年份”值预测全球销售额: 这是我培训模型的代码: from __future__ import absolute_import, division, print_function, unicode_literals import numpy as np import pandas as pd import matplotlib.pyplot as plt from IPython.display import clear_out

我试图从以下数据集中的“名称”、“平台”、“流派”、“出版商”和“年份”值预测全球销售额:

这是我训练模型的代码:

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow as tf

dftrain = pd.read_csv('./vgsales_eval.csv')  # NOTE(review): the training frame is read from the *_eval file and the eval frame (next line) from the *_train file - confirm the names are not swapped
dfeval = pd.read_csv('./vgsales_train.csv')

print(dftrain[dftrain.isnull().any(axis=1)])  # show every training row that has at least one missing value

y_train = dftrain.pop('Global_Sales')  # remove the label column from the features and keep it separately
y_eval = dfeval.pop('Global_Sales')

CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
NUMERIC_COLUMNS = ['Year']

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
  vocabulary = dftrain[feature_name].unique()  # all unique values of this feature column (NaN is included if the column has missing values)
  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
  feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int64))  # NOTE(review): declared as int64; a column with missing values is presumably loaded as float - confirm the dtypes agree

print(feature_columns)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):  # build and return a zero-argument input function over the given features and labels
  def input_function():  # takes no arguments and returns the prepared tf.data.Dataset
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))  # (feature dict, label) pairs; the reported TypeError is raised by this call
    if shuffle:
      ds = ds.shuffle(1000)  # shuffle using a buffer of 1000 elements
    ds = ds.batch(batch_size).repeat(num_epochs)  # batch first, then repeat the batched data num_epochs times
    return ds
  return input_function  # the function object itself is returned, not its result

train_input_fn = make_input_fn(dftrain, y_train)  # uses the defaults: 10 epochs, shuffled, batch size 32
eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)  # one unshuffled pass over the eval data

linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)  # NOTE(review): Global_Sales looks like a continuous target, so a regressor (e.g. tf.estimator.LinearRegressor) is probably intended - confirm
linear_est.train(train_input_fn)  # this call is where the reported traceback originates
我得到以下错误:

Traceback (most recent call last):
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 93, in normalize_element
    spec = type_spec_from_value(t, use_fallback=False)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 466, in type_spec_from_value
    (element, type(element).__name__))
TypeError: Could not build a TypeSpec for 0                 Tecmo Koei
1       Nippon Ichi Software
2                    Ubisoft
3                 Activision
4                      Atari
                ...
6594                   Kemco
6595              Infogrames
6596              Activision
6597                7G//AMES
6598                 Wanadoo
Name: Publisher, Length: 6599, dtype: object with type Series

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 45, in <module>
    linear_est.train(train_input_fn)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 349, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1175, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1201, in _train_model_default
    self._get_features_and_labels_from_input_fn(input_fn, ModeKeys.TRAIN))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1037, in _get_features_and_labels_from_input_fn
    self._call_input_fn(input_fn, mode))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1130, in _call_input_fn
    return input_fn(**kwargs)
  File "c:\Users\kuhn-\Documents\Github\Tensorflow_Test\VideoGameSales_Test\main.py", line 34, in input_function
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 682, in from_tensor_slices
    return TensorSliceDataset(tensors)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 3001, in __init__
    element = structure.normalize_element(element)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\util\structure.py", line 98, in normalize_element
    ops.convert_to_tensor(t, name="component_%d" % i))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1499, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 338, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
    allow_broadcast=True)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\constant_op.py", line 282, in _constant_impl
    allow_broadcast=allow_broadcast))
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 563, in make_tensor_proto
    append_fn(tensor_proto, proto_values)
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in SlowAppendObjectArrayToTensorProto
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 155, in <listcomp>
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\compat.py", line 87, in as_bytes
    (bytes_or_text,))
TypeError: Expected binary or unicode string, got nan
回溯(最近一次调用在最后):
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\util\structure.py”,第93行,在normalize\u元素中
spec=类型\u spec\u from\u value(t,使用\u fallback=False)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\util\structure.py”,第466行,类型为\u spec\u from\u value
(元素,类型(元素)。_名称_;))
TypeError:无法为0 Tecmo Koei生成TypeSpec
1日本一软件公司
2育碧软件
3动视
4雅达利
...
6594凯姆科
6595份资讯报
6596活动
6597 7G//AMES
6598瓦纳多
名称:Publisher,长度:6599,数据类型:类型为Series的对象
在处理上述异常期间,发生了另一个异常:
回溯(最近一次调用在最后):
文件“c:\Users\kuhn-\Documents\Github\Tensorflow\u Test\videogamessales\u Test\main.py”,第45行,在
线性测试序列(序列输入)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator.py”,第349行,列车中
损失=自我训练模型(输入、挂钩、保存侦听器)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator.py”,第1175行,列车模型
返回self.\u train\u model\u default(输入\u fn、挂钩、保存\u侦听器)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator.py”,第1201行,默认为列车模型
self.\u从\u input\u fn(input\u fn,ModeKeys.TRAIN)获取\u功能\u和\u标签\u
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator\estimator.py”,第1037行,位于从输入获取特征和标签中
自。\调用\输入\ fn(输入\ fn,模式))
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\u estimator\Python\estimator\estimator.py”,第1130行,输入
返回输入_fn(**kwargs)
文件“c:\Users\kuhn-\Documents\Github\Tensorflow\u Test\videogamessales\u Test\main.py”,第34行,在输入函数中
ds=tf.data.Dataset.from_tensor_切片((dict(data_df),label_df))
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\ops\dataset\u ops.py”,第682行,在from\u tensor\u切片中
返回张量数据集(张量)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\ops\dataset\u ops.py”,第3001行,在\uuu init中__
元素=结构。规范化元素(元素)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\data\util\structure.py”,第98行,在normalize\u元素中
运算。将\u转换为\u张量(t,name=“组件%d”%i))
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\ops.py”,第1499行,在convert\u-tensor中
ret=conversion\u func(值,dtype=dtype,name=name,as\u ref=as\u ref)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\constant\u op.py”,第338行,在常量张量转换函数中
返回常量(v,dtype=dtype,name=name)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\constant\u op.py”,第264行,常量
允许(广播=真)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\constant\u op.py”,第282行,在常量\u impl中
允许广播=允许广播)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\tensor\u util.py”,第563行,在make\u tensor\u proto中
附加_fn(张量_原型,原型值)
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\tensor\u util.py”,第155行,位于SlowAppendObjectArrayToTensorProto中
tensor_proto.string_val.extend([compat.as_bytes(x)表示proto_值中的x])
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\framework\tensor\u util.py”,第155行,在
tensor_proto.string_val.extend([compat.as_bytes(x)表示proto_值中的x])
文件“C:\Users\kuhn-\AppData\Local\Programs\Python\Python36\lib\site packages\tensorflow\Python\util\compat.py”,第87行,以字节为单位
(字节或文本)
TypeError:应为二进制或unicode字符串,但得到的是nan

我做错了什么?这是数据集的问题还是我必须以不同的方式读取值?

这基本上与您获取的数据中存在的
null
值有关,您需要在加载数据时对其进行处理

我做了一些改变

  • 我用 df.dropna 删除了含有空值的记录;也可以根据各列的数据类型选择合适的填充值,改用
    df.fillna
  • 我已将列
    Year
    数据类型从
    float
    更改为
    int
    。因为float类型的Year列会在构造张量切片时引发另一个问题
  • 下面是修改后的代码,使用的是与您相同的数据

    # Load the complete sales table and print the rows that have missing values.
    df = pd.read_csv('/content/vgsales.csv')
    rows_with_nulls = df.isnull().any(axis=1)
    print(df[rows_with_nulls])

    # Drop every incomplete row (filling with df.fillna is the alternative),
    # then store Year as an integer so it matches the int64 numeric column below.
    df.dropna(how="any", inplace=True)
    df.Year = df.Year.astype(int)

    CATEGORICAL_COLUMNS = ['Name', 'Platform', 'Genre', 'Publisher']
    NUMERIC_COLUMNS = ['Year']

    # One vocabulary-list column per categorical feature, built from the
    # unique values that remain after the cleaning step above.
    categorical_features = [
        tf.feature_column.categorical_column_with_vocabulary_list(column, df[column].unique())
        for column in CATEGORICAL_COLUMNS
    ]
    # One int64 numeric column per numeric feature.
    numeric_features = [
        tf.feature_column.numeric_column(column, dtype=tf.int64)
        for column in NUMERIC_COLUMNS
    ]
    feature_columns = categorical_features + numeric_features

    print(feature_columns)
    
    def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
      """Return a zero-argument function that builds the tf.data pipeline.

      The pipeline slices (feature dict, label) pairs out of ``data_df`` and
      ``label_df``, optionally shuffles them with a 1000-element buffer, groups
      them into batches of ``batch_size`` and repeats the batched data
      ``num_epochs`` times.
      """
      def input_function():
        features = dict(data_df)
        dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
        if shuffle:
          dataset = dataset.shuffle(1000)
        return dataset.batch(batch_size).repeat(num_epochs)
      return input_function
    
    # Take the label from the cleaned frame itself: this keeps the labels
    # row-aligned with the features after dropna and removes the target
    # column from the feature dict passed to the input function.
    y_train = df.pop('Global_Sales')
    train_input_fn = make_input_fn(df, y_train)
    # Global_Sales is a continuous quantity, so fit a regressor rather than
    # a classifier.
    linear_est = tf.estimator.LinearRegressor(feature_columns=feature_columns)
    

    您确定要在TensorFlow 2.x中使用
    估计器(Estimator)吗?TensorFlow 2.x的大部分功能都在
    Keras中(
    tf.keras
    )。