如何使用 TensorFlow 的 feature_column 作为 Keras 模型的输入
标签:tensorflow、keras。TensorFlow 的 feature_column API 对于非数值特征的处理非常有用。然而,当前的 API 文档更多地是介绍如何在 TensorFlow Estimator 中使用 feature_column。是否可以使用 feature_column 来表示分类特征,然后基于 tf.keras 构建模型?我找到的唯一参考是下面的教程,它展示了如何将特征列传入 Keras Sequential 模型。代码片段如下所示:
# Tutorial excerpt: wrap a list of feature columns in a FeatureLayer and use
# that layer as the first layer of a Keras Sequential model.
# NOTE(review): `ccv`, `tf` and `dataset` are not defined inside this excerpt,
# and the `...` entries mark parts that were elided from the original.
from tensorflow.python.feature_column import feature_column_v2 as fc
feature_columns = [fc.embedding_column(ccv, dimension=3), ...]
feature_layer = fc.FeatureLayer(feature_columns)
model = tf.keras.Sequential([
feature_layer,
tf.keras.layers.Dense(128, activation=tf.nn.relu),
tf.keras.layers.Dense(64, activation=tf.nn.relu),
tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
...
model.fit(dataset, steps_per_epoch=8) # dataset is created from tensorflow Dataset API
问题是:如何在使用 Keras 函数式 API(functional API)的自定义模型中使用特征列。我尝试了以下方法,但没有成功(TensorFlow 1.12 版):
错误日志:
ValueError: Input tensors to a Model must come from `tf.layers.Input`. Received: Tensor("feature_layer/concat:0", shape=(4, 3), dtype=float32) (missing previous layer metadata).
我不知道如何将特征列转换为 Keras 模型的输入。
可以使用这个函数;该 API 文档中有一个示例。
您可以将 feature_column 转换为 Tensor,然后将其用于 Model()。我最近在 TensorFlow 2.0 alpha 版本中读到过相关内容,其中有将 Keras 与 feature column API 一起使用的示例。不确定 TF 2.0 是否是您要使用的版本。如果您使用 TensorFlow 的 Dataset API,下面的代码应该可以正常工作:
# Apply a DenseFeatures layer inside the tf.data pipeline: each (x, y) element
# of the train and test datasets is mapped to (featurlayer(x), y) before being
# passed to model.fit.
# NOTE(review): the values for `epochs=`, `steps_per_epoch=` and
# `validation_steps=` are left blank in this excerpt and must be filled in
# before the snippet is valid Python.
featurlayer = keras.layers.DenseFeatures(feature_columns=feature_columns)
train_dataset = train_dataset.map(lambda x, y: (featurlayer(x), y))
test_dataset = test_dataset.map(lambda x, y: (featurlayer(x), y))
model.fit(train_dataset, epochs=, steps_per_epoch=, # all_data/batch_num =
validation_data=test_dataset,
validation_steps=)
您想要的行为是可以实现的:tf.feature_column 可以与 Keras 函数式 API 结合使用。事实上,TF 文档中并没有提到这一点。
这至少在TF2.0.0-beta1中起作用,但在以后的版本中可能会被更改甚至简化
请查看 TensorFlow GitHub 仓库中的相关 issue,在那里可以找到关于 tf.feature_column 和 Keras 函数式 API 的有用评论。
因为您询问的是通用方法,所以我直接从上面的链接复制了带有示例的代码片段。更新:下面的代码应该可以工作。
from __future__ import absolute_import, division, print_function
import numpy as np
import pandas as pd
#!pip install tensorflow==2.0.0-alpha0
import tensorflow as tf
from tensorflow import feature_column
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
# Fetch heart.csv through the Keras download helper and read at most
# 10000 rows of it into a DataFrame.
csv_file = tf.keras.utils.get_file(
    fname='heart.csv',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/heart.csv',
)
dataframe = pd.read_csv(csv_file, nrows=10000)
dataframe.head()  # return value is discarded when run as a plain script

# Hold out 20% of the rows as the test split, then 20% of the remaining
# rows as the validation split.
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)

print(f'{len(train)} train examples')
print(f'{len(val)} validation examples')
print(f'{len(test)} test examples')
# Define method to create tf.data dataset from Pandas Dataframe
# This worked with tf 2.0 but does not work with tf 2.2
def df_to_dataset_tf_2_0(dataframe, label_column, shuffle=True, batch_size=32):
    """Turn a pandas DataFrame into a batched tf.data.Dataset.

    The label column is read but not removed, so it also stays among the
    feature keys of every element.

    Args:
        dataframe: Source DataFrame; it is copied, not modified.
        label_column: Name of the column used as the label.
        shuffle: When true, shuffle with a buffer as large as the frame.
        batch_size: Number of rows per batch.

    Returns:
        A dataset of (feature dict, labels) batches.
    """
    frame = dataframe.copy()
    features = dict(frame)
    targets = frame[label_column]
    dataset = tf.data.Dataset.from_tensor_slices((features, targets))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(frame))
    return dataset.batch(batch_size)
def df_to_dataset(dataframe, label_column, shuffle=True, batch_size=32):
    """Turn a pandas DataFrame into a batched tf.data.Dataset.

    The label column is popped from a copy of the frame, so the feature dict
    holds every other column, each converted to a plain list.

    Args:
        dataframe: Source DataFrame; it is copied, not modified.
        label_column: Name of the column used as the label.
        shuffle: When true, shuffle with a buffer as large as the frame.
        batch_size: Number of rows per batch.

    Returns:
        A dataset of (feature dict, labels) batches.
    """
    feature_frame = dataframe.copy()
    targets = feature_frame.pop(label_column)
    columns_as_lists = feature_frame.to_dict(orient='list')
    dataset = tf.data.Dataset.from_tensor_slices((columns_as_lists, targets))
    if not shuffle:
        return dataset.batch(batch_size)
    return dataset.shuffle(buffer_size=len(feature_frame)).batch(batch_size)
# Build the train/validation/test datasets, declare the feature columns and
# the matching Keras inputs, then assemble, train and evaluate a functional
# Keras model on top of a DenseFeatures layer.
batch_size = 5  # A small batch size is used for demonstration purposes
train_ds = df_to_dataset(train, label_column='target', batch_size=batch_size)
val_ds = df_to_dataset(val, label_column='target', shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, label_column='target', shuffle=False, batch_size=batch_size)

age = feature_column.numeric_column("age")

# feature_columns is handed to DenseFeatures; feature_layer_inputs maps each
# raw feature name to the keras.Input that feeds it, which is what lets the
# functional API accept the DenseFeatures output.
feature_columns = []
feature_layer_inputs = {}

# numeric cols
for header in ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'slope', 'ca']:
    feature_columns.append(feature_column.numeric_column(header))
    feature_layer_inputs[header] = tf.keras.Input(shape=(1,), name=header)

# bucketized cols
age_buckets = feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35])
feature_columns.append(age_buckets)

# indicator cols
thal = feature_column.categorical_column_with_vocabulary_list(
    'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)
feature_columns.append(thal_one_hot)
# 'thal' is a string feature, so its Input needs an explicit string dtype.
feature_layer_inputs['thal'] = tf.keras.Input(shape=(1,), name='thal', dtype=tf.string)

# embedding cols
thal_embedding = feature_column.embedding_column(thal, dimension=8)
feature_columns.append(thal_embedding)

# crossed cols
crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000)
crossed_feature = feature_column.indicator_column(crossed_feature)
feature_columns.append(crossed_feature)

feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
feature_layer_outputs = feature_layer(feature_layer_inputs)

x = layers.Dense(128, activation='relu')(feature_layer_outputs)
x = layers.Dense(64, activation='relu')(x)
baggage_pred = layers.Dense(1, activation='sigmoid')(x)

# The model inputs must be the Input layers themselves, passed as a list.
model = keras.Model(inputs=list(feature_layer_inputs.values()), outputs=baggage_pred)
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Use the validation and test splits built above, which were previously
# created but never consumed.
model.fit(train_ds, validation_data=val_ds)
model.evaluate(test_ds)
这是正确的答案,我在 TF 1.13 下测试过,它应该获得更多的选票。但是您需要使用 `from tensorflow.python.feature_column import feature_column_v2 as fc`,以及 `dense_features = fc.DenseFeatures(columns)`。
现在应该可以使用了。诀窍是将 inputs 设置为 Input 层的列表,如下所示:`[v for v in feature_layer_inputs.values()]`。
谢谢!我曾试图将 DenseFeatures 添加到一个现有 Sequential 模型的最前面,但最终只有在函数式模型中、结合这两种做法并设置 `inputs=feature_layer_inputs` 时才能正常工作。
为什么这一行的输入 shape 是 1:`feature_layer_inputs['thal'] = tf.keras.Input(shape=(1,), name='thal', dtype=tf.string)`?
@HARSHNILESHPATHAK,“thal”列的示例演示了字符串值的预处理。也就是说,输入数据集的每条记录在 'thal' 列中只包含一个字符串值,这就是为什么 tf.keras.Input() 要使用 shape=(1,)。随后输入层把该字符串值传递给 DenseFeatures(feature_columns) 层中定义的各个特征列,每个特征列再按照自身的逻辑扩展形状,例如这里针对 'thal' 给出的 thal_one_hot 和 thal_embedding。
@prog_guy,不是的,thal_one_hot 和 thal_embedding 只是不同类型特征列的各自独立的示例。
from __future__ import absolute_import, division, print_function
import numpy as np
import pandas as pd
#!pip install tensorflow==2.0.0-alpha0
import tensorflow as tf
from tensorflow import feature_column
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
# Fetch heart.csv through the Keras download helper and read at most
# 10000 rows of it into a DataFrame.
csv_file = tf.keras.utils.get_file(
    fname='heart.csv',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/heart.csv',
)
dataframe = pd.read_csv(csv_file, nrows=10000)
dataframe.head()  # return value is discarded when run as a plain script

# Hold out 20% of the rows as the test split, then 20% of the remaining
# rows as the validation split.
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)

print(f'{len(train)} train examples')
print(f'{len(val)} validation examples')
print(f'{len(test)} test examples')
# Define method to create tf.data dataset from Pandas Dataframe
# This worked with tf 2.0 but does not work with tf 2.2
def df_to_dataset_tf_2_0(dataframe, label_column, shuffle=True, batch_size=32):
    """Turn a pandas DataFrame into a batched tf.data.Dataset.

    The label column is read but not removed, so it also stays among the
    feature keys of every element.

    Args:
        dataframe: Source DataFrame; it is copied, not modified.
        label_column: Name of the column used as the label.
        shuffle: When true, shuffle with a buffer as large as the frame.
        batch_size: Number of rows per batch.

    Returns:
        A dataset of (feature dict, labels) batches.
    """
    frame = dataframe.copy()
    features = dict(frame)
    targets = frame[label_column]
    dataset = tf.data.Dataset.from_tensor_slices((features, targets))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(frame))
    return dataset.batch(batch_size)
def df_to_dataset(dataframe, label_column, shuffle=True, batch_size=32):
    """Turn a pandas DataFrame into a batched tf.data.Dataset.

    The label column is popped from a copy of the frame, so the feature dict
    holds every other column, each converted to a plain list.

    Args:
        dataframe: Source DataFrame; it is copied, not modified.
        label_column: Name of the column used as the label.
        shuffle: When true, shuffle with a buffer as large as the frame.
        batch_size: Number of rows per batch.

    Returns:
        A dataset of (feature dict, labels) batches.
    """
    feature_frame = dataframe.copy()
    targets = feature_frame.pop(label_column)
    columns_as_lists = feature_frame.to_dict(orient='list')
    dataset = tf.data.Dataset.from_tensor_slices((columns_as_lists, targets))
    if not shuffle:
        return dataset.batch(batch_size)
    return dataset.shuffle(buffer_size=len(feature_frame)).batch(batch_size)
# Build the train/validation/test datasets, declare the feature columns and
# the matching Keras inputs, then assemble, train and evaluate a functional
# Keras model on top of a DenseFeatures layer.
batch_size = 5  # A small batch size is used for demonstration purposes
train_ds = df_to_dataset(train, label_column='target', batch_size=batch_size)
val_ds = df_to_dataset(val, label_column='target', shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, label_column='target', shuffle=False, batch_size=batch_size)

age = feature_column.numeric_column("age")

# feature_columns is handed to DenseFeatures; feature_layer_inputs maps each
# raw feature name to the keras.Input that feeds it, which is what lets the
# functional API accept the DenseFeatures output.
feature_columns = []
feature_layer_inputs = {}

# numeric cols
for header in ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'slope', 'ca']:
    feature_columns.append(feature_column.numeric_column(header))
    feature_layer_inputs[header] = tf.keras.Input(shape=(1,), name=header)

# bucketized cols
age_buckets = feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35])
feature_columns.append(age_buckets)

# indicator cols
thal = feature_column.categorical_column_with_vocabulary_list(
    'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)
feature_columns.append(thal_one_hot)
# 'thal' is a string feature, so its Input needs an explicit string dtype.
feature_layer_inputs['thal'] = tf.keras.Input(shape=(1,), name='thal', dtype=tf.string)

# embedding cols
thal_embedding = feature_column.embedding_column(thal, dimension=8)
feature_columns.append(thal_embedding)

# crossed cols
crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000)
crossed_feature = feature_column.indicator_column(crossed_feature)
feature_columns.append(crossed_feature)

feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
feature_layer_outputs = feature_layer(feature_layer_inputs)

x = layers.Dense(128, activation='relu')(feature_layer_outputs)
x = layers.Dense(64, activation='relu')(x)
baggage_pred = layers.Dense(1, activation='sigmoid')(x)

# The model inputs must be the Input layers themselves, passed as a list.
model = keras.Model(inputs=list(feature_layer_inputs.values()), outputs=baggage_pred)
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Use the validation and test splits built above, which were previously
# created but never consumed.
model.fit(train_ds, validation_data=val_ds)
model.evaluate(test_ds)