Python 使用文本的Tensorflow输入管道
在过去的几周里，我尝试在 TensorFlow（TF 2.0.1）下使用 tf.records 运行输入管道：从 CSV 中加载句子并生成记录。代码如下：
import tensorflow as tf
import pathlib
import sys
import csv
# Resolve the project root (the directory containing this file, or its parent
# when the file lives inside a 'models' directory) as a forward-slash path.
PATH_PARENT = str(pathlib.Path(__file__).parent.absolute())
if PATH_PARENT.endswith('models'):
    # Stripping 'models' leaves the original separator, so the result already
    # ends with '/' or '\' here.
    PATH_PARENT = PATH_PARENT[:-len('models')]
PATH_PARENT = PATH_PARENT.replace("\\", '/')
# Bug fix: when the script does NOT live in a 'models' directory the original
# value had no trailing separator, so later joins such as
# PATH_PARENT + "models/..." produced ".../parentmodels/...". Guarantee a
# trailing '/' in every case.
if not PATH_PARENT.endswith('/'):
    PATH_PARENT += '/'
sys.path.append(PATH_PARENT)
def create_tf_example(features, label):
    """Wrap one sentence string and its class label into a tf.train.Example.

    Both values are stored as single-element UTF-8 byte features under the
    keys 'Sentence' and 'Class'.
    """
    def _bytes_feature(text):
        # One UTF-8 encoded string as a bytes-list feature.
        return tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[text.encode('utf-8')]))

    return tf.train.Example(features=tf.train.Features(feature={
        'Sentence': _bytes_feature(features),
        'Class': _bytes_feature(label),
    }))
# CSV files to convert; each row holds words plus slot markers,
# e.g. musik,<slot_0>,play,<slot_music_controle>
intent_load_list = ["training_data_intent_Music_controler_0.csv"]
for load_intent in intent_load_list:
    # The label is everything between the "training_data_" prefix and the
    # final "_<index>.csv" suffix (the magic number 14 == len("training_data_")).
    start = len("training_data_")
    end = load_intent.rfind("_")
    label = load_intent[start:end]
    print("loading intent " + label)

    # Collect one cleaned sentence per CSV row, dropping slot markers.
    csv_data = []
    with open(PATH_PARENT + "models/" + load_intent, 'r') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            clean_output = ''
            for word in row:
                if '<' not in word:
                    clean_output = clean_output + word + ' '
            csv_data.append(clean_output)

    # NOTE(review): the same output file is reused for every intent, so each
    # loop iteration overwrites the previous dataset — confirm this is intended.
    with tf.io.TFRecordWriter(PATH_PARENT + "models/dataset.tfrecords") as writer:
        for row in csv_data:
            example = create_tf_example(row, label)
            writer.write(example.SerializeToString())
    # Bug fix: the original called writer.close() after the 'with' block, which
    # closed the already-closed writer; the context manager handles closing.
有没有人尝试过类似的东西或有什么想法？不幸的是，文档没有多大帮助。提前感谢。
【回答】当您未在数据中传递标签时，tensorflow 版本 2.0.1 会出现此错误。在下面的示例中，我使用 TFRecordWriter 写入虚拟的 Input 值，然后使用 TFRecordDataset 读取它并将其传递给模型。
如果在 tensorflow 版本 2.1.0 中运行相同的代码，则错误信息将变为 IndexError: tuple index out of range。
此外，如果在 tensorflow 版本 2.2.0 中运行相同的代码，错误信息将变为 ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0', 'dense_2/kernel:0', 'dense_2/bias:0']。
重新生成错误的代码-
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
print(tf.__version__)
def write_date_tfrecord():
    """Write 10 dummy float values into Data.tf_record as a single Example.

    Bug fix: the original never closed the TFRecordWriter, so the record was
    not guaranteed to be flushed to disk; a context manager closes it reliably.
    """
    # 10 dummy values to replicate the issue.
    Input = [20191221.123 + x for x in range(0, 10)]
    print("Writing Input - ", Input)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Input': tf.train.Feature(
                    float_list=tf.train.FloatList(value=Input))
            }
        ))
    with tf.io.TFRecordWriter("Data.tf_record") as writer:
        writer.write(example.SerializeToString())
def parse_function(serialized_example):
    """Deserialize one record into its 'Input' float tensor.

    'Input' is a variable-length float feature; allow_missing=True lets
    records omit values without raising.
    """
    feature_spec = {
        'Input': tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
    }
    parsed = tf.io.parse_single_example(
        serialized=serialized_example, features=feature_spec)
    return parsed['Input']
def dataset_generator():
    """Read Data.tf_record back and parse every record into a float tensor."""
    records = tf.data.TFRecordDataset("Data.tf_record")
    return records.map(
        parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# Write the dummy record, then build and fit a small dense classifier on it.
write_date_tfrecord()
generator = dataset_generator()
# NOTE(review): dataset_generator() yields only the 'Input' tensor and no
# labels, while the model is compiled with 'categorical_crossentropy', which
# needs targets — this mismatch produces the version-dependent errors
# discussed in the surrounding text.
build_model = tf.keras.Sequential()
build_model.add(tf.keras.layers.Input(shape=(1,)))
build_model.add(tf.keras.layers.Dense(50, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(20, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(3, activation='softmax'))
build_model.summary()
build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
print("done")
2.0.1
Writing Input - [20191221.123, 20191222.123, 20191223.123, 20191224.123, 20191225.123, 20191226.123, 20191227.123, 20191228.123, 20191229.123, 20191230.123]
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 50) 100
_________________________________________________________________
dropout (Dropout) (None, 50) 0
_________________________________________________________________
dense_1 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_1 (Dropout) (None, 20) 0
_________________________________________________________________
dense_2 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/Unknown - 0s 60ms/step
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-1-d1c5c463cdc2> in <module>()
47 build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
48
---> 49 build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
50 print("done")
20 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
164
165 if hasattr(loss_fn, 'reduction'):
--> 166 per_sample_losses = loss_fn.call(targets[i], outs[i])
167 weighted_losses = losses_utils.compute_weighted_loss(
168 per_sample_losses,
IndexError: list index out of range
2.1.0
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_12 (Dense) (None, 50) 100
_________________________________________________________________
dropout_8 (Dropout) (None, 50) 0
_________________________________________________________________
dense_13 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_9 (Dropout) (None, 20) 0
_________________________________________________________________
dense_14 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/1 [==============================] - 0s 362ms/step - loss: 8705754.0000 - accuracy: 0.0000e+00
Epoch 2/5
1/1 [==============================] - 0s 14ms/step - loss: 4458477.5000 - accuracy: 0.2222
Epoch 3/5
1/1 [==============================] - 0s 16ms/step - loss: 5933292.5000 - accuracy: 0.2222
Epoch 4/5
1/1 [==============================] - 0s 16ms/step - loss: 4305070.0000 - accuracy: 0.1111
Epoch 5/5
1/1 [==============================] - 0s 14ms/step - loss: 5578528.5000 - accuracy: 0.1111
done
输出-
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
print(tf.__version__)
def write_date_tfrecord():
    """Write 10 dummy float values into Data.tf_record as a single Example.

    Bug fix: the original never closed the TFRecordWriter, so the record was
    not guaranteed to be flushed to disk; a context manager closes it reliably.
    """
    # 10 dummy values to replicate the issue.
    Input = [20191221.123 + x for x in range(0, 10)]
    print("Writing Input - ", Input)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Input': tf.train.Feature(
                    float_list=tf.train.FloatList(value=Input))
            }
        ))
    with tf.io.TFRecordWriter("Data.tf_record") as writer:
        writer.write(example.SerializeToString())
def parse_function(serialized_example):
    # Parse one serialized Example; 'Input' is a variable-length float list
    # (allow_missing=True pads absent values).
    features = {
        'Input': tf.io.FixedLenSequenceFeature([], tf.float32,allow_missing=True)
    }
    features = tf.io.parse_single_example(serialized=serialized_example, features=features)
    Input = features['Input']
    return Input
def dataset_generator():
    # Read the record file back and map the parser over every record.
    trRecordDataset = tf.data.TFRecordDataset("Data.tf_record")
    trRecordDataset = trRecordDataset.map(parse_function, num_parallel_calls = tf.data.experimental.AUTOTUNE)
    return trRecordDataset
write_date_tfrecord()
generator = dataset_generator()
# NOTE(review): the dataset yields only inputs and no labels, while the model
# is compiled with 'categorical_crossentropy', which needs targets — this
# mismatch causes the version-dependent errors shown below.
build_model = tf.keras.Sequential()
build_model.add(tf.keras.layers.Input(shape=(1,)))
build_model.add(tf.keras.layers.Dense(50, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(20, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(3, activation='softmax'))
build_model.summary()
build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
print("done")
2.0.1
Writing Input - [20191221.123, 20191222.123, 20191223.123, 20191224.123, 20191225.123, 20191226.123, 20191227.123, 20191228.123, 20191229.123, 20191230.123]
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 50) 100
_________________________________________________________________
dropout (Dropout) (None, 50) 0
_________________________________________________________________
dense_1 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_1 (Dropout) (None, 20) 0
_________________________________________________________________
dense_2 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/Unknown - 0s 60ms/step
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-1-d1c5c463cdc2> in <module>()
47 build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
48
---> 49 build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
50 print("done")
20 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
164
165 if hasattr(loss_fn, 'reduction'):
--> 166 per_sample_losses = loss_fn.call(targets[i], outs[i])
167 weighted_losses = losses_utils.compute_weighted_loss(
168 per_sample_losses,
IndexError: list index out of range
2.1.0
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_12 (Dense) (None, 50) 100
_________________________________________________________________
dropout_8 (Dropout) (None, 50) 0
_________________________________________________________________
dense_13 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_9 (Dropout) (None, 20) 0
_________________________________________________________________
dense_14 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/1 [==============================] - 0s 362ms/step - loss: 8705754.0000 - accuracy: 0.0000e+00
Epoch 2/5
1/1 [==============================] - 0s 14ms/step - loss: 4458477.5000 - accuracy: 0.2222
Epoch 3/5
1/1 [==============================] - 0s 16ms/step - loss: 5933292.5000 - accuracy: 0.2222
Epoch 4/5
1/1 [==============================] - 0s 16ms/step - loss: 4305070.0000 - accuracy: 0.1111
Epoch 5/5
1/1 [==============================] - 0s 14ms/step - loss: 5578528.5000 - accuracy: 0.1111
done
输出-
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
print(tf.__version__)
def write_date_tfrecord():
    """Write 10 dummy float values into Data.tf_record as a single Example.

    Bug fix: the original never closed the TFRecordWriter, so the record was
    not guaranteed to be flushed to disk; a context manager closes it reliably.
    """
    # 10 dummy values to replicate the issue.
    Input = [20191221.123 + x for x in range(0, 10)]
    print("Writing Input - ", Input)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Input': tf.train.Feature(
                    float_list=tf.train.FloatList(value=Input))
            }
        ))
    with tf.io.TFRecordWriter("Data.tf_record") as writer:
        writer.write(example.SerializeToString())
def parse_function(serialized_example):
    # Parse one serialized Example; 'Input' is a variable-length float list
    # (allow_missing=True pads absent values).
    features = {
        'Input': tf.io.FixedLenSequenceFeature([], tf.float32,allow_missing=True)
    }
    features = tf.io.parse_single_example(serialized=serialized_example, features=features)
    Input = features['Input']
    return Input
def dataset_generator():
    # Read the record file back and map the parser over every record.
    trRecordDataset = tf.data.TFRecordDataset("Data.tf_record")
    trRecordDataset = trRecordDataset.map(parse_function, num_parallel_calls = tf.data.experimental.AUTOTUNE)
    return trRecordDataset
write_date_tfrecord()
generator = dataset_generator()
# NOTE(review): the dataset yields only inputs and no labels, while the model
# is compiled with 'categorical_crossentropy', which needs targets — this
# mismatch causes the version-dependent errors shown below.
build_model = tf.keras.Sequential()
build_model.add(tf.keras.layers.Input(shape=(1,)))
build_model.add(tf.keras.layers.Dense(50, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(20, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(3, activation='softmax'))
build_model.summary()
build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
print("done")
2.0.1
Writing Input - [20191221.123, 20191222.123, 20191223.123, 20191224.123, 20191225.123, 20191226.123, 20191227.123, 20191228.123, 20191229.123, 20191230.123]
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 50) 100
_________________________________________________________________
dropout (Dropout) (None, 50) 0
_________________________________________________________________
dense_1 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_1 (Dropout) (None, 20) 0
_________________________________________________________________
dense_2 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/Unknown - 0s 60ms/step
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-1-d1c5c463cdc2> in <module>()
47 build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
48
---> 49 build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
50 print("done")
20 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
164
165 if hasattr(loss_fn, 'reduction'):
--> 166 per_sample_losses = loss_fn.call(targets[i], outs[i])
167 weighted_losses = losses_utils.compute_weighted_loss(
168 per_sample_losses,
IndexError: list index out of range
2.1.0
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_12 (Dense) (None, 50) 100
_________________________________________________________________
dropout_8 (Dropout) (None, 50) 0
_________________________________________________________________
dense_13 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_9 (Dropout) (None, 20) 0
_________________________________________________________________
dense_14 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/1 [==============================] - 0s 362ms/step - loss: 8705754.0000 - accuracy: 0.0000e+00
Epoch 2/5
1/1 [==============================] - 0s 14ms/step - loss: 4458477.5000 - accuracy: 0.2222
Epoch 3/5
1/1 [==============================] - 0s 16ms/step - loss: 5933292.5000 - accuracy: 0.2222
Epoch 4/5
1/1 [==============================] - 0s 16ms/step - loss: 4305070.0000 - accuracy: 0.1111
Epoch 5/5
1/1 [==============================] - 0s 14ms/step - loss: 5578528.5000 - accuracy: 0.1111
done
还建议您仔细阅读这篇文章,其中解释了“如何将TFRecord提供给训练Keras模型”
希望这能回答你的问题。快乐学习