Python 如何运行DefineTensorFlow图所有变量都在float16而不是float32中

Python 如何运行DefineTensorFlow图所有变量都在float16而不是float32中,python,tensorflow,Python,Tensorflow,默认情况下,变量Tensorflow位于float32中。为了节省内存,我尝试在float16中运行。在我的图表中,每个我可以将数据类型定义为float16的地方,我都这样做了。但是,我在运行代码时遇到了一个错误 下面是我的代码 import math import numpy as np import tensorflow as tf vocabulary_size = 10 batch_size = 64 embedding_size = 100 num_inputs =4 num_s

默认情况下,变量Tensorflow位于float32中。为了节省内存,我尝试在float16中运行。在我的图表中,每个我可以将数据类型定义为float16的地方,我都这样做了。但是,我在运行代码时遇到了一个错误

下面是我的代码

import math
import numpy as np
import tensorflow as tf

vocabulary_size = 10
batch_size = 64 
embedding_size = 100 
num_inputs =4
num_sampled = 128 

graph = tf.Graph()

with graph.as_default(): #took out " , tf.device('/cpu:0')"


    train_dataset = tf.placeholder(tf.int32, shape=[batch_size, num_inputs ])
    train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])

    embeddings = tf.get_variable( 'embeddings', dtype=tf.float16,
        initializer= tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0, dtype=tf.float16) )

    softmax_weights = tf.get_variable( 'softmax_weights', dtype=tf.float16,
        initializer= tf.truncated_normal([vocabulary_size, embedding_size],
                             stddev=1.0 / math.sqrt(embedding_size), dtype=tf.float16 ) )

    softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float16,
        initializer= tf.zeros([vocabulary_size], dtype=tf.float16),  trainable=False )

    embed = tf.nn.embedding_lookup(embeddings, train_dataset) #train data set is

    embed_reshaped = tf.reshape( embed, [batch_size*num_inputs, embedding_size] )

    segments= np.arange(batch_size).repeat(num_inputs)

    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

    sam_sof_los = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
                                   labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size)

    loss = tf.reduce_mean( sam_sof_los )

    optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss) 

    saver = tf.train.Saver()
这是错误信息

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    509                 as_ref=input_arg.is_ref,
--> 510                 preferred_dtype=default_dtype)
    511           except TypeError as err:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
   1143     if ret is None:
-> 1144       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1145 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in _TensorTensorConversionFunction(t, dtype, name, as_ref)
    980         "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
--> 981         (dtype.name, t.dtype.name, str(t)))
    982   return t

ValueError: Tensor conversion requested dtype float16 for Tensor with dtype float32: 'Tensor("sampled_softmax_loss/Log:0", shape=(64, 1), dtype=float32)'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-2-12d508b9e5d7> in <module>()
     46 
     47     sam_sof_los = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
---> 48                                    labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size)
     49 
     50     loss = tf.reduce_mean( sam_sof_los )

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in sampled_softmax_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name, seed)
   1347       partition_strategy=partition_strategy,
   1348       name=name,
-> 1349       seed=seed)
   1350   labels = array_ops.stop_gradient(labels, name="labels_stop_gradient")
   1351   sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2(

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
   1126     if subtract_log_q:
   1127       # Subtract log of Q(l), prior probability that l appears in sampled.
-> 1128       true_logits -= math_ops.log(true_expected_count)
   1129       sampled_logits -= math_ops.log(sampled_expected_count)
   1130 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py in binary_op_wrapper(x, y)
    860     with ops.name_scope(None, op_name, [x, y]) as name:
    861       if isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor):
--> 862         return func(x, y, name=name)
    863       elif not isinstance(y, sparse_tensor.SparseTensor):
    864         try:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py in sub(x, y, name)
   8316   if _ctx is None or not _ctx._eager_context.is_eager:
   8317     _, _, _op = _op_def_lib._apply_op_helper(
-> 8318         "Sub", x=x, y=y, name=name)
   8319     _result = _op.outputs[:]
   8320     _inputs_flat = _op.inputs

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    544                   "%s type %s of argument '%s'." %
    545                   (prefix, dtypes.as_dtype(attrs[input_arg.type_attr]).name,
--> 546                    inferred_from[input_arg.type_attr]))
    547 
    548           types = [values.dtype]

TypeError: Input 'y' of 'Sub' Op has type float32 that does not match type float16 of argument 'x'.
---------------------------------------------------------------------------
ValueError回溯(最近一次调用上次)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py in_apply_op_helper(self,op_type_name,name,**关键字)
509 as_ref=输入参数为,
-->510首选类型(默认类型)
511除类型错误作为错误外:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py在内部\u convert\u to \u tensor中(值、数据类型、名称、as\u ref、首选\u数据类型、ctx)
1143如果ret为无:
->1144 ret=conversion\u func(值,dtype=dtype,name=name,as\u ref=as\u ref)
1145
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in_tensortensortransversionfunction(t,dtype,name,as_ref)
980“张量转换请求了数据类型为%s的张量的数据类型%s:%r”%
-->981(dtype.name,t.dtype.name,str(t)))
982返回t
ValueError:Tensor转换请求使用dtype float32的Tensor的dtype float16:'Tensor(“采样的\u softmax\u丢失/Log:0”,形状=(64,1),dtype=float32)'
在处理上述异常期间,发生了另一个异常:
TypeError回溯(最近一次调用上次)
在()
46
47采样软件服务水平=tf.nn.采样软件最大损耗(权重=软件最大权重,偏差=软件最大偏差,输入=平均嵌入,
--->48个标签=序列标签,抽样数量=抽样数量,分类数量=词汇表大小)
49
50损失=tf.减少平均值(sam\U sof\U los)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in sampled_softmax_loss(权重、偏差、标签、输入、num_sampled、num_classes、num_true、sampled_值、删除意外点击、分区策略、名称、种子)
1347分区策略=分区策略,
1348 name=名称,
->1349种子=种子)
1350 labels=数组_ops.stop_gradient(labels,name=“labels_stop_gradient”)
1351采样损耗=nn运算。软件最大值交叉熵与逻辑v2(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in_compute_sampled_logits(权重、偏差、标签、输入、采样数、num_类、num_真、采样值、减去log_q、删除意外命中、分区策略、名称、种子)
1126如果减去日志q:
1127#减去Q(l)的对数,即l出现在样本中的先验概率。
->1128 true\u logits-=math\u ops.log(true\u expected\u count)
1129采样对数-=数学运算对数(采样计数)
1130
/二进制op_包装中的usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py(x,y)
860,操作名称\范围(无,操作名称,[x,y])作为名称:
861如果isinstance(x,运算张量)和isinstance(y,运算张量):
-->862返回函数(x,y,name=name)
863 elif不存在(y,稀疏张量,稀疏传感器):
864尝试:
/sub(x,y,name)中的usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py
8316如果_ctx为None或not _ctx._eager_context.is eager:
8317 u,u,_op=_op_def_lib._apply_op_helper(
->8318“Sub”,x=x,y=y,name=name)
8319 _结果=_运算输出[:]
8320 _输入_平坦=_操作输入
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py in_apply_op_helper(self,op_type_name,name,**关键字)
544“%s”类型%s,参数“%s”。%
545(前缀,dtypes.as\u dtype(attrs[input\u arg.type\u attr])。名称,
-->546根据[输入参数类型属性])推断
547
548类型=[values.dtype]
TypeError:“Sub”Op的输入“y”的类型float32与参数“x”的类型float16不匹配。
该错误来自行
tf.nn.sampled\u softmax\u loss

# Redefine it as the tensorflow one is not exposed.
LogUniformCandidateSampler = namedtuple("namedtuple", ["sampled_candidates", "true_expected_count", "sampled_expected_count"]) 
sampled_values = tf.nn.log_uniform_candidate_sampler(
      true_classes=tf.cast(train_labels, tf.int64), num_sampled=num_sampled,
      num_true=1,
      unique=True,
      range_max=vocabulary_size,
      seed=None)

sampled_value_16 = LogUniformCandidateSampler(
    sampled_values.sampled_candidates,
    tf.cast(sampled_values.true_expected_count, tf.float16),
    tf.cast(sampled_values.sampled_expected_count, tf.float16))

sam_sof_los = tf.nn.sampled_softmax_loss(
    weights=softmax_weights,
    biases=softmax_biases,
    inputs=averaged_embeds,
    labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size, 
    sampled_values=sampled_value_16)
起初我认为tf.segment_-mean可能会将输出转换为float32,所以我尝试将平均_嵌入转换为float16,但仍然得到相同的错误

从文档来看,似乎没有一种方法可以定义采样的\u softmax\u损耗中的任何数据类型


据我所知,你只能用黑客来做

问题来自于对以下方面的呼吁:

  if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes,
          seed=seed)
将输出此类型的对象:

LogUniformCandidateSampler(
    sampled_candidates=<tf.Tensor 'LogUniformCandidateSampler:0' shape=(128,) dtype=int64>,
    true_expected_count=<tf.Tensor 'LogUniformCandidateSampler:1' shape=(64, 1) dtype=float32>,
    sampled_expected_count=<tf.Tensor 'LogUniformCandidateSampler:2' shape=(128,) dtype=float32>
)

但这确实是一个黑客行为,可能会产生意想不到的后果(一个预期的结果是
tf.cast
操作是不可微的)。

采样值
应该是随机选择的softmax权重/偏差,对吗?我们能不能定义一个函数,它随机接受softmax权重/偏差,然后将其作为
采样值的输入?Tf没有优化候选采样器的任何参数,也许它没有对候选采样器进行任何区分。返回的第一个张量是随机选择的数字,这种函数无论如何都不是微分函数。其他两个张量基本上是负样本数/类总数(至少对于均匀分布的类)。有变化;每次运行候选样本时,值都有点不同。我会尝试破解,但扮演float32看看。你是对的,没有区别是把wrt带到候选采样器,所以你在这方面很好。我称之为hack,因为在截断
LogUniformCandidateSampler
时,从
float32
截断为
float16
数字。我只是不确定有没有什么好消息