Python 关于tensorflow中的几个变量计算hessian

Python 关于tensorflow中的几个变量计算hessian,python,tensorflow,hessian-matrix,Python,Tensorflow,Hessian Matrix,在tensorflow中计算Hessian非常简单: x = tf.Variable([1., 1., 1.], dtype=tf.float32, name="x") f = (x[0] + x[1] ** 2 + x[0] * x[1] + x[2]) ** 2 hessian = tf.hessians(f, x) 这将正确返回 [[ 8., 20., 4.], [20., 34., 6.], [ 4., 6., 2.]] 在我的实际情况中,不是使用一个变量x来保存

在tensorflow中计算Hessian非常简单:

x = tf.Variable([1., 1., 1.], dtype=tf.float32, name="x")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + x[2]) ** 2
hessian = tf.hessians(f, x)
这将正确返回

[[ 8., 20.,  4.],
 [20., 34.,  6.],
 [ 4.,  6.,  2.]]
在我的实际情况中,不是使用一个变量 x 来保存三个值,而是需要将其拆分为两个变量:x(保存前两个值)和 y(保存最后一个值)。

我试过天真的方法

hessian = tf.hessians(f, [x, y])
但是我得到:
[[ 8., 20.],
 [20., 34.]], [[2.]]

我还尝试:

xy = tf.concat([x, y], axis=-1)
但是在定义 Hessian 的时候

hessian = tf.hessians(f, xy)
我得到一个非常严重的错误:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    510                 as_ref=input_arg.is_ref,
--> 511                 preferred_dtype=default_dtype)
    512           except TypeError as err:

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
   1174     if ret is None:
-> 1175       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1176 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    303   _ = as_ref
--> 304   return constant(v, dtype=dtype, name=name)
    305 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
    244   return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245                         allow_broadcast=True)
    246 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    282           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283           allow_broadcast=allow_broadcast))
    284   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
    453     if values is None:
--> 454       raise ValueError("None values not supported.")
    455     # if dtype is provided, forces numpy array to be the type

ValueError: None values not supported.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    524               observed = ops.internal_convert_to_tensor(
--> 525                   values, as_ref=input_arg.is_ref).dtype.name
    526             except ValueError as err:

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
   1174     if ret is None:
-> 1175       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1176 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    303   _ = as_ref
--> 304   return constant(v, dtype=dtype, name=name)
    305 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
    244   return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245                         allow_broadcast=True)
    246 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    282           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283           allow_broadcast=allow_broadcast))
    284   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
    453     if values is None:
--> 454       raise ValueError("None values not supported.")
    455     # if dtype is provided, forces numpy array to be the type

ValueError: None values not supported.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-358-70bce7e5d400> in <module>
      3 f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
      4 xy = tf.concat([x, y], axis=-1)
----> 5 hessian = tf.hessians(f, xy)

~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gradients_impl.py in hessians(ys, xs, name, colocate_gradients_with_ops, gate_gradients, aggregation_method)
   1405   for gradient, x in zip(_gradients, xs):
   1406     # change shape to one-dimension without graph branching
-> 1407     gradient = array_ops.reshape(gradient, [-1])
   1408 
   1409     # Declare an iterator and tensor array loop variables for the gradients.

~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py in reshape(tensor, shape, name)
   7178   try:
   7179     _, _, _op = _op_def_lib._apply_op_helper(
-> 7180         "Reshape", tensor=tensor, shape=shape, name=name)
   7181   except (TypeError, ValueError):
   7182     result = _dispatch.dispatch(

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    527               raise ValueError(
    528                   "Tried to convert '%s' to a tensor and failed. Error: %s" %
--> 529                   (input_name, err))
    530             prefix = ("Input '%s' of '%s' Op has type %s that does not match" %
    531                       (input_name, op_type_name, observed))

ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported.


1
---------------------------------------------------------------------------
ValueError回溯(最近一次调用上次)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in_apply_op_helper(self,op_type_name,name,**关键字)
510 as_ref=输入参数is_ref,
-->511首选类型=默认类型)
512除TypeError作为错误外:
内部转换为张量中的~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py(值、数据类型、名称、as-ref、首选数据类型、ctx、接受符号张量)
1174如果ret为无:
->1175 ret=conversion\u func(值,dtype=dtype,name=name,as\u ref=as\u ref)
1176
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant\u op.py in\u constant\u tensor\u conversion\u函数(v,dtype,name,as\u ref)
303=作为参考
-->304返回常量(v,dtype=dtype,name=name)
305
常量中的~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py(值、数据类型、形状、名称)
244返回\u常量\u impl(值、数据类型、形状、名称、验证\u形状=False,
-->245允许_广播=真)
246
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant\u op.py in\u constant\u impl(值、数据类型、形状、名称、验证形状、允许广播)
282值,dtype=dtype,shape=shape,verify\u shape=verify\u shape,
-->283允许广播=允许广播)
284 dtype\u value=attr\u value\u pb2.AttrValue(type=tensor\u value.tensor.dtype)
make\u tensor\u proto中的~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor\u util.py(值、数据类型、形状、验证形状、允许广播)
453如果值为无:
-->454 raise VALUE ERROR(“不支持无值”)
455#如果提供了dtype,则强制numpy数组为该类型
ValueError:不支持任何值。
在处理上述异常期间,发生了另一个异常:
ValueError回溯(最近一次调用上次)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in_apply_op_helper(self,op_type_name,name,**关键字)
524观测值=操作。内部转换为张量(
-->525个值,如_ref=input_arg.is_ref).dtype.name
526除ValueError作为错误外:
内部转换为张量中的~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py(值、数据类型、名称、as-ref、首选数据类型、ctx、接受符号张量)
1174如果ret为无:
->1175 ret=conversion\u func(值,dtype=dtype,name=name,as\u ref=as\u ref)
1176
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant\u op.py in\u constant\u tensor\u conversion\u函数(v,dtype,name,as\u ref)
303=作为参考
-->304返回常量(v,dtype=dtype,name=name)
305
常量中的~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py(值、数据类型、形状、名称)
244返回\u常量\u impl(值、数据类型、形状、名称、验证\u形状=False,
-->245允许_广播=真)
246
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant\u op.py in\u constant\u impl(值、数据类型、形状、名称、验证形状、允许广播)
282值,dtype=dtype,shape=shape,verify\u shape=verify\u shape,
-->283允许广播=允许广播)
284 dtype\u value=attr\u value\u pb2.AttrValue(type=tensor\u value.tensor.dtype)
make\u tensor\u proto中的~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor\u util.py(值、数据类型、形状、验证形状、允许广播)
453如果值为无:
-->454 raise VALUE ERROR(“不支持无值”)
455#如果提供了dtype,则强制numpy数组为该类型
ValueError:不支持任何值。
在处理上述异常期间,发生了另一个异常:
ValueError回溯(最近一次调用上次)
在里面
      3 f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
      4 xy = tf.concat([x, y], axis=-1)
----> 5 hessian = tf.hessians(f, xy)
黑森语中的~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gradients\u impl.py(ys,xs,name,colocate\u gradients\u with\u ops,gate\u gradients,aggregation\u method)
1405用于渐变,拉链中的x(_渐变,xs):
1406#将形状更改为一维,无图分支
->1407 gradient=阵列操作重塑(渐变,[-1])
1408
1409#为渐变声明迭代器和张量数组循环变量。
整形中的~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gen\u array\u ops.py(张量、形状、名称)
7178尝试:
7179 u,u,_op=_op_def_lib._apply_op_helper(
->7180“重塑”,张量=张量,形状=形状,名称=名称)
7181除外(类型错误、值错误):
7182结果=_dispatch.dispatch(
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in_apply_op_helper(self,op_type_name,name,**关键字)
527升值错误(
528“试图将“%s”转换为张量,但失败。错误:%s”%
-->529(输入名称,错误))
530前缀=(“%s”Op的输入“%s”具有不匹配的类型%s)%
531(输入_名称,操作_类型_名称,观察))
ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported.
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    510                 as_ref=input_arg.is_ref,
--> 511                 preferred_dtype=default_dtype)
    512           except TypeError as err:

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
   1174     if ret is None:
-> 1175       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1176 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    303   _ = as_ref
--> 304   return constant(v, dtype=dtype, name=name)
    305 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
    244   return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245                         allow_broadcast=True)
    246 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    282           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283           allow_broadcast=allow_broadcast))
    284   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
    453     if values is None:
--> 454       raise ValueError("None values not supported.")
    455     # if dtype is provided, forces numpy array to be the type

ValueError: None values not supported.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    524               observed = ops.internal_convert_to_tensor(
--> 525                   values, as_ref=input_arg.is_ref).dtype.name
    526             except ValueError as err:

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
   1174     if ret is None:
-> 1175       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1176 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    303   _ = as_ref
--> 304   return constant(v, dtype=dtype, name=name)
    305 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
    244   return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245                         allow_broadcast=True)
    246 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    282           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283           allow_broadcast=allow_broadcast))
    284   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
    453     if values is None:
--> 454       raise ValueError("None values not supported.")
    455     # if dtype is provided, forces numpy array to be the type

ValueError: None values not supported.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-358-70bce7e5d400> in <module>
      3 f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
      4 xy = tf.concat([x, y], axis=-1)
----> 5 hessian = tf.hessians(f, xy)

~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gradients_impl.py in hessians(ys, xs, name, colocate_gradients_with_ops, gate_gradients, aggregation_method)
   1405   for gradient, x in zip(_gradients, xs):
   1406     # change shape to one-dimension without graph branching
-> 1407     gradient = array_ops.reshape(gradient, [-1])
   1408 
   1409     # Declare an iterator and tensor array loop variables for the gradients.

~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py in reshape(tensor, shape, name)
   7178   try:
   7179     _, _, _op = _op_def_lib._apply_op_helper(
-> 7180         "Reshape", tensor=tensor, shape=shape, name=name)
   7181   except (TypeError, ValueError):
   7182     result = _dispatch.dispatch(

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    527               raise ValueError(
    528                   "Tried to convert '%s' to a tensor and failed. Error: %s" %
--> 529                   (input_name, err))
    530             prefix = ("Input '%s' of '%s' Op has type %s that does not match" %
    531                       (input_name, op_type_name, observed))

ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported.


1
from itertools import combinations, count
import tensorflow as tf

def _gradient_or_zeros(y_elem, x):
    """Return d(y_elem)/dx, or zeros shaped like x when they are unconnected."""
    grad = tf.gradients(y_elem, x)[0]
    if grad is None:
        # tf.gradients reports "no dependency" as None rather than as a zero
        # tensor; stacking or writing None fails with
        # "None values not supported", so use an explicit zero row instead.
        grad = tf.zeros_like(x)
    return grad


def jacobian(y, x, tf_loop=False):
    """Build the Jacobian of `y` with respect to `x` by stacking gradients.

    Row `i` of the result is the gradient of `y[i]` with respect to `x`.
    `y` is indexed along its first axis only, so this is consistent for a
    1-D `y` (NOTE(review): higher-rank `y` is not handled - confirm callers
    only pass vectors). Uses the TensorFlow 1.x graph API.

    Args:
        y: Tensor whose elements `y[i]` are differentiated one at a time.
        x: Tensor or variable to differentiate with respect to.
        tf_loop: If true, always build the rows with a `tf.while_loop`.
            A TF-level loop is also used whenever the static number of
            elements of `y` is unknown, because a Python-level loop needs
            that count at graph-construction time.

    Returns:
        A tensor with one row per element of `y`; each row has the shape
        of `x`. Rows for elements that do not depend on `x` are all zeros.
    """
    static_size = y.shape.num_elements()
    if tf_loop or static_size is None:
        i = tf.constant(0, dtype=tf.int32)
        y_size = tf.size(y)
        rows = tf.TensorArray(dtype=y.dtype, size=y_size, element_shape=x.shape)
        _, rows = tf.while_loop(
            lambda i, rows: i < y_size,
            lambda i, rows: [i + 1, rows.write(i, _gradient_or_zeros(y[i], x))],
            [i, rows])
        return rows.stack()
    else:
        return tf.stack([_gradient_or_zeros(y[i], x)
                         for i in range(static_size)], axis=0)

def hessian_multivar(ys, xs, tf_loop=False):
    """Assemble the full Hessian of `ys` over several separate variables.

    The Hessian is built as a grid of blocks, where block (r, c) holds the
    second derivatives with respect to `xs[r]` and `xs[c]`. Diagonal blocks
    come from `tf.hessians`; off-diagonal blocks are Jacobians of the
    first-order gradients.

    Args:
        ys: Tensor to differentiate; passed unchanged to `tf.hessians` and
            `tf.gradients`.
        xs: Sequence of tensors/variables (its length is taken, so it must
            not be a one-shot iterator). NOTE(review): the blocks are joined
            with 2-D concatenation, which presumably requires each entry to
            be 1-D - confirm.
        tf_loop: Forwarded to `jacobian` for the off-diagonal blocks.

    Returns:
        A single tensor made by concatenating the blocks of each block-row
        along axis 1 and then the block-rows along axis 0.
    """
    n_vars = len(xs)
    # blocks[r][c] will hold the (xs[r], xs[c]) piece of the Hessian.
    blocks = [[None for _ in range(n_vars)] for _ in range(n_vars)]

    # Diagonal pieces: one per-variable Hessian from TensorFlow itself.
    for k, diagonal_block in enumerate(tf.hessians(ys, xs)):
        blocks[k][k] = diagonal_block

    # First-order gradients, one per variable.
    grads = tf.gradients(ys, xs)

    # Off-diagonal pieces: for every unordered pair of variables, fill both
    # the (a, b) block and its mirror (b, a).
    for a, b in combinations(range(n_vars), 2):
        blocks[a][b] = jacobian(grads[a], xs[b], tf_loop=tf_loop)
        blocks[b][a] = jacobian(grads[b], xs[a], tf_loop=tf_loop)

    # Join blocks side by side into block-rows, then stack the block-rows.
    block_rows = [tf.concat(row, axis=1) for row in blocks]
    return tf.concat(block_rows, axis=0)

# Demo: full Hessian over three separate variables holding 2, 1 and 2 values.
# Everything is built in a fresh graph so it does not touch the default graph.
with tf.Graph().as_default():
    x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
    y = tf.Variable([1.], dtype=tf.float32, name="y")
    z = tf.Variable([1., 1.], dtype=tf.float32, name="z")
    f = (x[0] + x[1] ** 2 + x[0] * x[1] + y + x * y * z) ** 2
    hessian = hessian_multivar(f, [x, y, z])
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        # Variables must be initialized before the Hessian can be evaluated.
        sess.run(init_op)
        hessian_value = sess.run(hessian)
    print(hessian_value)
import tensorflow as tf

# Two-variable case worked out by hand: x holds the first two values and
# y holds the last one.
x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
y = tf.Variable([1.], dtype=tf.float32, name="y")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
# Diagonal blocks of the Hessian: second derivatives within x and within y.
hx, hy = tf.hessians(f, [x, y])
# First-order derivatives with respect to x and y.
gx, gy = tf.gradients(f, [x, y])
# The remaining (off-diagonal) blocks are Jacobians of one gradient with
# respect to the other variable. They are assembled manually here, one
# tf.gradients call per element of gx
# (see https://github.com/tensorflow/tensorflow/issues/675).
hxy_rows = [tf.gradients(gx[i], y)[0] for i in range(x.shape.num_elements())]
hxy = tf.concat(hxy_rows, axis=0)
# gy has a single element, so one gradient call yields the whole block.
hyx = tf.gradients(gy, x)[0]
# Lay the pieces out as [[hx, hxy], [hyx, hy]].
top_rows = tf.concat([hx, tf.expand_dims(hxy, 1)], axis=1)
bottom_row = tf.concat([tf.expand_dims(hyx, 0), hy], axis=1)
hessian = tf.concat([top_rows, bottom_row], axis=0)
# Evaluate and show the result.
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    hessian_value = sess.run(hessian)
print(hessian_value)