Python: Non-hashable static arguments are not supported in JAX when using vmap

This is related to an earlier question. After some work, I managed to get it down to one final error. The code now looks like this:

import jax.numpy as jnp
from jax import grad, jit, value_and_grad
from jax import vmap, pmap
from jax import random
import jax
from jax import lax
from jax import custom_jvp


def p_tau(z, tau, alpha=1.5):
    return jnp.clip((alpha - 1) * z - tau, 0) ** (1 / (alpha - 1))


def get_tau(tau, tau_max, tau_min, z_value):
    # Bisection update: tighten the (tau_min, tau_max) bracket around tau.
    return lax.cond(z_value < 1,
                    lambda _: (tau, tau_min),
                    lambda _: (tau_max, tau),
                    operand=None
                    )


def body(kwargs, x):
    # One lax.scan step of the bisection search for the threshold tau.
    tau_min = kwargs['tau_min']
    tau_max = kwargs['tau_max']
    z = kwargs['z']
    alpha = kwargs['alpha']

    tau = (tau_min + tau_max) / 2
    z_value = p_tau(z, tau, alpha).sum()
    taus = get_tau(tau, tau_max, tau_min, z_value)
    tau_max, tau_min = taus[0], taus[1]
    return {'tau_min': tau_min, 'tau_max': tau_max, 'z': z, 'alpha': alpha}, None

# Note: jax.partial is just functools.partial, re-exported by older JAX versions.
@jax.partial(jax.jit, static_argnums=(2,))
def map_row(z_input, alpha, T):
    z = (alpha - 1) * z_input

    tau_min, tau_max = jnp.min(z) - 1, jnp.max(z) - z.shape[0] ** (1 - alpha)
    result, _ = lax.scan(body, {'tau_min': tau_min, 'tau_max': tau_max, 'z': z, 'alpha': alpha}, xs=None,
                         length=T)
    tau = (result['tau_max'] + result['tau_min']) / 2
    result = p_tau(z, tau, alpha)
    return result / result.sum()

@jax.partial(jax.jit, static_argnums=(1,3,))
def _entmax(input, axis=-1, alpha=1.5, T=20):
    result = vmap(jax.partial(map_row, alpha, T), axis)(input)
    return result

@jax.partial(custom_jvp, nondiff_argnums=(1, 2, 3,))
def entmax(input, axis=-1, alpha=1.5, T=10):
    return _entmax(input, axis, alpha, T)

@jax.partial(jax.jit, static_argnums=(0,2,))    
def _entmax_jvp_impl(axis, alpha, T, primals, tangents):
    input = primals[0]
    Y = entmax(input, axis, alpha, T)
    gppr = Y  ** (2 - alpha)
    grad_output = tangents[0]
    dX = grad_output * gppr
    q = dX.sum(axis=axis) / gppr.sum(axis=axis)
    q = jnp.expand_dims(q, axis=axis)
    dX -= q * gppr
    return Y, dX


@entmax.defjvp
def entmax_jvp(axis, alpha, T, primals, tangents):
    return _entmax_jvp_impl(axis, alpha, T, primals, tangents)

import numpy as np
input = jnp.array(np.random.randn(64, 10)).block_until_ready()
weight = jnp.array(np.random.randn(64, 10)).block_until_ready()

def toy(input, weight):
    return (weight*entmax(input, 0, 1.5, 20)).sum()

jax.jit(value_and_grad(toy))(input, weight)
This produces what I hope is the final error, namely:

Non-hashable static arguments are not supported, as this can lead to unexpected cache-misses. Static argument (index 2) of type <class 'jax.interpreters.batching.BatchTracer'> for function map_row is non-hashable.

This is very strange, because I thought I had marked everything that looks static as static, yet it still tells me the argument is being traced.
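For reference, the same class of error can be reproduced in isolation. The snippet below is a minimal hypothetical illustration (the function scale is not part of the original code): an argument listed in static_argnums receives a traced value under vmap, which is exactly what the message describes.

import jax
import jax.numpy as jnp
from functools import partial

@partial(jax.jit, static_argnums=(1,))
def scale(x, factor):
    # factor is declared static, so it must be a hashable Python value.
    return x * factor

scale(jnp.ones(3), 2.0)  # fine: factor is a concrete Python float

# Raises the same error: the static argument (index 1) receives a
# non-hashable BatchTracer once vmap maps over it.
jax.vmap(lambda f: scale(jnp.ones(3), f))(jnp.arange(3.0))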

When you create a partial function with positional arguments, those arguments are bound first. So:

jax.partial(map_row, alpha, T)

is essentially equivalent to:

lambda z_input: map_row(alpha, T, z_input)

Note the incorrect argument order – this is what causes the error: you are passing the non-hashable tracer z_input into the slot of T, which map_row expects to be static.
You can replace the partial statement above with:

lambda z: map_row(z, alpha, T)

and then your code will run correctly.
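Applied in place, the fix is a one-line change to _entmax (a sketch of just the changed function; everything else stays as above):

@jax.partial(jax.jit, static_argnums=(1,3,))
def _entmax(input, axis=-1, alpha=1.5, T=20):
    # The lambda keeps z as the mapped argument, while alpha and T remain
    # concrete Python values, so the static T slot never sees a tracer.
    result = vmap(lambda z: map_row(z, alpha, T), axis)(input)
    return result

With this change, the final jax.jit(value_and_grad(toy))(input, weight) call traces and runs without the non-hashable static argument error.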