Python 值错误:要解压缩的值太多(应为3个)
我试图实现值迭代算法。 我有一个网格Python 值错误:要解压缩的值太多(应为3个),python,python-3.x,tuples,artificial-intelligence,valueerror,Python,Python 3.x,Tuples,Artificial Intelligence,Valueerror,我试图实现值迭代算法。 我有一个网格 grid = [[0, 0, 0, +1], [0, "W", 0, -1], [0, 0, 0, 0]] 行动清单 actlist = {UP:1, DOWN:2, LEFT:3, RIGHT:4} 和一个奖励函数 reward = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] 我编写了一个函数T,它返回3个元组的元组 def T(i,j,actions): if(i
grid = [[0, 0, 0, +1],
[0, "W", 0, -1],
[0, 0, 0, 0]]
行动清单
actlist = {UP:1, DOWN:2, LEFT:3, RIGHT:4}
和一个奖励函数
reward = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]
我编写了一个函数T,它返回3个元组的元组
def T(i,j,actions):
if(i == 0 and j == 0):
if(actions == UP):
return (i,i,0.8),(i,i,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i+1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i,j,0.1),(i+1,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,i,0.1),(i+1,j,0.1)
elif (i == 0 and j == 1):
if(actions == UP):
return (i,i,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)
elif(i == 0 and j == 2):
if(actions == UP):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return(i+1,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i,j,0.1),(i+1,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,j,0.1),(i+1,j,0.1)
elif(i == 0 and j == 3):
if(actions == UP):
return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)
elif(actions == DOWN):
return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)
elif(actions == LEFT):
return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)
elif(actions == RIGHT):
return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)
# 2nd row
elif (i == 1 and j == 0):
if(actions == UP):
return (i-1,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == DOWN):
return (i+1,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1)
elif(actions == RIGHT):
return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1)
elif(i == 1 and j ==1):
if(actions == UP):
return (i,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j,0.8),(i,j,0.1),(i,j,0.1)
elif (i == 1 and j == 2):
if(actions == UP):
return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i+1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i-1,j,0.1),(i+1,j,0.1)
elif(i == 1 and j == 3):
if(actions == UP):
return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1)
elif(actions == DOWN):
return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1)
elif(actions == LEFT):
return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1)
elif(actions == RIGHT):
return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1)
# 3rd row
elif(i == 2 and j == 0):
if(actions == UP):
return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j,0.1),(i,j+1,1,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i-1,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)
elif (i == 2 and j == 1):
if(actions == UP):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)
elif(i == 2 and j == 2):
if(actions == UP):
return (i-1,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i-1,j,0.1),(i,j,1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)
elif(i == 2 and j == 3):
if(actions == UP):
return (i-1,j,0.8),(i,j-1,0.1),(i,j,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i-1,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j,0.8),(i-1,j,0.1),(i,j,0.1)
在值迭代函数中调用此函数:
def value_iteration():
U1 = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]
while True:
U=U1.copy()
delta = 0
for i in range(len(grid)):
for j in range(len(grid[i])):
U1[i][j] = max(sum(p*(R(k,l)+gamma*U[k][l]) for (k,l,p) in T(i,j,a)) for a in actlist)
print(i,j,U1[i][j])
delta = max(delta, abs(U1[i][j] - U[i][j]))
if delta <= epsilon*(1 - gamma)/gamma:
return U
但随后代码停止并出现错误
ValueError: too many values to unpack (expected 3)
看看****中的元组,也许这就是原因
# 3rd row
elif(i == 2 and j == 0):
if(actions == UP):
return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j,0.1),**(i,j+1,1,0.1)**
elif(actions == LEFT):
return (i,j,0.8),(i-1,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)
elif (i == 2 and j == 1):
if(actions == UP):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)
看看****中的元组,也许这就是原因
# 3rd row
elif(i == 2 and j == 0):
if(actions == UP):
return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j,0.1),**(i,j+1,1,0.1)**
elif(actions == LEFT):
return (i,j,0.8),(i-1,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)
elif (i == 2 and j == 1):
if(actions == UP):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)
正如@EdwardMinnix所提到的,您应该使用map,并且永远不要使用这样的if/else构造。或者,如果有任何与这些值相关的模式,请查看 这将对您有所帮助:
...
VALUE_A = 0.8
VALUE_B = 0.1
def new_T(i, j, actions):
result_map = {(0, 0, 1): ((i, i, VALUE_A), (i, i, VALUE_B), (i, j + 1, VALUE_B)),
(0, 0, 2): ((i + 1, j, VALUE_A), (i, j, VALUE_B), (i, j + 1, VALUE_B)),
(0, 0, 3): ((i, j, VALUE_A), (i, j, VALUE_B), (i + 1, j, VALUE_B)),
(0, 0, 4): ((i, j + 1, VALUE_A), (i, i, VALUE_B), (i + 1, j, VALUE_B)),
(0, 1, 1): ((i, i, VALUE_A), (i, j - 1, VALUE_B), (i, j + 1, VALUE_B)),
(0, 1, 2): ((i, j, VALUE_A), (i, j - 1, VALUE_B), (i, j + 1, VALUE_B)),
(0, 1, 3): ((i, j - 1, VALUE_A), (i, j, VALUE_B), (i, j, VALUE_B)),
(0, 1, 4): ((i, j + 1, VALUE_A), (i, j, VALUE_B), (i, j, VALUE_B)),
(0, 2, 1): ((i, j, VALUE_A), (i, j - 1, VALUE_B), (i, j + 1, VALUE_B)),
(0, 2, 2): ((i + 1, j, VALUE_A), (i, j - 1, VALUE_B), (i, j + 1, VALUE_B)),
(0, 2, 3): ((i, j - 1, VALUE_A), (i, j, VALUE_B), (i + 1, j, VALUE_B)),
(0, 2, 4): ((i, j + 1, VALUE_A), (i, j, VALUE_B), (i + 1, j, VALUE_B))}
return result_map.get((i, j, actions))
for i, j, action in itertools.product(range(4), range(4), range(1, 5)):
print('%s %s %s' % (i, j, action))
T_value = T(i, j, action)
new_T_value = new_T(i, j, action)
if T_value != new_T_value:
raise AssertionError('Error! \nT: %s \nNew T: %s' % (T_value, new_T_value))
正如@EdwardMinnix所提到的,您应该使用map,并且永远不要使用这样的if/else构造。或者,如果有任何与这些值相关的模式,请查看 这将对您有所帮助:
...
VALUE_A = 0.8
VALUE_B = 0.1
def new_T(i, j, actions):
result_map = {(0, 0, 1): ((i, i, VALUE_A), (i, i, VALUE_B), (i, j + 1, VALUE_B)),
(0, 0, 2): ((i + 1, j, VALUE_A), (i, j, VALUE_B), (i, j + 1, VALUE_B)),
(0, 0, 3): ((i, j, VALUE_A), (i, j, VALUE_B), (i + 1, j, VALUE_B)),
(0, 0, 4): ((i, j + 1, VALUE_A), (i, i, VALUE_B), (i + 1, j, VALUE_B)),
(0, 1, 1): ((i, i, VALUE_A), (i, j - 1, VALUE_B), (i, j + 1, VALUE_B)),
(0, 1, 2): ((i, j, VALUE_A), (i, j - 1, VALUE_B), (i, j + 1, VALUE_B)),
(0, 1, 3): ((i, j - 1, VALUE_A), (i, j, VALUE_B), (i, j, VALUE_B)),
(0, 1, 4): ((i, j + 1, VALUE_A), (i, j, VALUE_B), (i, j, VALUE_B)),
(0, 2, 1): ((i, j, VALUE_A), (i, j - 1, VALUE_B), (i, j + 1, VALUE_B)),
(0, 2, 2): ((i + 1, j, VALUE_A), (i, j - 1, VALUE_B), (i, j + 1, VALUE_B)),
(0, 2, 3): ((i, j - 1, VALUE_A), (i, j, VALUE_B), (i + 1, j, VALUE_B)),
(0, 2, 4): ((i, j + 1, VALUE_A), (i, j, VALUE_B), (i + 1, j, VALUE_B))}
return result_map.get((i, j, actions))
for i, j, action in itertools.product(range(4), range(4), range(1, 5)):
print('%s %s %s' % (i, j, action))
T_value = T(i, j, action)
new_T_value = new_T(i, j, action)
if T_value != new_T_value:
raise AssertionError('Error! \nT: %s \nNew T: %s' % (T_value, new_T_value))
你能提供完整的错误信息以便我们知道错误被触发的确切位置吗?
U1[i][j]=max(sum(p*(R(k,l)+gamma*U[k][l])for(k,l,p)in T(i,j,a))for a in actlist)
错误是在这一行触发的。附带问题:如果你所有的函数都像一个表一样,为什么不使用字典呢?@Edwardminix,我不知道该怎么做。@ShifatEArman您只需制作一个字典,其中键是三元素元组,其值是(I,j,action)
,其值是您将从函数返回的结果(虽然它们需要硬编码。如果你不熟悉字典,我建议你参考一下。你能提供完整的错误信息吗?这样我们就知道触发错误的确切位置了吗?U1[I][j]=max(对于actlist中的a,T(I,j,a)中的(k,l,p)的和(p*(R(k,l)+gamma*U[k][l]))
这一行触发了错误。附带问题:如果你的函数只是像一个表一样工作,为什么不使用字典呢?@EdwardMinnix,我不知道怎么做。@ShifatEArman你只需要制作一个字典,其中键是三元素元组,值是(I,j,action)
,其值是您将从函数返回的结果(尽管它们需要硬编码。如果您不熟悉字典,我建议您引用愚蠢的错误。非常感谢。:)当然。另外,我正在更新U=U1.copy()
在两个for循环都完成后。但是它在我的for循环中自动更新。我不知道原因。不确定这能解决您的问题,但每次使用该功能时,您都会“重置”U和U1,并返回U
,这是一个仅为零的错误。非常感谢。)当然。还有一件事,在两个for循环完成后,我正在更新U=U1.copy()
。但是它在我的for循环中自动更新。我不知道原因。我不确定这是否解决了您的问题,但每次使用函数时,您都会“重置”U和U1
,并返回U
,仅为零