Warning: file_get_contents(/data/phpspider/zhask/data//catemap/0/windows/16.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Wolfram mathematica Mathematica SVD推荐系统,循环内分配问题_Wolfram Mathematica - Fatal编程技术网

Wolfram mathematica Mathematica SVD推荐系统,循环内分配问题

Wolfram mathematica Mathematica SVD推荐系统,循环内分配问题,wolfram-mathematica,Wolfram Mathematica,我正在将以下用ruby编写的内容翻译成Mathematica: require 'linalg' users = { 1 => "Ben", 2 => "Tom", 3 => "John", 4 => "Fred" } m = Linalg::DMatrix[ #Ben, Tom, John, Fred [5,5,0,5], # season 1 [5,0,3,4], # season 2

我正在将以下用ruby编写的内容翻译成Mathematica:

require 'linalg'

users = { 1 => "Ben", 2 => "Tom", 3 => "John", 4 => "Fred" }
m = Linalg::DMatrix[
           #Ben, Tom, John, Fred
            [5,5,0,5], # season 1
            [5,0,3,4], # season 2
            [3,4,0,3], # season 3
            [0,0,5,3], # season 4
            [5,4,4,5], # season 5
            [5,4,5,5]  # season 6
            ]

# Compute the SVD Decomposition
u, s, vt = m.singular_value_decomposition
vt = vt.transpose

# Take the 2-rank approximation of the Matrix
#   - Take first and second columns of u  (6x2)
#   - Take first and second columns of vt (4x2)
#   - Take the first two eigen-values (2x2)
u2 = Linalg::DMatrix.join_columns [u.column(0), u.column(1)]
v2 = Linalg::DMatrix.join_columns [vt.column(0), vt.column(1)]
eig2 = Linalg::DMatrix.columns [s.column(0).to_a.flatten[0,2], s.column(1).to_a.flatten[0,2]]

# Here comes Bob, our new user
bob = Linalg::DMatrix[[5,5,0,0,0,5]]
bobEmbed = bob * u2 * eig2.inverse

# Compute the cosine similarity between Bob and every other User in our 2-D space
user_sim, count = {}, 1
v2.rows.each { |x|
    user_sim[count] = (bobEmbed.transpose.dot(x.transpose)) / (x.norm * bobEmbed.norm)
    count += 1
  }

# Remove all users who fall below the 0.90 cosine similarity cutoff and sort by similarity
similar_users = user_sim.delete_if {|k,sim| sim < 0.9 }.sort {|a,b| b[1] <=> a[1] }
similar_users.each { |u| printf "%s (ID: %d, Similarity: %0.3f) \\n", users[u[0]], u[0], u[1]  }

# We'll use a simple strategy in this case:
#   1) Select the most similar user
#   2) Compare all items rated by this user against your own and select items that you have not yet rated
#   3) Return the ratings for items I have not yet seen, but the most similar user has rated
similarUsersItems = m.column(similar_users[0][0]-1).transpose.to_a.flatten
myItems = bob.transpose.to_a.flatten

not_seen_yet = {}
myItems.each_index { |i|
  not_seen_yet[i+1] = similarUsersItems[i] if myItems[i] == 0 and similarUsersItems[i] != 0
}

printf "\\n %s recommends: \\n", users[similar_users[0][0]]
not_seen_yet.sort {|a,b| b[1] <=> a[1] }.each { |item|
  printf "\\tSeason %d .. I gave it a rating of %d \\n", item[0], item[1]
}

print "We've seen all the same seasons, bugger!" if not_seen_yet.size == 0
需要“linalg”
用户={1=>“本”,2=>“汤姆”,3=>“约翰”,4=>“弗雷德”}
m=Linalg::DMatrix[
#本,汤姆,约翰,弗雷德
[5,5,0,5],#第1季
[5,0,3,4],#第二季
[3,4,0,3],#第三季
[0,0,5,3],#第四季
[5,4,4,5],#第五季
[5,4,5,5]#第六季
]
#计算奇异值分解
u、 s,vt=m.奇异值分解
vt=vt.转置
#取矩阵的2阶近似值
#-取u(6x2)的第一列和第二列
#-取vt的第一列和第二列(4x2)
#-取前两个特征值(2x2)
u2=Linalg::DMatrix.join_columns[u.column(0),u.column(1)]
v2=Linalg::DMatrix.join_columns[vt.column(0),vt.column(1)]
eig2=Linalg::DMatrix.columns[s.column(0).到展平[0,2],s.column(1).到展平[0,2]]
#鲍勃来了,我们的新用户
bob=Linalg::DMatrix[[5,5,0,0,5]]
bobEmbed=bob*u2*eig2.inverse
#计算Bob和我们二维空间中每个其他用户之间的余弦相似性
用户sim,计数={},1
v2.rows.each{| x|
用户sim[计数]=(bobEmbed.transpose.dot(x.transpose))/(x.norm*bobEmbed.norm)
计数+=1
}
#删除所有低于0.90余弦相似度截止值的用户,并按相似度排序
相似用户=用户sim.delete{if{k,sim{sim<0.9}。排序{a,b{b[1]a[1]}
相似的用户。每个{u | printf“%s(ID:%d,相似性:%0.3f)\\n”,用户[u[0]],u[0],u[1]}
#在这种情况下,我们将使用一种简单的策略:
#1)选择最相似的用户
#2)将此用户评定的所有项目与您自己的项目进行比较,并选择您尚未评定的项目
#3)返回我尚未看到的项目的评分,但最相似的用户已评分
similarUsersItems=m.column(类似的用户[0][0]-1)。将
myItems=bob.transpose.to_a.flatten
还没看到{}
myItems.each|u索引{i|
如果myItems[i]==0和SimilarUserSites[i]!=0,则[i+1]=SimilarUserSites[i]尚未被看到
}
printf“\\n%s建议:\\n”,用户[类似用户[0][0]]
尚未看到。排序{a,b{b[1]a[1]}。每个{项|
printf“\\t原因%d..I给它的评级为%d\\n”,项目[0],项目[1]
}
如果还没有看到,请打印“我们看到了所有相同的季节,混蛋!”大小==0
以下是相应的Mathematica代码:

Clear[s, u, v, s2, u2, v2, m, n, testdata, trainingdata, user, user2d];
find1nn[trainingdata_, user_] := {
  {u , s, v} = SingularValueDecomposition[Transpose[trainingdata]];
  (* Reducr to 2 dimensions. *)
  u2 = u[[All, {1, 2}]];
  s2 = s[[{1, 2}, {1, 2}]];
  v2 = v[[All, {1, 2}]];
  user2d = user.u2.Inverse[s2];
  {m, n} = Dimensions[v2];
  closest = -1;
  index = -1;
  For[a = 1, a < m, a++,
    {distance = 1 - CosineDistance[v2[[a, {1, 2}]], user2d];,
        If[distance > closest, {closest = distance, index = a}];}];
  closestuserratings = trainingdata[[index]];
  closestuserratings
  }
rec[closest_, userx_] := {
  d = Dimensions[closest];
  For[b = 1, b <= d[[2]], b++,
    If[userx[[b]] == 0., userx[[b]] = closest[[1, b]]]
    ]
   userx
  }
finalrec[td_, user_] := rec[find1nn[td, user], user]
(*Clear[s,u,v,s2,u2,v2,m,n,testdata,trainingdata,user,user2d]*)
testdata = {{5., 5., 3., 0., 5., 5.}, {5., 0., 4., 1., 4., 4.}, {0., 
    3., 0., 5., 4., 5.}, {5., 4., 3., 3., 5., 5.}};
bob = {5., 0., 4., 0., 4., 5.};
(*recommend[testdata,bob]*)
find1nn[testdata, bob]
finalrec[testdata, bob]
Clear[s,u,v,s2,u2,v2,m,n,testdata,trainingdata,user,user2d];
find1nn[培训数据,用户]:={
{u,s,v}=奇异值分解[转置[训练数据];
(*减速器为2个尺寸。*)
u2=u[[全部,{1,2}]];
s2=s[{1,2},{1,2}]];
v2=v[[All,{1,2}]];
user2d=user.u2.Inverse[s2];
{m,n}=维数[v2];
最近值=-1;
指数=-1;
对于[a=1,a最近,{最近=距离,索引=a}];}];
闭合锯齿=训练数据[[索引]];
闭合锯齿
}
记录[最近的,用户的]:={
d=尺寸[最近];

对于[b=1,b请查阅Mathematica文档中的变量本地化教程。问题出在rec函数中。问题是您通常无法在Mathematica中修改输入变量(如果您的函数有一个Hold属性,那么您可以这样做,以便将相关参数传递给它而不进行计算,但这里的情况并非如此):

rec[最近的,用户的]:=
块[{d,b,userx=userxi},{d=Dimensions[closest];

对于[b=1,b来说,在不试图理解您想要实现的目标的情况下,这里有一个更为数学化的、但等效(我希望)的工作代码

显式循环消失了,许多不必要的变量被消除了。所有变量现在都是局部变量,所以不需要使用Clear[]

find1nn[trainingdata_, user_] := 
  Module[{u, s, v, v2, user2d, m, distances}, 
   {u, s, v} = SingularValueDecomposition[Transpose[trainingdata]];
   v2 = v[[All, {1, 2}]];
   user2d = user.u[[All, {1, 2}]].Inverse[s[[{1, 2}, {1, 2}]]];
   m = First@Dimensions[v2];
   distances = (1 - CosineDistance[v2[[#, {1, 2}]], user2d]) & /@ Range[m - 1];
   {trainingdata[[Ordering[distances][[-1]]]]}];

rec[closest_, userxi_] := userxi[[#]] /. {0. -> closest[[1, #]]} & /@ 
                          Range[Dimensions[closest][[2]]];

finalrec[td_, user_] := rec[find1nn[td, user], user];

我相信它仍然可以进行大量优化。

这是我根据belisarius的代码和Sjoerd的改进在这方面的尝试

find1nn[trainingdata_, user_] :=
  Module[{u, s, v, user2d, distances},
    {u, s, v} = SingularValueDecomposition[trainingdata\[Transpose], 2];
    user2d = user . u . Inverse@s;
    distances = # ~CosineDistance~ user2d & /@ Most@v;
    trainingdata[[ distances ~Ordering~ 1 ]]
  ]

rec[closest_, userxi_] := If[# == 0, #2, #] & ~MapThread~ {userxi, closest[[1]]}
Clear[s,u,v,s2,u2,v2,m,n,testdata,trainingdata,user,user2d];
推荐[培训数据,用户]:={
{u,s,v}=奇异值分解[转置[训练数据];
(*缩小到2个尺寸。*)
u2=u[[全部,{1,2}]];
s2=s[{1,2},{1,2}]];
v2=v[[All,{1,2}]];
user2d=user.u2.Inverse[s2];
{m,n}=维数[v2];
最近值=-1;
指数=-1;
对于[a=1,a最近,{最近=距离,索引=a}];}];
闭合锯齿=训练数据[[索引]];
d=尺寸[锯齿];
updateUser=表[0,{i,1,d[[1]]}];

对于[b=1,b你能编辑你的问题并发布原始片段吗?如果链接断了,这个问题的整个上下文都会丢失。我自己的翻译很累,但我得到的符号不匹配。这是什么原因造成的?@先生说,这两部分的错误加起来是奇数。我很乐意承认这个数学超出了我的正常经验是的,但是奇异值分解被用作特征值的代理,Mathematica关于奇异值分解的帮助提到“v的前两列是转置[m]的特征向量。m到符号:”所以我认为翻转的符号是预期的,并且可能会被解释。是的,位置[distance,#][[1,1]]和@Max[distance]可以写成顺序[distance][-1]]哪个更简洁,速度快30%。@Sjoerd很好!我专注于清理一些多余的
Part
用法。+1用于排序[distance,-1],而不是排序[distance][-1]]。这速度更快。向导难道我们不能放弃1余弦距离的1部分并测试最小值(排序[distance,1])除了最大的?Sjoerd,我不知道,我还得考虑一下。但我还有另一个问题:
排序
只会返回一个位置,但是
位置
可能会返回多个结果。因为我不知道这是怎么回事
find1nn[trainingdata_, user_] :=
  Module[{u, s, v, user2d, distances},
    {u, s, v} = SingularValueDecomposition[trainingdata\[Transpose], 2];
    user2d = user . u . Inverse@s;
    distances = # ~CosineDistance~ user2d & /@ Most@v;
    trainingdata[[ distances ~Ordering~ 1 ]]
  ]

rec[closest_, userxi_] := If[# == 0, #2, #] & ~MapThread~ {userxi, closest[[1]]}
Clear[s, u, v, s2, u2, v2, m, n, testdata, trainingdata, user, user2d];
recommend[trainingdata_, user_] := {
  {u , s, v} = SingularValueDecomposition[Transpose[trainingdata]];
  (* Reducera till 2 dimensioner. *)
  u2 = u[[All, {1, 2}]];
  s2 = s[[{1, 2}, {1, 2}]];
  v2 = v[[All, {1, 2}]];
  user2d = user.u2.Inverse[s2];
  {m, n} = Dimensions[v2];
  closest = -1;
  index = -1;
  For[a = 1, a < m, a++,
    {distance = 1 - CosineDistance[v2[[a, {1, 2}]], user2d];,
        If[distance > closest, {closest = distance, index = a}];}];
  closestuserratings = trainingdata[[index]];
  d = Dimensions[closestuserratings];
  updateduser = Table[0, {i, 1, d[[1]]}];
  For[b = 1, b <= d[[1]], b++,
    If[user[[b]] == 0., updateduser[[b]] = closestuserratings[[b]], 
     updateduser[[b]] = user[[b]]]
    ]
   updateduser
  }
testdata = {{5., 5., 3., 0., 5., 5.}, {5., 0., 4., 1., 4., 4.}, {0., 
    3., 0., 5., 4., 5.}, {5., 4., 3., 3., 5., 5.}};
bob = {5., 0., 4., 0., 4., 5.};
recommend[testdata, bob]