R 非线性模型以漏掉一个ID的交叉验证模式预测NA值

R 非线性模型以漏掉一个ID的交叉验证模式预测NA值,r,prediction,cross-validation,nonlinear-functions,R,Prediction,Cross Validation,Nonlinear Functions,我有一个数据帧df df<-structure(list(ID = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 19L, 19L, 19L, 19L, 19L, 19L, 24L, 24L, 24L, 25L, 25L, 26L, 26L, 36L, 36L, 36L, 36L, 36L, 36L, 41L, 41L, 41L, 41L,

我有一个数据帧
df

df<-structure(list(ID = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 19L, 19L, 
19L, 19L, 19L, 19L, 24L, 24L, 24L, 25L, 25L, 26L, 26L, 36L, 36L, 
36L, 36L, 36L, 36L, 41L, 41L, 41L, 41L, 41L, 41L, 42L, 45L, 45L, 
45L, 45L, 45L, 47L, 55L, 64L, 64L, 65L, 71L, 71L, 71L, 71L, 71L, 
43L, 43L, 43L, 43L, 43L, 43L, 43L, 60L, 60L, 60L, 60L, 60L, 60L, 
60L, 60L, 60L), .Label = c("AU-Tum", "BE-Bra", "BR-Sa3", "CA-Ca1", 
"CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", "CA-NS1", "CA-NS2", "CA-NS3", 
"CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", "CA-Oas", "CA-Obs", "CA-Ojp", 
"CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", "CA-SF3", "CA-SJ1", "CA-SJ2", 
"CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", "CZ-Bk1", "DE-Har", "DE-Wet", 
"DK-Sor", "FI-Hyy", "FR-Hes", "FR-Pue", "ID-Pag", "IT-Ro1", "IT-Ro2", 
"IT-Sro", "JP-Tak", "JP-Tef", "NL-Loo", "SE-Abi", "SE-Fla", "SE-Nor", 
"SE-Sk1", "SE-Sk2", "SE-St1", "UK-Gri", "US-Blo", "US-Bn1", "US-Bn2", 
"Us-Bn3", "US-Dk3", "US-Fmf", "US-Fwf", "US-Ha1", "US-Ha2", "US-Ho1", 
"US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Nc2", "US-NR1", "US-Sp1", 
"US-Sp2", "US-Sp3", "US-Umb", "US-Wcr", "US-Wi0", "US-Wi1", "US-Wi2", 
"US-Wi4", "US-Wi8"), class = "factor"), y = c(380.654850683175, 
467.840079978108, 358.497598990798, 431.528439198621, 442.010991849005, 
351.189937948249, 343.098349086009, 357.122478995472, -471.194518864155, 
-514.390561270528, -518.123427070677, -554.919201846235, -614.669225180172, 
-391.545088194311, -124.396037524566, 32.3765077856369, 43.199114789255, 
-88.3050879996736, 58.9395819107303, 162.441016515717, 116.965395963751, 
-108.997818851843, -144.127755056645, -126.626824281528, -175.750439967494, 
-151.262252734334, -135.830685457215, 28.9567997518461, -68.2299433113076, 
-37.1677788909292, -167.045088054205, -108.258462657337, 100.907804159913, 
90.3369144331664, 233.031065647025, 287.956774678081, 189.082761215046, 
390.740067397826, 89.1989531565923, 155.527563805692, 224.442115622107, 
315.516411969438, 283.912847682368, 390.026366345584, 322.790248586796, 
312.50101460889, -638.973101716489, 132.601979068451, -42.7843619789928, 
82.2957233709167, 8.36848279205151, 115.376620422816, -186.42650026083, 
577.658561848104, 188.342473105964, 95.6089326666552, 936.236027855426, 
70.9266221858561, -5.91938436031342, 338.1149700284, 185.940875658067, 
198.121665383659, 254.551377562806, 45.4501812993549, 187.152575587854, 
152.183998291846, 226.360116416588, 225.67982583819, -0.0398367510642856, 
217.845216980437, 241.779151081573, 214.481376983225, 219.953942558961, 
315.959296110785, 263.547381375218, 194.449290025979, 305.158690313809, 
326.318877183832), x = c(49, 50, 51, 52, 53, 54, 55, 56, 0, 1, 
2, 3, 4, 5, 13, 14, 15, 16, 17, 71, 72, 1, 2, 3, 4, 5, 6, 9, 
10, 11, 1, 3, 29, 30, 54, 55, 56, 57, 58, 59, 64, 65, 66, 67, 
68, 69, 1, 34, 35, 37, 38, 39, 2, 19, 17, 18, 16, 67, 69, 70, 
72, 73, 101, 105, 106, 107, 108, 109, 110, 131, 132, 133, 134, 
135, 136, 137, 138, 139)), .Names = c("ID", "y", "x"), row.names = c(712L, 
713L, 714L, 715L, 716L, 717L, 718L, 719L, 720L, 721L, 722L, 723L, 
724L, 725L, 726L, 727L, 728L, 729L, 730L, 731L, 732L, 784L, 785L, 
786L, 787L, 788L, 789L, 793L, 794L, 795L, 796L, 797L, 798L, 799L, 
841L, 842L, 843L, 844L, 845L, 846L, 866L, 867L, 868L, 869L, 870L, 
871L, 872L, 880L, 881L, 882L, 883L, 884L, 889L, 892L, 916L, 917L, 
918L, 936L, 937L, 938L, 939L, 940L, 873L, 874L, 875L, 876L, 877L, 
878L, 879L, 905L, 906L, 907L, 908L, 909L, 910L, 911L, 912L, 913L
), class = "data.frame")
stat(df)
然后在我的数据帧
df

df<-structure(list(ID = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 19L, 19L, 
19L, 19L, 19L, 19L, 24L, 24L, 24L, 25L, 25L, 26L, 26L, 36L, 36L, 
36L, 36L, 36L, 36L, 41L, 41L, 41L, 41L, 41L, 41L, 42L, 45L, 45L, 
45L, 45L, 45L, 47L, 55L, 64L, 64L, 65L, 71L, 71L, 71L, 71L, 71L, 
43L, 43L, 43L, 43L, 43L, 43L, 43L, 60L, 60L, 60L, 60L, 60L, 60L, 
60L, 60L, 60L), .Label = c("AU-Tum", "BE-Bra", "BR-Sa3", "CA-Ca1", 
"CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", "CA-NS1", "CA-NS2", "CA-NS3", 
"CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", "CA-Oas", "CA-Obs", "CA-Ojp", 
"CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", "CA-SF3", "CA-SJ1", "CA-SJ2", 
"CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", "CZ-Bk1", "DE-Har", "DE-Wet", 
"DK-Sor", "FI-Hyy", "FR-Hes", "FR-Pue", "ID-Pag", "IT-Ro1", "IT-Ro2", 
"IT-Sro", "JP-Tak", "JP-Tef", "NL-Loo", "SE-Abi", "SE-Fla", "SE-Nor", 
"SE-Sk1", "SE-Sk2", "SE-St1", "UK-Gri", "US-Blo", "US-Bn1", "US-Bn2", 
"Us-Bn3", "US-Dk3", "US-Fmf", "US-Fwf", "US-Ha1", "US-Ha2", "US-Ho1", 
"US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Nc2", "US-NR1", "US-Sp1", 
"US-Sp2", "US-Sp3", "US-Umb", "US-Wcr", "US-Wi0", "US-Wi1", "US-Wi2", 
"US-Wi4", "US-Wi8"), class = "factor"), y = c(380.654850683175, 
467.840079978108, 358.497598990798, 431.528439198621, 442.010991849005, 
351.189937948249, 343.098349086009, 357.122478995472, -471.194518864155, 
-514.390561270528, -518.123427070677, -554.919201846235, -614.669225180172, 
-391.545088194311, -124.396037524566, 32.3765077856369, 43.199114789255, 
-88.3050879996736, 58.9395819107303, 162.441016515717, 116.965395963751, 
-108.997818851843, -144.127755056645, -126.626824281528, -175.750439967494, 
-151.262252734334, -135.830685457215, 28.9567997518461, -68.2299433113076, 
-37.1677788909292, -167.045088054205, -108.258462657337, 100.907804159913, 
90.3369144331664, 233.031065647025, 287.956774678081, 189.082761215046, 
390.740067397826, 89.1989531565923, 155.527563805692, 224.442115622107, 
315.516411969438, 283.912847682368, 390.026366345584, 322.790248586796, 
312.50101460889, -638.973101716489, 132.601979068451, -42.7843619789928, 
82.2957233709167, 8.36848279205151, 115.376620422816, -186.42650026083, 
577.658561848104, 188.342473105964, 95.6089326666552, 936.236027855426, 
70.9266221858561, -5.91938436031342, 338.1149700284, 185.940875658067, 
198.121665383659, 254.551377562806, 45.4501812993549, 187.152575587854, 
152.183998291846, 226.360116416588, 225.67982583819, -0.0398367510642856, 
217.845216980437, 241.779151081573, 214.481376983225, 219.953942558961, 
315.959296110785, 263.547381375218, 194.449290025979, 305.158690313809, 
326.318877183832), x = c(49, 50, 51, 52, 53, 54, 55, 56, 0, 1, 
2, 3, 4, 5, 13, 14, 15, 16, 17, 71, 72, 1, 2, 3, 4, 5, 6, 9, 
10, 11, 1, 3, 29, 30, 54, 55, 56, 57, 58, 59, 64, 65, 66, 67, 
68, 69, 1, 34, 35, 37, 38, 39, 2, 19, 17, 18, 16, 67, 69, 70, 
72, 73, 101, 105, 106, 107, 108, 109, 110, 131, 132, 133, 134, 
135, 136, 137, 138, 139)), .Names = c("ID", "y", "x"), row.names = c(712L, 
713L, 714L, 715L, 716L, 717L, 718L, 719L, 720L, 721L, 722L, 723L, 
724L, 725L, 726L, 727L, 728L, 729L, 730L, 731L, 732L, 784L, 785L, 
786L, 787L, 788L, 789L, 793L, 794L, 795L, 796L, 797L, 798L, 799L, 
841L, 842L, 843L, 844L, 845L, 846L, 866L, 867L, 868L, 869L, 870L, 
871L, 872L, 880L, 881L, 882L, 883L, 884L, 889L, 892L, 916L, 917L, 
918L, 936L, 937L, 938L, 939L, 940L, 873L, 874L, 875L, 876L, 877L, 
878L, 879L, 905L, 906L, 907L, 908L, 909L, 910L, 911L, 912L, 913L
), class = "data.frame")
stat(df)
但是,该模型无法预测我的数据帧
df
的所有y值。它给出
NA

[1]        NA        NA        NA        NA        NA        NA 208.10405        NA        NA        NA        NA        NA        NA  38.21535        NA
[16]  90.76849  95.81436 103.35304 105.50550        NA        NA        NA  15.39989        NA        NA        NA  44.12078        NA        NA        NA
[31]        NA        NA 153.22284 156.78703        NA        NA        NA        NA        NA        NA        NA        NA        NA 222.45487        NA
[46]        NA        NA        NA 175.06730        NA 181.72692 181.68818        NA        NA        NA        NA        NA        NA        NA        NA
[61]        NA        NA        NA        NA        NA 252.93564        NA        NA 259.31112        NA        NA        NA        NA 252.88759        NA
[76] 258.52018 253.69781 252.87979

有人对此有解释吗

您使用了错误的模型。让我们绘制您的数据:

# Scatter plot of the response y against the predictor x to inspect the
# overall shape of the data before choosing a model.
with(df, plot(x, y))
显然,不能使用穿过原点的模型。可以使用带有偏移量的渐近模型

您还应该使用自启动模型

# Fit an asymptotic regression model with an offset. SSasympOff() is a
# self-starting model: it derives its own starting values for A, lrc and c0,
# so no `start` list has to be supplied to nls().
fit <- nls(y ~ SSasympOff(x, A, lrc, c0), data = df)

# Overlay the fitted curve on the existing scatter plot. The x grid must be
# passed to lines() explicitly: given only the predicted values, lines() plots
# them against their index (1:141), which shifts the curve one unit to the
# right of the x values (0:140) the predictions were computed for.
x_grid <- 0:140
lines(x_grid, predict(fit, newdata = data.frame(x = x_grid)))

fit查看
i=1的情况;nls(y~A*(1-exp(k*x)),data=df[-i,],start=list(A=1000,k=-0.224))
,结果是这样的:Error in nls(y ~ A * (1 - exp(k * x)), data = df[-i, ], start = list(A = 1000, : number of iterations exceeded maximum of 50
。我的参数不够好吗?似乎是这样。这就是为什么函数的输出是
NA
。好的。有什么方法可以让我的参数正确吗?我想这是另一个更广泛的问题。很好。这种自启动只适用于渐近回归模型吗?如果我的模型方程是 y=f(x),该怎么办?谢谢。请阅读 help("selfStart")。有许多预定义的自启动模型(帮助页面中有链接),但如果你有办法估计起始值,也可以自己编写。
SSasympOff
的公式在文档中给出。@Roland好的。我发现了同样的问题,但这次使用了伽马函数。我提出了一个新问题:有没有办法优化这类函数的参数?