R--用未确定的NAs逐行填充矩阵
我试图用数据样本填充矩阵:R--用未确定的NAs逐行填充矩阵,r,dataframe,matrix,na,R,Dataframe,Matrix,Na,我试图用数据样本填充矩阵: Pays NbChambre Prix 1 Grèce 56 390 2 Grèce 471 468 3 Grèce 93 427 4 Grèce 56 369 5 Grèce 286 499 6 Grèce 282 526 7 Grèce 310 587 8 Grèce 30
Pays NbChambre Prix
1 Grèce 56 390
2 Grèce 471 468
3 Grèce 93 427
4 Grèce 56 369
5 Grèce 286 499
6 Grèce 282 526
7 Grèce 310 587
8 Grèce 300 534
9 Maroc 146 447
10 Maroc 250 482
11 Maroc 196 511
12 Maroc 324 532
13 Maroc 138 450
14 Maroc 400 569
15 Maroc 366 419
16 Maroc 300 421
17 Maroc 144 579
18 Maroc 330 598
19 Maroc 260 495
20 Maroc 170 730
21 Portugal 254 646
22 Portugal 140 652
23 Portugal 273 802
24 Portugal 260 761
25 Portugal 169 1101
26 Tunisie 225 434
27 Tunisie 225 489
28 Tunisie 250 436
29 Tunisie 550 399
30 Tunisie 800 477
31 Tunisie 150 375
32 Tunisie 425 486
33 Tunisie 366 447
34 Tunisie 200 473
35 Tunisie 130 495
36 Turquie 500 617
37 Turquie 50 489
38 Turquie 232 520
39 Turquie 110 534
我需要用向量NbChambre
的值填充一个矩阵,但在某种程度上
每行用于某一特定的支付
。矩阵的维数将是
nrow=length(unique(Pays)),ncol=max(table(Pays))
我需要找到一种方法来填充矩阵,比如R自动放置
NA或0或任何值,在数据不足时结束一行,然后跳过该行以继续向量NbChambre
。我对R比较陌生,所以我似乎真的找不到任何方法来做到这一点
结果应该是这样的
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
[1,] 56 471 93 56 286 282 310 300 NA NA NA NA
[2,] 146 250 196 324 138 400 366 300 144 330 260 170
[3,] 254 140 273 260 169 NA NA NA NA NA NA NA
[4,]
[5,] and so on
我真的很想有人能帮我,谢谢 具有
dplyr
+tidyr
的解决方案:
library(dplyr)
library(tidyr)
df %>%
select(-Prix) %>%
group_by(Pays) %>%
mutate(ID = row_number()) %>%
spread(ID, NbChambre) %>%
ungroup() %>%
select(-Pays) %>%
as.matrix()
使用数据的另一种方法。表
:
library(data.table)
setDT(df)[,ID := rowid(Pays)]
as.matrix(dcast(df, Pays ~ ID, value.var = "NbChambre")[,Pays := NULL])
结果:
1 2 3 4 5 6 7 8 9 10 11 12
1 56 471 93 56 286 282 310 300 NA NA NA NA
2 146 250 196 324 138 400 366 300 144 330 260 170
3 254 140 273 260 169 NA NA NA NA NA NA NA
4 225 225 250 550 800 150 425 366 200 130 NA NA
5 500 50 232 110 NA NA NA NA NA NA NA NA
df = structure(list(Pays = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L
), .Label = c("Grèce", "Maroc", "Portugal", "Tunisie", "Turquie"
), class = "factor"), NbChambre = c(56L, 471L, 93L, 56L, 286L,
282L, 310L, 300L, 146L, 250L, 196L, 324L, 138L, 400L, 366L, 300L,
144L, 330L, 260L, 170L, 254L, 140L, 273L, 260L, 169L, 225L, 225L,
250L, 550L, 800L, 150L, 425L, 366L, 200L, 130L, 500L, 50L, 232L,
110L), Prix = c(390L, 468L, 427L, 369L, 499L, 526L, 587L, 534L,
447L, 482L, 511L, 532L, 450L, 569L, 419L, 421L, 579L, 598L, 495L,
730L, 646L, 652L, 802L, 761L, 1101L, 434L, 489L, 436L, 399L,
477L, 375L, 486L, 447L, 473L, 495L, 617L, 489L, 520L, 534L)), .Names = c("Pays",
"NbChambre", "Prix"), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39"))
数据:
1 2 3 4 5 6 7 8 9 10 11 12
1 56 471 93 56 286 282 310 300 NA NA NA NA
2 146 250 196 324 138 400 366 300 144 330 260 170
3 254 140 273 260 169 NA NA NA NA NA NA NA
4 225 225 250 550 800 150 425 366 200 130 NA NA
5 500 50 232 110 NA NA NA NA NA NA NA NA
df = structure(list(Pays = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L
), .Label = c("Grèce", "Maroc", "Portugal", "Tunisie", "Turquie"
), class = "factor"), NbChambre = c(56L, 471L, 93L, 56L, 286L,
282L, 310L, 300L, 146L, 250L, 196L, 324L, 138L, 400L, 366L, 300L,
144L, 330L, 260L, 170L, 254L, 140L, 273L, 260L, 169L, 225L, 225L,
250L, 550L, 800L, 150L, 425L, 366L, 200L, 130L, 500L, 50L, 232L,
110L), Prix = c(390L, 468L, 427L, 369L, 499L, 526L, 587L, 534L,
447L, 482L, 511L, 532L, 450L, 569L, 419L, 421L, 579L, 598L, 495L,
730L, 646L, 652L, 802L, 761L, 1101L, 434L, 489L, 436L, 399L,
477L, 375L, 486L, 447L, 473L, 495L, 617L, 489L, 520L, 534L)), .Names = c("Pays",
"NbChambre", "Prix"), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39"))
使用
dplyr
+tidyr
的解决方案:
library(dplyr)
library(tidyr)
df %>%
select(-Prix) %>%
group_by(Pays) %>%
mutate(ID = row_number()) %>%
spread(ID, NbChambre) %>%
ungroup() %>%
select(-Pays) %>%
as.matrix()
使用数据的另一种方法。表
:
library(data.table)
setDT(df)[,ID := rowid(Pays)]
as.matrix(dcast(df, Pays ~ ID, value.var = "NbChambre")[,Pays := NULL])
结果:
1 2 3 4 5 6 7 8 9 10 11 12
1 56 471 93 56 286 282 310 300 NA NA NA NA
2 146 250 196 324 138 400 366 300 144 330 260 170
3 254 140 273 260 169 NA NA NA NA NA NA NA
4 225 225 250 550 800 150 425 366 200 130 NA NA
5 500 50 232 110 NA NA NA NA NA NA NA NA
df = structure(list(Pays = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L
), .Label = c("Grèce", "Maroc", "Portugal", "Tunisie", "Turquie"
), class = "factor"), NbChambre = c(56L, 471L, 93L, 56L, 286L,
282L, 310L, 300L, 146L, 250L, 196L, 324L, 138L, 400L, 366L, 300L,
144L, 330L, 260L, 170L, 254L, 140L, 273L, 260L, 169L, 225L, 225L,
250L, 550L, 800L, 150L, 425L, 366L, 200L, 130L, 500L, 50L, 232L,
110L), Prix = c(390L, 468L, 427L, 369L, 499L, 526L, 587L, 534L,
447L, 482L, 511L, 532L, 450L, 569L, 419L, 421L, 579L, 598L, 495L,
730L, 646L, 652L, 802L, 761L, 1101L, 434L, 489L, 436L, 399L,
477L, 375L, 486L, 447L, 473L, 495L, 617L, 489L, 520L, 534L)), .Names = c("Pays",
"NbChambre", "Prix"), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39"))
数据:
1 2 3 4 5 6 7 8 9 10 11 12
1 56 471 93 56 286 282 310 300 NA NA NA NA
2 146 250 196 324 138 400 366 300 144 330 260 170
3 254 140 273 260 169 NA NA NA NA NA NA NA
4 225 225 250 550 800 150 425 366 200 130 NA NA
5 500 50 232 110 NA NA NA NA NA NA NA NA
df = structure(list(Pays = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L
), .Label = c("Grèce", "Maroc", "Portugal", "Tunisie", "Turquie"
), class = "factor"), NbChambre = c(56L, 471L, 93L, 56L, 286L,
282L, 310L, 300L, 146L, 250L, 196L, 324L, 138L, 400L, 366L, 300L,
144L, 330L, 260L, 170L, 254L, 140L, 273L, 260L, 169L, 225L, 225L,
250L, 550L, 800L, 150L, 425L, 366L, 200L, 130L, 500L, 50L, 232L,
110L), Prix = c(390L, 468L, 427L, 369L, 499L, 526L, 587L, 534L,
447L, 482L, 511L, 532L, 450L, 569L, 419L, 421L, 579L, 598L, 495L,
730L, 646L, 652L, 802L, 761L, 1101L, 434L, 489L, 436L, 399L,
477L, 375L, 486L, 447L, 473L, 495L, 617L, 489L, 520L, 534L)), .Names = c("Pays",
"NbChambre", "Prix"), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39"))
这是另一种使用
split
和sapply
的可能性:
l = split(df$NbChambre,df$Pays)
m = max(lengths(l))
res = t(sapply(l,function(x) x[1:m]))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
Grèce 56 471 93 56 286 282 310 300 NA NA NA NA
Maroc 146 250 196 324 138 400 366 300 144 330 260 170
Portugal 254 140 273 260 169 NA NA NA NA NA NA NA
Tunisie 225 225 250 550 800 150 425 366 200 130 NA NA
Turquie 500 50 232 110 NA NA NA NA NA NA NA NA
这是另一种使用
split
和sapply
的可能性:
l = split(df$NbChambre,df$Pays)
m = max(lengths(l))
res = t(sapply(l,function(x) x[1:m]))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
Grèce 56 471 93 56 286 282 310 300 NA NA NA NA
Maroc 146 250 196 324 138 400 366 300 144 330 260 170
Portugal 254 140 273 260 169 NA NA NA NA NA NA NA
Tunisie 225 225 250 550 800 150 425 366 200 130 NA NA
Turquie 500 50 232 110 NA NA NA NA NA NA NA NA
你能取悦你的数据吗?
xtabs
是为这类事情而构建的:xtabs(NbChambre~Pays+ave(Pays,Pays,FUN=seq_-along),df)
你能取悦你的数据吗?xtabs
是为这类事情而构建的:xtabs(NbChambre~Pays+ave(Pays,Pays,FUN=seq_-along),df)
如果直接使用向量,您可以切掉一个sapply
:l=split(df$NbChambre,df$Pays);m=最大(长度(l));t(sapply(l,function(x)x[seq(m)])
如果直接使用向量,可以切掉一个sapply
:l=split(df$NbChambre,df$Pays);m=最大(长度(l));t(sapply(l,函数(x)x[seq(m)])