计算两行以上的Hellinger距离_R

计算两行以上的Hellinger距离

计算两行以上的Hellinger距离,r,R,我有以下数据： dat # A tibble: 4 x 7 # Groups: Product.Name [4] Product.Name battery fast life new problem time <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> 1 BLU Studio 5.0

我有以下数据：

dat

# A tibble: 4 x 7
# Groups:   Product.Name [4]
  Product.Name      battery   fast   life    new problem  time
  <chr>               <dbl>  <dbl>  <dbl>  <dbl>   <dbl> <dbl>
1 BLU Studio 5.0      0.325 0.131  0.139  0.0929   0.167 0.145
2 iphone 4s           0.311 0.0512 0.0504 0.278    0.146 0.163
3 Motorola Moto E     0.249 0.169  0.137  0.130    0.150 0.165
4 Samsung Galaxy II   0.226 0.112  0.0531 0.120    0.228 0.260

我想计算每两行之间的 Hellinger 距离。逐对手动计算是可行的，但当行数增多时会非常耗时。结果应该是一个行名和列名相同（均为产品名称）的矩阵。

是否可以把这个函数应用于所有行的两两组合？

# Example data: four products (rows), one label column plus six numeric
# columns. The class vector and the extra attributes are stored on the object
# exactly as given.
xy <- structure(
  list(
    Product.Name = c(
      "BLU Studio 5.0", "iphone 4s", "Motorola Moto E", "Samsung Galaxy II"
    ),
    battery = c(
      0.324865107913669, 0.311268715524035,
      0.248677248677249, 0.226377952755905
    ),
    fast = c(
      0.131294964028777, 0.0512214342001576,
      0.169312169312169, 0.112204724409449
    ),
    life = c(
      0.138714028776978, 0.0504334121355398,
      0.136507936507936, 0.0531496062992126
    ),
    new = c(
      0.0928507194244604, 0.278171788810087,
      0.13015873015873, 0.12007874015748
    ),
    problem = c(
      0.16726618705036, 0.145784081954295,
      0.15026455026455, 0.228346456692913
    ),
    time = c(
      0.145008992805755, 0.163120567375887,
      0.165079365079365, 0.259842519685039
    )
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -4L),
  vars = "Product.Name",
  drop = TRUE,
  indices = list(0L, 1L, 2L, 3L),
  group_sizes = c(1L, 1L, 1L, 1L),
  biggest_group_size = 1L,
  labels = structure(
    list(
      Product.Name = c(
        "BLU Studio 5.0", "iphone 4s", "Motorola Moto E", "Samsung Galaxy II"
      )
    ),
    class = "data.frame",
    row.names = c(NA, -4L),
    vars = "Product.Name",
    drop = TRUE
  )
)

虽然您可以使用评论中建议的 `expand.grid`，但您可能并不需要每一行与自身的比较。
还有另一个用于计算组合的函数，名为 `combn`。
用它生成配对，然后只对这些组合应用自定义函数，如下所示：
library(statip)

# Example data: four products (rows), one label column plus six numeric
# columns. The class vector and the extra attributes are stored on the object
# exactly as given.
xy <- structure(
  list(
    Product.Name = c(
      "BLU Studio 5.0", "iphone 4s", "Motorola Moto E", "Samsung Galaxy II"
    ),
    battery = c(
      0.324865107913669, 0.311268715524035,
      0.248677248677249, 0.226377952755905
    ),
    fast = c(
      0.131294964028777, 0.0512214342001576,
      0.169312169312169, 0.112204724409449
    ),
    life = c(
      0.138714028776978, 0.0504334121355398,
      0.136507936507936, 0.0531496062992126
    ),
    new = c(
      0.0928507194244604, 0.278171788810087,
      0.13015873015873, 0.12007874015748
    ),
    problem = c(
      0.16726618705036, 0.145784081954295,
      0.15026455026455, 0.228346456692913
    ),
    time = c(
      0.145008992805755, 0.163120567375887,
      0.165079365079365, 0.259842519685039
    )
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -4L),
  vars = "Product.Name",
  drop = TRUE,
  indices = list(0L, 1L, 2L, 3L),
  group_sizes = c(1L, 1L, 1L, 1L),
  biggest_group_size = 1L,
  labels = structure(
    list(
      Product.Name = c(
        "BLU Studio 5.0", "iphone 4s", "Motorola Moto E", "Samsung Galaxy II"
      )
    ),
    class = "data.frame",
    row.names = c(NA, -4L),
    vars = "Product.Name",
    drop = TRUE
  )
)

# Pairwise Hellinger distance between every unordered pair of rows of `xy`.
# Column 1 of `xy` holds the row label; all remaining columns are numeric.
#
# NOTE(review): statip::hellinger() appears to estimate the distance between
# two *samples* via density estimation. If each row is meant to be a discrete
# probability vector, the closed form
#   sqrt(sum((sqrt(p) - sqrt(q))^2)) / sqrt(2)
# may be what is wanted instead -- confirm before relying on the numbers.

# as.list() hands back the bare columns, so the result does not depend on
# which `[` method (data.frame, tibble, grouped_df) happens to be registered.
cols <- as.list(xy)
labels <- as.character(cols[[1L]])
# One numeric matrix built once: row i is the numeric part of row i of `xy`.
values <- do.call(cbind, unname(lapply(cols[-1L], as.numeric)))

# combn() treats a single positive number as seq_len(x) and `1:0` counts
# backwards, so fewer than two rows must be rejected explicitly.
if (length(labels) < 2L) {
  stop("need at least two rows to form a pair", call. = FALSE)
}

# 2 x choose(n, 2) matrix: each column is one (i, j) pair with i < j, so
# self-comparisons and mirrored duplicates are never computed.
my.combos <- combn(seq_along(labels), 2)

# vapply() enforces exactly one double per pair, unlike apply(), whose return
# shape depends on what FUN happens to produce.
hell <- vapply(
  seq_len(ncol(my.combos)),
  function(k) {
    hellinger(values[my.combos[1L, k], ], values[my.combos[2L, k], ])
  },
  numeric(1)
)

# Assemble the result in one go instead of rbind-ing one data.frame per pair.
out <- data.frame(
  pair = paste(labels[my.combos[1L, ]], "-", labels[my.combos[2L, ]]),
  hell = hell
)

out


                                 pair      hell
1          BLU Studio 5.0 - iphone 4s 0.3141352
2    BLU Studio 5.0 - Motorola Moto E 0.2279467
3  BLU Studio 5.0 - Samsung Galaxy II 0.3010341
4         iphone 4s - Motorola Moto E 0.3734612
5       iphone 4s - Samsung Galaxy II 0.0359991
6 Motorola Moto E - Samsung Galaxy II 0.2915914

library(statip)
是的，这是可能的。您可以使用 `expand.grid`。
我们这里说的是多少行？如果行数很多，您可能要等很久很久。有 10 个不同的行。如果我尝试使用你的函数，会得到错误 `Error in FUN(newX[, i], ...)`：找不到对象“xy”
@Banjo 我已经添加了该对象。另外，之前缺少 my.combos，现在已补上。
library(statip)

# Example data: four products (rows), one label column plus six numeric
# columns. The class vector and the extra attributes are stored on the object
# exactly as given.
xy <- structure(
  list(
    Product.Name = c(
      "BLU Studio 5.0", "iphone 4s", "Motorola Moto E", "Samsung Galaxy II"
    ),
    battery = c(
      0.324865107913669, 0.311268715524035,
      0.248677248677249, 0.226377952755905
    ),
    fast = c(
      0.131294964028777, 0.0512214342001576,
      0.169312169312169, 0.112204724409449
    ),
    life = c(
      0.138714028776978, 0.0504334121355398,
      0.136507936507936, 0.0531496062992126
    ),
    new = c(
      0.0928507194244604, 0.278171788810087,
      0.13015873015873, 0.12007874015748
    ),
    problem = c(
      0.16726618705036, 0.145784081954295,
      0.15026455026455, 0.228346456692913
    ),
    time = c(
      0.145008992805755, 0.163120567375887,
      0.165079365079365, 0.259842519685039
    )
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -4L),
  vars = "Product.Name",
  drop = TRUE,
  indices = list(0L, 1L, 2L, 3L),
  group_sizes = c(1L, 1L, 1L, 1L),
  biggest_group_size = 1L,
  labels = structure(
    list(
      Product.Name = c(
        "BLU Studio 5.0", "iphone 4s", "Motorola Moto E", "Samsung Galaxy II"
      )
    ),
    class = "data.frame",
    row.names = c(NA, -4L),
    vars = "Product.Name",
    drop = TRUE
  )
)

# Pairwise Hellinger distance between every unordered pair of rows of `xy`.
# Column 1 of `xy` holds the row label; all remaining columns are numeric.
#
# NOTE(review): statip::hellinger() appears to estimate the distance between
# two *samples* via density estimation. If each row is meant to be a discrete
# probability vector, the closed form
#   sqrt(sum((sqrt(p) - sqrt(q))^2)) / sqrt(2)
# may be what is wanted instead -- confirm before relying on the numbers.

# as.list() hands back the bare columns, so the result does not depend on
# which `[` method (data.frame, tibble, grouped_df) happens to be registered.
cols <- as.list(xy)
labels <- as.character(cols[[1L]])
# One numeric matrix built once: row i is the numeric part of row i of `xy`.
values <- do.call(cbind, unname(lapply(cols[-1L], as.numeric)))

# combn() treats a single positive number as seq_len(x) and `1:0` counts
# backwards, so fewer than two rows must be rejected explicitly.
if (length(labels) < 2L) {
  stop("need at least two rows to form a pair", call. = FALSE)
}

# 2 x choose(n, 2) matrix: each column is one (i, j) pair with i < j, so
# self-comparisons and mirrored duplicates are never computed.
my.combos <- combn(seq_along(labels), 2)

# vapply() enforces exactly one double per pair, unlike apply(), whose return
# shape depends on what FUN happens to produce.
hell <- vapply(
  seq_len(ncol(my.combos)),
  function(k) {
    hellinger(values[my.combos[1L, k], ], values[my.combos[2L, k], ])
  },
  numeric(1)
)

# Assemble the result in one go instead of rbind-ing one data.frame per pair.
out <- data.frame(
  pair = paste(labels[my.combos[1L, ]], "-", labels[my.combos[2L, ]]),
  hell = hell
)

out


                                 pair      hell
1          BLU Studio 5.0 - iphone 4s 0.3141352
2    BLU Studio 5.0 - Motorola Moto E 0.2279467
3  BLU Studio 5.0 - Samsung Galaxy II 0.3010341
4         iphone 4s - Motorola Moto E 0.3734612
5       iphone 4s - Samsung Galaxy II 0.0359991
6 Motorola Moto E - Samsung Galaxy II 0.2915914