比较具有相同列的两个dataframe的摘要统计信息

比较具有相同列的两个dataframe的摘要统计信息,r,dataframe,compare,summary,R,Dataframe,Compare,Summary,我有两个dataframe A和B,除了主键(在实际数据中,我有50多个这样的列)之外,其他列都相同,现在我想比较这两个dataframe的“summary”统计(正常的R summary()命令),但为了比较,我想看到它彼此相邻,如所附图像中所示 数据帧DPUT结构 structure(list(Pkey = c(1, 2, 3, 4, 5), Phy_marks = c(43, 44, 45, 46, 47), Math_marks = c(34, 34, 45, 32, 21)

我有两个dataframe A和B,除了主键(在实际数据中,我有50多个这样的列)之外,其他列都相同,现在我想比较这两个dataframe的“summary”统计(正常的R summary()命令),但为了比较,我想看到它彼此相邻,如所附图像中所示

数据帧DPUT结构

# dput() output of data frame A: Pkey is the primary key, the remaining
# columns hold the marks to be summarised.
structure(list(Pkey = c(1, 2, 3, 4, 5), Phy_marks = c(43, 44,  45,
    46, 47), Math_marks = c(34, 34, 45, 32, 21)), .Names = c("Pkey", 
    "Phy_marks", "Math_marks"), row.names = c(NA, -5L), class =
    "data.frame")

# dput() output of data frame B: same columns as A, with different Pkey
# values.
structure(list(Pkey = c(11, 12, 13, 14, 15), Phy_marks = c(43,  44,
    45, 46, 47), Math_marks = c(34, 34, 45, 32, 21)), .Names = c("Pkey",
    "Phy_marks", "Math_marks"), row.names = c(NA, -5L), class =
    "data.frame")

请帮忙

您可以利用我在下面创建的函数来比较这两个数据集

library(dplyr)
#' Compare the summary statistics of two data frames side by side
#'
#' Computes the six `summary()` statistics (Min., 1st Qu., Median, Mean,
#' 3rd Qu., Max.) for every numeric column of each input. Columns coming from
#' `data1` get the suffix "1", columns coming from `data2` get the suffix "2",
#' and the combined columns are ordered by name so that matching columns end
#' up next to each other (e.g. `mpg1`, `mpg2`).
#'
#' Non-numeric columns are skipped: summarising them yields Length/Class/Mode
#' instead of numbers and used to turn the whole result into character data.
#' Missing values are ignored by `summary()`; its extra "NA's" entry is dropped
#' so every column contributes exactly six rows.
#'
#' @param data1 A data frame (or an object coercible to one).
#' @param data2 A data frame (or an object coercible to one).
#' @return A data frame with one row per statistic and one column per numeric
#'   input column.
compare_them <- function(data1, data2) {
  # Summarise the numeric columns of one input and tag them with `suffix`.
  summarise_numeric <- function(data, suffix) {
    data <- as.data.frame(data)
    numeric_cols <- data[vapply(data, is.numeric, logical(1))]
    stats <- vapply(
      numeric_cols,
      # summary() appends an "NA's" count when values are missing; keeping
      # only the first six entries gives every column the same shape.
      function(column) unclass(summary(column))[seq_len(6)],
      numeric(6)
    )
    stats <- data.frame(stats)
    names(stats) <- paste0(names(stats), suffix)
    stats
  }

  combined <- cbind(
    summarise_numeric(data1, "1"),
    summarise_numeric(data2, "2")
  )

  # Sorting by name interleaves the "1" and "2" versions of each column.
  combined[, order(names(combined)), drop = FALSE]
}

# Example: compare mtcars against a copy with every value doubled and open
# the result in the data viewer.
compare_them(mtcars,mtcars*2) %>% View()
library(dplyr)

compare_them:您可以利用我在下面创建的函数来比较这两个数据集

library(dplyr)
#' Compare the summary statistics of two data frames side by side
#'
#' Computes the six `summary()` statistics (Min., 1st Qu., Median, Mean,
#' 3rd Qu., Max.) for every numeric column of each input. Columns coming from
#' `data1` get the suffix "1", columns coming from `data2` get the suffix "2",
#' and the combined columns are ordered by name so that matching columns end
#' up next to each other (e.g. `mpg1`, `mpg2`).
#'
#' Non-numeric columns are skipped: summarising them yields Length/Class/Mode
#' instead of numbers and used to turn the whole result into character data.
#' Missing values are ignored by `summary()`; its extra "NA's" entry is dropped
#' so every column contributes exactly six rows.
#'
#' @param data1 A data frame (or an object coercible to one).
#' @param data2 A data frame (or an object coercible to one).
#' @return A data frame with one row per statistic and one column per numeric
#'   input column.
compare_them <- function(data1, data2) {
  # Summarise the numeric columns of one input and tag them with `suffix`.
  summarise_numeric <- function(data, suffix) {
    data <- as.data.frame(data)
    numeric_cols <- data[vapply(data, is.numeric, logical(1))]
    stats <- vapply(
      numeric_cols,
      # summary() appends an "NA's" count when values are missing; keeping
      # only the first six entries gives every column the same shape.
      function(column) unclass(summary(column))[seq_len(6)],
      numeric(6)
    )
    stats <- data.frame(stats)
    names(stats) <- paste0(names(stats), suffix)
    stats
  }

  combined <- cbind(
    summarise_numeric(data1, "1"),
    summarise_numeric(data2, "2")
  )

  # Sorting by name interleaves the "1" and "2" versions of each column.
  combined[, order(names(combined)), drop = FALSE]
}

# Example: compare mtcars against a copy with every value doubled and open
# the result in the data viewer.
compare_them(mtcars,mtcars*2) %>% View()
library(dplyr)

compare_them

一个选项是使用 summarise_all、dcast、unite 和 separate 为每个 data.frame 计算所需的统计量,并按相同的顺序排列结果。

注:OP 提供的样本数据已略微修改,以便 df_b 具有与 df_a 不同的统计数据。

library(tidyverse)
library(reshape2)

# Side-by-side summary statistics for df_a (group "A") and df_b (group "B").
#
# 1. Stack both data frames, label the rows with their source and drop the key.
# 2. Compute every statistic per group in a single summarise_all() call; a
#    named list replaces the deprecated funs() helper and makes the three
#    nested joins unnecessary. Result columns are named "<column>_<statistic>".
# 3. Reshape to one row per statistic and one "<column>_<group>" column per
#    input column and source, with the value columns sorted by name.
#
# NOTE: the "<column>_<statistic>" names are split on "_" into exactly two
# parts, so input column names must not contain an underscore themselves
# (hence Phy.marks / Math.marks in the sample data).
df_a %>%
  mutate(Grp = "A") %>%
  bind_rows(mutate(df_b, Grp = "B")) %>%
  select(-Pkey) %>%
  group_by(Grp) %>%
  summarise_all(list(
    min = min,
    mean = mean,
    median = median,
    max = max,
    Q1 = ~ quantile(.x, probs = 0.25, names = FALSE),
    Q2 = ~ quantile(.x, probs = 0.50, names = FALSE),
    Q3 = ~ quantile(.x, probs = 0.75, names = FALSE)
  )) %>%
  pivot_longer(-Grp, names_to = "key", values_to = "val") %>%
  separate("key", c("sub", "param"), sep = "_") %>%
  unite("sub", c("sub", "Grp"), sep = "_") %>%
  # dcast() comes from reshape2; hand it a plain data.frame.
  as.data.frame() %>%
  dcast(param ~ sub, value.var = "val") %>%
  select(param, all_of(sort(setdiff(names(.), "param"))))

#   param Math.marks_A Math.marks_B Phy.marks_A Phy.marks_B
#1    max         45.0        100.0          47        99.0
#2   mean         33.2         66.4          45        63.6
#3 median         34.0         80.0          45        60.0
#4    min         21.0         24.0          43        25.0
#5     Q1         32.0         40.0          44        44.0
#6     Q2         34.0         80.0          45        60.0
#7     Q3         34.0         88.0          46        90.0
数据

# Sample data A: primary key plus two columns of marks.
df_a <- data.frame(
  Pkey = c(1, 2, 3, 4, 5),
  Phy.marks = c(43, 44, 45, 46, 47),
  Math.marks = c(34, 34, 45, 32, 21)
)

# Sample data B: same columns as A, with different keys and marks.
df_b <- data.frame(
  Pkey = c(11, 12, 13, 14, 15),
  Phy.marks = c(90, 44, 60, 25, 99),
  Math.marks = c(24, 40, 80, 88, 100)
)

df_a

一个选项是使用 summarise_all、dcast、unite 和 separate 为每个 data.frame 计算所需的统计量,并按相同的顺序排列结果。

注:OP 提供的样本数据已略微修改,以便 df_b 具有与 df_a 不同的统计数据。

library(tidyverse)
library(reshape2)

# Side-by-side summary statistics for df_a (group "A") and df_b (group "B").
#
# 1. Stack both data frames, label the rows with their source and drop the key.
# 2. Compute every statistic per group in a single summarise_all() call; a
#    named list replaces the deprecated funs() helper and makes the three
#    nested joins unnecessary. Result columns are named "<column>_<statistic>".
# 3. Reshape to one row per statistic and one "<column>_<group>" column per
#    input column and source, with the value columns sorted by name.
#
# NOTE: the "<column>_<statistic>" names are split on "_" into exactly two
# parts, so input column names must not contain an underscore themselves
# (hence Phy.marks / Math.marks in the sample data).
df_a %>%
  mutate(Grp = "A") %>%
  bind_rows(mutate(df_b, Grp = "B")) %>%
  select(-Pkey) %>%
  group_by(Grp) %>%
  summarise_all(list(
    min = min,
    mean = mean,
    median = median,
    max = max,
    Q1 = ~ quantile(.x, probs = 0.25, names = FALSE),
    Q2 = ~ quantile(.x, probs = 0.50, names = FALSE),
    Q3 = ~ quantile(.x, probs = 0.75, names = FALSE)
  )) %>%
  pivot_longer(-Grp, names_to = "key", values_to = "val") %>%
  separate("key", c("sub", "param"), sep = "_") %>%
  unite("sub", c("sub", "Grp"), sep = "_") %>%
  # dcast() comes from reshape2; hand it a plain data.frame.
  as.data.frame() %>%
  dcast(param ~ sub, value.var = "val") %>%
  select(param, all_of(sort(setdiff(names(.), "param"))))

#   param Math.marks_A Math.marks_B Phy.marks_A Phy.marks_B
#1    max         45.0        100.0          47        99.0
#2   mean         33.2         66.4          45        63.6
#3 median         34.0         80.0          45        60.0
#4    min         21.0         24.0          43        25.0
#5     Q1         32.0         40.0          44        44.0
#6     Q2         34.0         80.0          45        60.0
#7     Q3         34.0         88.0          46        90.0
数据

# Sample data A: primary key plus two columns of marks.
df_a <- data.frame(
  Pkey = c(1, 2, 3, 4, 5),
  Phy.marks = c(43, 44, 45, 46, 47),
  Math.marks = c(34, 34, 45, 32, 21)
)

# Sample data B: same columns as A, with different keys and marks.
df_b <- data.frame(
  Pkey = c(11, 12, 13, 14, 15),
  Phy.marks = c(90, 44, 60, 25, 99),
  Math.marks = c(24, 40, 80, 88, 100)
)

df_a

您应该提供一个最小的可复现示例,以便获得更好的帮助。请查看已添加的可复现示例。对不起,我是新来的,你能告诉我如何接受答案吗?你可以单击答案左侧的
勾选
标记来接受答案。您应该提供一个最小的可复现示例,以便获得更好的帮助。请查看已添加的可复现示例。对不起,我是新来的,你能告诉我如何接受答案吗?你可以单击答案左侧的
勾选
标记来接受答案。Genius :D 谢谢!如果有更简单的方法,请告诉我。抱歉,我是新手,你能告诉我如何接受答案吗?我找不到。我这样做时得到了不想要的结果:A1、A2、B1、B2、C1、C2 等列只显示 Length、Class、Mode(character),而没有显示摘要统计值,请帮忙。你可以使用
dplyr::select_if()
仅选取 data1 和 data2 中的数值列,例如 mtcars %>% select_if(is.numeric)。Genius :D 谢谢!如果有更简单的方法,请告诉我。抱歉,我是新手,你能告诉我如何接受答案吗?我找不到。我这样做时得到了不想要的结果:A1、A2、B1、B2、C1、C2 等列只显示 Length、Class、Mode(character),而没有显示摘要统计值,请帮忙。你可以使用
dplyr::select_if()
仅选取 data1 和 data2 中的数值列,例如 mtcars %>% select_if(is.numeric)。