在R中对表重新排序并重新格式化
我有一个这样的大表(这只是原始表的一个摘录,原始表有数千个函数(行)和许多样本(列,第一列除外)): 我需要像这样重新排列,并添加两列(“total_hits”列是“hits”列中具有相同“ID”的所有数字的总和,“percentage”是“hits”除以“total_hits”的商): 我目前正在使用R,所以如果可能的话,非常希望得到R的解决方案。在R中对表重新排序并重新格式化,r,formatting,R,Formatting,我有一个这样的大表(这只是原始表的一个摘录,原始表有数千个函数(行)和许多样本(列,第一列除外)): 我需要像这样重新排列,并添加两列(“total_hits”列是“hits”列中具有相同“ID”的所有数字的总和,“percentage”是“hits”除以“total_hits”的商): 我目前正在使用R,所以如果可能的话,非常希望得到R的解决方案 非常感谢。这里有一种方法:先将表从“宽”格式转换为“长”格式(pivot_longer),再按“ID”分组,计算“hits”的总和(total_hits)以及“percentage” library(dplyr)
非常感谢。这里有一种方法:先将表从“宽”格式转换为“长”格式(
pivot_longer
),再按“ID”分组,计算“hits”的总和(total_hits)以及“percentage”(hits / total_hits)
library(dplyr)
library(tidyr)
# Reshape df1 from wide to long: every column except `function.` is turned
# into an (ID, hits) pair, then per-ID totals and shares are added.
df1 %>%
  pivot_longer(
    cols = -function.,
    names_to = "ID",
    values_to = "hits"
  ) %>%
  # Order rows by sample ID.
  arrange(ID) %>%
  # Group by ID so that sum() in mutate() is evaluated within each ID.
  group_by(ID) %>%
  mutate(
    total_hits = sum(hits),
    percentage = hits / total_hits
  )
# A tibble: 9 x 5
# Groups: ID [3]
# function. ID hits total_hits percentage
# <chr> <chr> <int> <int> <dbl>
#1 phi LKJY11 0 66 0
#2 3R LKJY11 65 66 0.985
#3 GlcNAc LKJY11 1 66 0.0152
#4 phi M123Q 9 84 0.107
#5 3R M123Q 74 84 0.881
#6 GlcNAc M123Q 1 84 0.0119
#7 phi OO987 2 73 0.0274
#8 3R OO987 71 73 0.973
#9 GlcNAc OO987 0 73 0
# Restored from a machine-translated copy in which the function names, the
# argument names and the string quotes had been altered, so it could not run.
library(dplyr)
library(tidyr)
# Reshape df1 from wide to long: every column except `function.` is turned
# into an (ID, hits) pair, then per-ID totals and shares are added.
df1 %>%
  pivot_longer(cols = -function., names_to = "ID", values_to = "hits") %>%
  arrange(ID) %>%
  # Group by ID so that sum() in mutate() is evaluated within each ID.
  group_by(ID) %>%
  mutate(total_hits = sum(hits), percentage = hits / total_hits)
# A tibble: 9 x 5
# Groups: ID [3]
# function. ID hits total_hits percentage
# <chr> <chr> <int> <int> <dbl>
#1 phi LKJY11 0 66 0
#2 3R LKJY11 65 66 0.985
#3 GlcNAc LKJY11 1 66 0.0152
#4 phi M123Q 9 84 0.107
#5 3R M123Q 74 84 0.881
#6 GlcNAc M123Q 1 84 0.0119
#7 phi OO987 2 73 0.0274
#8 3R OO987 71 73 0.973
#9 GlcNAc OO987 0 73 0
数据
df1(完整定义见下文)
Base R 解决方案:
# Convert the wide table (one hits column per sample ID) to long format and
# overwrite df1 with the result: one row per function/ID pair.
df1 <- local({
  # Every column other than the function label is passed to reshape() as a
  # varying column and as a time value.
  sample_cols <- names(df1)[names(df1) != "function."]
  long <- reshape(
    df1,
    direction = "long",
    idvar = "function.",
    ids = unique(df1[["function."]]),
    varying = sample_cols,
    v.names = "hits",
    timevar = "ID",
    times = sample_cols
  )
  # Rebuild the data frame with row.names = NULL (default row names).
  data.frame(long, row.names = NULL)
})
# Per-ID total: ave() applies sum() to hits within each ID and returns a
# vector aligned with the rows of df1.
df1[["total_hits"]] <- ave(df1[["hits"]], df1[["ID"]], FUN = sum)
# Share of each row's hits within its ID total.
df1[["percentage"]] <- df1[["hits"]] / df1[["total_hits"]]
# 将数据框重塑为长格式:
df1
library(dplyr)
library(tidyr)
# Wide -> long: all columns except `function.` are stacked into ID/hits
# pairs; total_hits and percentage are then computed per ID.
df1 %>%
  pivot_longer(
    cols = -function.,
    names_to = "ID",
    values_to = "hits"
  ) %>%
  arrange(ID) %>%
  # Grouping makes sum(hits) below a per-ID sum rather than a grand total.
  group_by(ID) %>%
  mutate(
    total_hits = sum(hits),
    percentage = hits / total_hits
  )
# A tibble: 9 x 5
# Groups: ID [3]
# function. ID hits total_hits percentage
# <chr> <chr> <int> <int> <dbl>
#1 phi LKJY11 0 66 0
#2 3R LKJY11 65 66 0.985
#3 GlcNAc LKJY11 1 66 0.0152
#4 phi M123Q 9 84 0.107
#5 3R M123Q 74 84 0.881
#6 GlcNAc M123Q 1 84 0.0119
#7 phi OO987 2 73 0.0274
#8 3R OO987 71 73 0.973
#9 GlcNAc OO987 0 73 0
# Sample input in wide format: one row per function and one integer
# hit-count column per sample ID.
df1 <- data.frame(
  `function.` = c("phi", "3R", "GlcNAc"),
  M123Q = c(9L, 74L, 1L),
  OO987 = c(2L, 71L, 0L),
  LKJY11 = c(0L, 65L, 1L),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Convert the wide table to long format and overwrite df1 with the result:
# one row per function/ID pair.
df1 <- local({
  # Every column other than the function label is passed to reshape() as a
  # varying column and as a time value.
  sample_cols <- names(df1)[names(df1) != "function."]
  long <- reshape(
    df1,
    direction = "long",
    idvar = "function.",
    ids = unique(df1[["function."]]),
    varying = sample_cols,
    v.names = "hits",
    timevar = "ID",
    times = sample_cols
  )
  # Rebuild the data frame with row.names = NULL (default row names).
  data.frame(long, row.names = NULL)
})

# Per-ID total: ave() applies sum() to hits within each ID and returns a
# vector aligned with the rows of df1.
df1[["total_hits"]] <- ave(df1[["hits"]], df1[["ID"]], FUN = sum)

# Share of each row's hits within its ID total.
df1[["percentage"]] <- df1[["hits"]] / df1[["total_hits"]]