在R中对表重新排序并重新格式化

在R中对表重新排序并重新格式化,r,formatting,R,Formatting,我有一个这样的大表(这只是原始表的一个摘录,它有数千个函数(行)和许多样本(列,第一个除外)): 我需要像这样重新排序,添加两列(“total_hits”列是“hits”列中具有相同“ID”的所有数字的总和,“Percentage”是“hits”除以“total_hits”的比值): 我目前正在使用R,所以如果可能的话,我非常感谢R解决方案 非常感谢。这里有一种方法,我们将“宽”改为“长”(pivot_longer),按“ID”分组,计算“hits”的总和以及“百分比” library(dplyr

我有一个这样的大表(这只是原始表的一个摘录,它有数千个函数(行)和许多样本(列,第一个除外)):

我需要像这样重新排序,并添加两列(“total_hits”列是“hits”列中具有相同“ID”的所有数字的总和,“Percentage”是“hits”除以“total_hits”的比值):

我目前正在使用R,所以如果可能的话,我非常感谢R解决方案


非常感谢。

这里有一种方法:我们先用 `pivot_longer` 把数据从“宽”格式转换为“长”格式,然后按“ID”分组,计算“hits”的总和(total_hits)以及“百分比”(hits / total_hits)

library(dplyr)
library(tidyr)
# Reshape the wide table (one column per sample ID) into long format, then
# attach each ID's hit total and every row's share of that total.
df1 %>%
  # Every column except `function.` becomes an (ID, hits) pair.
  pivot_longer(cols = -function., names_to = "ID", values_to = "hits") %>%
  arrange(ID) %>%
  group_by(ID) %>%
  # `percentage` is a proportion (hits / total_hits), not multiplied by 100;
  # an ID whose hits sum to 0 yields NaN.
  mutate(total_hits = sum(hits), percentage = hits / total_hits) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()
# A tibble: 9 x 5
#  function. ID      hits total_hits percentage
#  <chr>     <chr>  <int>      <int>      <dbl>
#1 phi       LKJY11     0         66     0     
#2 3R        LKJY11    65         66     0.985 
#3 GlcNAc    LKJY11     1         66     0.0152
#4 phi       M123Q      9         84     0.107 
#5 3R        M123Q     74         84     0.881 
#6 GlcNAc    M123Q      1         84     0.0119
#7 phi       OO987      2         73     0.0274
#8 3R        OO987     71         73     0.973 
#9 GlcNAc    OO987      0         73     0     
library(dplyr)
library(tidyr)
df1 %>%
  pivot_longer(cols = -function., names_to = "ID", values_to = "hits") %>%
  arrange(ID) %>%
  group_by(ID) %>%
  mutate(total_hits = sum(hits), percentage = hits/total_hits)
# A tibble: 9 x 5
# Groups:   ID [3]
#  function. ID      hits total_hits percentage
#  <chr>     <chr>  <int>      <int>      <dbl>
#1 phi       LKJY11     0         66     0
#2 3R        LKJY11    65         66     0.985
#3 GlcNAc    LKJY11     1         66     0.0152
#4 phi       M123Q      9         84     0.107
#5 3R        M123Q     74         84     0.881
#6 GlcNAc    M123Q      1         84     0.0119
#7 phi       OO987      2         73     0.0274
#8 3R        OO987     71         73     0.973
#9 GlcNAc    OO987      0         73     0
数据:`df1`(其定义见下文的 `structure()` 调用)

Base R 解决方案:

# Base R: wide -> long reshape, then per-ID totals and shares.

# Every column other than `function.` holds the hits of one sample ID.
sample_cols <- names(df1)[names(df1) != "function."]

# Stack the sample columns into a single `hits` column and record the source
# column name in `ID`. Wrapping in data.frame(..., row.names = NULL) replaces
# the compound row names that reshape() generates with the default 1..n.
df1 <- data.frame(
  reshape(
    df1,
    direction = "long",
    idvar = "function.",
    ids = unique(df1$function.),
    varying = sample_cols,
    times = sample_cols,
    timevar = "ID",
    v.names = "hits"
  ),
  row.names = NULL
)

# Total hits within each ID, repeated on every row of that ID.
df1[["total_hits"]] <- ave(df1[["hits"]], df1[["ID"]], FUN = sum)

# Share of the ID's total contributed by each row.
df1[["percentage"]] <- with(df1, hits / total_hits)
# 将数据框重塑为长格式:
df1
library(dplyr)
library(tidyr)
# Reshape the wide table (one column per sample ID) into long format, then
# attach each ID's hit total and every row's share of that total.
df1 %>%
  # Every column except `function.` becomes an (ID, hits) pair.
  pivot_longer(cols = -function., names_to = "ID", values_to = "hits") %>%
  arrange(ID) %>%
  group_by(ID) %>%
  # `percentage` is a proportion (hits / total_hits), not multiplied by 100;
  # an ID whose hits sum to 0 yields NaN.
  mutate(total_hits = sum(hits), percentage = hits / total_hits) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()
# A tibble: 9 x 5
#  function. ID      hits total_hits percentage
#  <chr>     <chr>  <int>      <int>      <dbl>
#1 phi       LKJY11     0         66     0     
#2 3R        LKJY11    65         66     0.985 
#3 GlcNAc    LKJY11     1         66     0.0152
#4 phi       M123Q      9         84     0.107 
#5 3R        M123Q     74         84     0.881 
#6 GlcNAc    M123Q      1         84     0.0119
#7 phi       OO987      2         73     0.0274
#8 3R        OO987     71         73     0.973 
#9 GlcNAc    OO987      0         73     0     
# Example input: one row per function, one integer hits column per sample ID.
df1 <- structure(
  list(
    `function.` = c("phi", "3R", "GlcNAc"),
    M123Q = c(9L, 74L, 1L),
    OO987 = c(2L, 71L, 0L),
    LKJY11 = c(0L, 65L, 1L)
  ),
  class = "data.frame",
  row.names = c(NA, -3L)
)

# Every column other than `function.` holds the hits of one sample ID.
sample_cols <- names(df1)[names(df1) != "function."]

# Stack the sample columns into a single `hits` column and record the source
# column name in `ID`. Wrapping in data.frame(..., row.names = NULL) replaces
# the compound row names that reshape() generates with the default 1..n.
df1 <- data.frame(
  reshape(
    df1,
    direction = "long",
    idvar = "function.",
    ids = unique(df1$function.),
    varying = sample_cols,
    times = sample_cols,
    timevar = "ID",
    v.names = "hits"
  ),
  row.names = NULL
)

# Total hits within each ID, repeated on every row of that ID.
df1[["total_hits"]] <- ave(df1[["hits"]], df1[["ID"]], FUN = sum)

# Share of the ID's total contributed by each row.
df1[["percentage"]] <- with(df1, hits / total_hits)