在R中对表重新排序并重新格式化_R_Formatting

在R中对表重新排序并重新格式化

r formatting

在R中对表重新排序并重新格式化,r,formatting,R,Formatting,我有一个这样的大表（这只是原始表的一个摘录，它有数千个函数（行）和许多样本（列，第一列除外））：我需要像这样重新排序，并添加两列（“total_hits”列是“hits”列中具有相同“ID”的所有数字的总和，“Percentage”是“hits”/“total_hits”的比值）：我目前正在使用R，所以如果可能的话，希望能得到R的解决方案。非常感谢。这里有一种方法：先用 pivot_longer 将数据从“宽”格式转换为“长”格式，再按“ID”分组，计算“hits”的总和以及“percentage” library(dplyr)

我有一个这样的大表（这只是原始表的一个摘录，原表有数千个函数（行）和许多样本（列，第一列除外））：

我需要像这样重新排序，并添加两列（“total_hits”列是“hits”列中具有相同“ID”的所有数字的总和，“Percentage”是“hits”/“total_hits”的比值）：

我目前正在使用 R，所以如果可能的话，希望能得到 R 的解决方案。

非常感谢。

这里有一种方法：先用 pivot_longer 将数据从“宽”格式转换为“长”格式，再按“ID”分组，计算“hits”的总和（total_hits）以及每一行所占的比例（percentage）。

# Tidyverse approach: reshape `df1` from wide to long, then add the per-ID
# total of `hits` and each row's share of that total.
library(dplyr)
library(tidyr)

df1 %>%
  # Stack every column except `function.` into (ID, hits) pairs.
  pivot_longer(cols = -function., names_to = "ID", values_to = "hits") %>%
  arrange(ID) %>%
  group_by(ID) %>%
  # Inside each ID group, sum(hits) is recycled to every row of the group.
  mutate(total_hits = sum(hits), percentage = hits / total_hits) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()
# A tibble: 9 x 5
#  function. ID      hits total_hits percentage
#  <chr>     <chr>  <int>      <int>      <dbl>
#1 phi       LKJY11     0         66     0     
#2 3R        LKJY11    65         66     0.985 
#3 GlcNAc    LKJY11     1         66     0.0152
#4 phi       M123Q      9         84     0.107 
#5 3R        M123Q     74         84     0.881 
#6 GlcNAc    M123Q      1         84     0.0119
#7 phi       OO987      2         73     0.0274
#8 3R        OO987     71         73     0.973 
#9 GlcNAc    OO987      0         73     0

library(dplyr)
library(tidyr)
df1 %>%
  pivot_longer(cols = -function., names_to = "ID", values_to = "hits") %>%
  arrange(ID) %>%
  group_by(ID) %>%
  mutate(total_hits = sum(hits), percentage = hits/total_hits)
# A tibble: 9 x 5
# Groups:   ID [3]
#  function. ID      hits total_hits percentage
#  <chr>     <chr>  <int>      <int>      <dbl>
#1 phi       LKJY11     0         66     0
#2 3R        LKJY11    65         66     0.985
#3 GlcNAc    LKJY11     1         66     0.0152
#4 phi       M123Q      9         84     0.107
#5 3R        M123Q     74         84     0.881
#6 GlcNAc    M123Q      1         84     0.0119
#7 phi       OO987      2         73     0.0274
#8 3R        OO987     71         73     0.973
#9 GlcNAc    OO987      0         73     0

数据

df1

Base R 解决方案：
# Base R approach: stack the per-sample columns of `df1` into long format,
# then add the per-ID total of `hits` and each row's share of that total.
# `df1` is overwritten with the long-format result.

df1 <- local({
  # Every column other than the "function." label column is stacked.
  col_names <- names(df1)
  sample_cols <- col_names[col_names != "function."]

  long <- reshape(
    df1,
    direction = "long",
    idvar = "function.",
    ids = unique(df1[["function."]]),
    varying = sample_cols,
    v.names = "hits",
    timevar = "ID",
    times = sample_cols
  )

  # Rebuild the frame with default row names instead of the ones reshape()
  # assigned.
  data.frame(long, row.names = NULL)
})

# Total hits per ID, repeated on every row belonging to that ID.
df1[["total_hits"]] <- ave(df1[["hits"]], df1[["ID"]], FUN = sum)

# Each row's fraction of its ID total (a proportion, not scaled to 100).
df1[["percentage"]] <- df1[["hits"]] / df1[["total_hits"]]

#长距离重塑数据帧：
df1
# Tidyverse approach: reshape `df1` from wide to long, then add the per-ID
# total of `hits` and each row's share of that total.
library(dplyr)
library(tidyr)

df1 %>%
  # Stack every column except `function.` into (ID, hits) pairs.
  pivot_longer(cols = -function., names_to = "ID", values_to = "hits") %>%
  arrange(ID) %>%
  group_by(ID) %>%
  # Inside each ID group, sum(hits) is recycled to every row of the group.
  mutate(total_hits = sum(hits), percentage = hits / total_hits) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()
# A tibble: 9 x 5
#  function. ID      hits total_hits percentage
#  <chr>     <chr>  <int>      <int>      <dbl>
#1 phi       LKJY11     0         66     0     
#2 3R        LKJY11    65         66     0.985 
#3 GlcNAc    LKJY11     1         66     0.0152
#4 phi       M123Q      9         84     0.107 
#5 3R        M123Q     74         84     0.881 
#6 GlcNAc    M123Q      1         84     0.0119
#7 phi       OO987      2         73     0.0274
#8 3R        OO987     71         73     0.973 
#9 GlcNAc    OO987      0         73     0     

# Example input: one row per function, one integer hit-count column per
# sample ID.
df1 <- data.frame(
  `function.` = c("phi", "3R", "GlcNAc"),
  M123Q = c(9L, 74L, 1L),
  OO987 = c(2L, 71L, 0L),
  LKJY11 = c(0L, 65L, 1L),
  stringsAsFactors = FALSE
)

# Base R approach: stack the per-sample columns into long format, then add
# the per-ID total of `hits` and each row's share of that total.
# `df1` is overwritten with the long-format result.
df1 <- local({
  # Every column other than the "function." label column is stacked.
  col_names <- names(df1)
  sample_cols <- col_names[col_names != "function."]

  long <- reshape(
    df1,
    direction = "long",
    idvar = "function.",
    ids = unique(df1[["function."]]),
    varying = sample_cols,
    v.names = "hits",
    timevar = "ID",
    times = sample_cols
  )

  # Rebuild the frame with default row names instead of the ones reshape()
  # assigned.
  data.frame(long, row.names = NULL)
})

# Total hits per ID, repeated on every row belonging to that ID.
df1[["total_hits"]] <- ave(df1[["hits"]], df1[["ID"]], FUN = sum)

# Each row's fraction of its ID total (a proportion, not scaled to 100).
df1[["percentage"]] <- df1[["hits"]] / df1[["total_hits"]]