在data.frame中的每个单元格上应用函数_R_Data.table

在data.frame中的每个单元格上应用函数

在data.frame中的每个单元格上应用函数,r,data.table,R,Data.table,我有这样一个data.frame： G5_01 X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02 1: 0/0 0/0 2: 0/0

我有这样一个data.frame：

G5_01
   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1:                                  0/0                                  0/0
2:                                  0/0                                  1/1
3:                                  0/1                                  0/0

我想计算每个单元格中的变化，并将其转换为：

data.table似乎能够处理此问题，我的脚本如下所示：

library(data.table)

# Attempted per-cell conversion; it does not produce one value per cell.
# lapply(.SD, ...) calls the function once per column, so `x` is a whole
# column. strsplit(x, "/") returns one list element per cell, but `[[1]]`
# keeps only the pieces of the first cell, and sum() adds those two numbers
# into a single value, so every column collapses to one number.
# NOTE(review): strsplit() needs character input; presumably the columns are
# character rather than factor here -- confirm.
G5_02<-setDT(G5_01)[,lapply(.SD,function(x) sum(as.numeric(strsplit(x,"/")[[1]][1]),
                                                as.numeric(strsplit(x,"/")[[1]][2])))]

我该如何修复它？有什么建议吗？

library(data.table)

# Convert G5_01 to a data.table by reference, then overwrite each genotype
# column in place with the sum of the two single-character codes found at
# positions 1 and 3 of every "a/b" string.
setDT(G5_01)
G5_01[
  ,
  X8803713069_R01C02_8803713069_R01C02 :=
    as.numeric(substr(X8803713069_R01C02_8803713069_R01C02, 1, 1)) +
    as.numeric(substr(X8803713069_R01C02_8803713069_R01C02, 3, 3))
]
G5_01[
  ,
  X8803713069_R02C02_8803713069_R02C02 :=
    as.numeric(substr(X8803713069_R02C02_8803713069_R02C02, 1, 1)) +
    as.numeric(substr(X8803713069_R02C02_8803713069_R02C02, 3, 3))
]

G5_01
         X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
    1:                                    0                                    0
    2:                                    0                                    2
    3:                                    1                                    0

数据

# Example data: two genotype columns read from inline, whitespace-separated
# text. `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned; stringsAsFactors = FALSE keeps the cells as character strings
# on R versions before 4.0 as well.
G5_01 <- read.table(text = 'X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
                                  0/0                                  0/0
                                  0/0                                  1/1
                                 0/1                                  0/0', header = TRUE,
                    stringsAsFactors = FALSE)

G5_01

也许您可以尝试下面的代码，其中使用了 nchar() 和 gsub()。

下面是两个基本R的解决方案：

解决方案1：（可能比 sapply()、apply() 或 lapply() 方法更快）

数据
# Example data: a plain data.frame with two character genotype columns and
# three rows.
G5_01 <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)

G5_01

在 base R 中，我们可以在使用 read.table 进行拆分后使用 rowSums

# Replace every cell of `df` with the sum of its "/"-separated numbers.
# read.table() parses each column into one field per piece and rowSums()
# adds the pieces back up per cell. Assigning through `df[] <-` writes the
# results into the existing columns instead of replacing `df` with a list.
# as.character() lets factor columns work too, because read.table(text = )
# expects a character vector.
df[] <- lapply(df, function(x) {
  pieces <- read.table(text = as.character(x), sep = "/", header = FALSE)
  rowSums(pieces)
})
df
#  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1                                    0                                    0
#2                                    0                                    2
#3                                    1                                    0

df[]

如果您只需要处理 1 和 0，那么一个可能的解决方案是统计每个单元格中 1 的个数，即：
library(data.table)

# Convert df to a data.table by reference, then build a new table in which
# every cell holds the number of "1" characters found in the original cell.
# The trailing `[]` is kept from the original call.
setDT(df)
df[, lapply(.SD, stringr::str_count, pattern = '1')][]

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1:                                    0                                    0
#2:                                    0                                    2
#3:                                    1                                    0

使用 data.table::tstrsplit 获得转置的字符串拆分（类似于 purrr::transpose(strsplit(x, "/"))），然后我们可以将各部分转换为数字并将它们相加
library(dplyr)

# Replace every cell with the sum of its "/"-separated numbers.
# tstrsplit() splits a whole column at once and returns one vector per piece
# position; the pieces are converted to numeric and added element-wise.
# Base lapply()/Reduce() are used because map() comes from purrr, which
# library(dplyr) does not attach.
df %>%
  mutate_all(~ {
    pieces <- data.table::tstrsplit(.x, "/", fixed = TRUE)
    Reduce(`+`, lapply(pieces, as.numeric))
  })

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
# 1                                    0                                    0
# 2                                    0                                    2
# 3                                    1                                    0

我对 data.table 了解不多，但是你能把 lapply 改成 apply 并设置 MARGIN = 1
来迭代每一行吗？mt2$meanvar@Mike谢谢，但是我需要把这个函数应用到每个单元格，而不是行或列。谢谢！我没想到在这种情况下计数1会更方便。
> G5_01
  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1                                    0                                    0
2                                    0                                    2
3                                    1                                    0

# Example data: a plain data.frame with two character genotype columns and
# three rows.
G5_01 <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)

# Replace every cell of `df` with the sum of its "/"-separated numbers.
# read.table() parses each column into one field per piece and rowSums()
# adds the pieces back up per cell. Assigning through `df[] <-` writes the
# results into the existing columns instead of replacing `df` with a list.
# as.character() lets factor columns work too, because read.table(text = )
# expects a character vector.
df[] <- lapply(df, function(x) {
  pieces <- read.table(text = as.character(x), sep = "/", header = FALSE)
  rowSums(pieces)
})
df
#  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1                                    0                                    0
#2                                    0                                    2
#3                                    1                                    0

# Example data: a plain data.frame with two character genotype columns and
# three rows.
df <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)

library(data.table)

# Convert df to a data.table by reference, then build a new table in which
# every cell holds the number of "1" characters found in the original cell.
# The trailing `[]` is kept from the original call.
setDT(df)
df[, lapply(.SD, stringr::str_count, pattern = '1')][]

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1:                                    0                                    0
#2:                                    0                                    2
#3:                                    1                                    0

library(dplyr)

# Replace every cell with the sum of its "/"-separated numbers.
# tstrsplit() splits a whole column at once and returns one vector per piece
# position; the pieces are converted to numeric and added element-wise.
# Base lapply()/Reduce() are used because map() comes from purrr, which
# library(dplyr) does not attach.
df %>%
  mutate_all(~ {
    pieces <- data.table::tstrsplit(.x, "/", fixed = TRUE)
    Reduce(`+`, lapply(pieces, as.numeric))
  })

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
# 1                                    0                                    0
# 2                                    0                                    2
# 3                                    1                                    0