在data.frame中的每个单元格上应用函数

在data.frame中的每个单元格上应用函数,r,data.table,R,Data.table,我有这样一个data.frame: G5_01 X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02 1: 0/0 0/0 2: 0/0

我有这样一个data.frame:

G5_01
   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1:                                  0/0                                  0/0
2:                                  0/0                                  1/1
3:                                  0/1                                  0/0
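我想把每个单元格中“/”两侧的数字相加(例如 0/1 变为 1,1/1 变为 2),并将其转换为:

   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1:                                    0                                    0
2:                                    0                                    2
3:                                    1                                    0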

data.table似乎能够处理此问题,我的脚本如下所示:

library(data.table)

# Asker's attempt (does not give one value per cell): lapply(.SD, ...) hands
# each whole column to the function as x, and strsplit(x, "/")[[1]] keeps only
# the pieces of the first element, so sum() returns a single number per column.
G5_02<-setDT(G5_01)[,lapply(.SD,function(x) sum(as.numeric(strsplit(x,"/")[[1]][1]),
                                                as.numeric(strsplit(x,"/")[[1]][2])))]
对于如何修复它,有什么建议吗?

library(data.table)
# Replace every "a/b" cell with the numeric sum a + b, updating G5_01 in place.
# tstrsplit() splits all cells of a column at once and returns one vector per
# position, so this works for any number of columns and for multi-digit values
# instead of hard-coding column names and character positions.
cols <- names(G5_01)
setDT(G5_01)[, (cols) := lapply(.SD, function(column) {
  pieces <- tstrsplit(column, "/", fixed = TRUE)
  Reduce(`+`, lapply(pieces, as.numeric))
}), .SDcols = cols]

G5_01
         X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
    1:                                    0                                    0
    2:                                    0                                    2
    3:                                    1                                    0
数据

# Example data: two genotype columns of "a/b" strings.
# header = TRUE is spelled out because T is an ordinary variable that can be
# reassigned; stringsAsFactors = FALSE keeps the columns as character on
# R < 4.0 as well, which string functions such as strsplit() require.
G5_01 <- read.table(text = 'X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
                                  0/0                                  0/0
                                  0/0                                  1/1
                                 0/1                                  0/0', header = TRUE,
                    stringsAsFactors = FALSE)

也许您可以尝试下面的代码,其中使用了
nchar()
gsub()

下面是两个基本R的解决方案:

  • 解决方案1:(可能比
    sapply()
    apply()
    lapply()
    方法更快)
数据

# Example data: a plain data.frame with two character genotype columns,
# three rows and automatic row names.
G5_01 <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)

在 base R 中,我们可以先用 read.table 按“/”拆分每一列,再用 rowSums 对拆分结果求和:

# For each column, parse its "a/b" strings as a "/"-separated table and add
# the resulting fields row by row; df[] keeps the data.frame structure while
# replacing the column contents.
df[] <- lapply(df, function(column) {
  fields <- read.table(text = column, sep = "/", header = FALSE)
  rowSums(fields)
})
df
#  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1                                    0                                    0
#2                                    0                                    2
#3                                    1                                    0

如果您只需要处理1和0,那么一个可能的解决方案是统计每个单元格中“1”出现的次数,即

library(data.table)
# Count the "1" characters in every cell of every column; this matches the
# a + b sum only when the values are limited to 0 and 1.
setDT(df)[, lapply(.SD, function(column) {
  stringr::str_count(column, "1")
})][]

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1:                                    0                                    0
#2:                                    0                                    2
#3:                                    1                                    0

使用
data.table::tstrsplit
获得转置字符串拆分(例如
purrr::transpose(strsplit(x, "/"))
),然后我们可以转换为数字并将它们相加

library(dplyr)

# Sum the "/"-separated numbers in every cell of every column.
# tstrsplit() returns one character vector per position; base lapply() converts
# each to numeric (so purrr does not need to be attached) and Reduce() adds
# them element-wise. across(everything(), ...) replaces the superseded
# mutate_all().
df %>%
  mutate(across(everything(), function(column) {
    pieces <- data.table::tstrsplit(column, "/", fixed = TRUE)
    Reduce(`+`, lapply(pieces, as.numeric))
  }))

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
# 1                                    0                                    0
# 2                                    0                                    2
# 3                                    1                                    0

我对
data.table 了解不多,
但是你能把
lapply
改成
apply
并设置
margin=1
来迭代每一行吗?
mt2$meanvar@Mike谢谢,但是我需要把这个函数应用到每个单元格,而不是行或列。谢谢!我没想到在这种情况下计数1会更方便。
> G5_01
  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1                                    0                                    0
2                                    0                                    2
3                                    1                                    0
# Example data: a plain data.frame with two character genotype columns,
# three rows and automatic row names.
G5_01 <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)
# For each column, parse its "a/b" strings as a "/"-separated table and add
# the resulting fields row by row; df[] keeps the data.frame structure while
# replacing the column contents.
df[] <- lapply(df, function(column) {
  fields <- read.table(text = column, sep = "/", header = FALSE)
  rowSums(fields)
})
df
#  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1                                    0                                    0
#2                                    0                                    2
#3                                    1                                    0
# Example data: a plain data.frame with two character genotype columns,
# three rows and automatic row names.
df <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)
library(data.table)
# Count the "1" characters in every cell of every column; this matches the
# a + b sum only when the values are limited to 0 and 1.
setDT(df)[, lapply(.SD, function(column) {
  stringr::str_count(column, "1")
})][]

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1:                                    0                                    0
#2:                                    0                                    2
#3:                                    1                                    0
library(dplyr)

# Sum the "/"-separated numbers in every cell of every column.
# tstrsplit() returns one character vector per position; base lapply() converts
# each to numeric (so purrr does not need to be attached) and Reduce() adds
# them element-wise. across(everything(), ...) replaces the superseded
# mutate_all().
df %>%
  mutate(across(everything(), function(column) {
    pieces <- data.table::tstrsplit(column, "/", fixed = TRUE)
    Reduce(`+`, lapply(pieces, as.numeric))
  }))

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
# 1                                    0                                    0
# 2                                    0                                    2
# 3                                    1                                    0