R 我对不同的答案进行了微基准比较。如果我在比较中完全破坏了你的功能,请原谅。如果是这样,请随时改进我所做的!再次感谢您的贡献。我已经对不同的答案进行了微基准比较。如果我把你的答案写成函数,请原谅我。如果是这样,请随时改进我所做的!再次感谢你的贡献。 dta

R 我对不同的答案进行了微基准比较。如果我在比较中完全破坏了你的功能,请原谅。如果是这样,请随时改进我所做的!再次感谢您的贡献。我已经对不同的答案进行了微基准比较。如果我把你的答案写成函数,请原谅我。如果是这样,请随时改进我所做的!再次感谢你的贡献。 dta ,r,dplyr,code-cleanup,rowsum,R,Dplyr,Code Cleanup,Rowsum,我对不同的答案进行了微基准比较。如果我在比较中完全破坏了你的功能,请原谅。如果是这样,请随时改进我所做的!再次感谢您的贡献。我已经对不同的答案进行了微基准比较。如果我把你的答案写成函数,请原谅我。如果是这样,请随时改进我所做的!再次感谢你的贡献。 dta <- data.frame(foo=c(1,NA,3,4), fooZ=c(4,NA,5,NA), fooQ2=c(7,0,9,NA)) dta$sum1 <- rowSums(dta[, c('fooZ', 'fooQ2') ]

我对不同的答案进行了微基准比较。如果我在比较中完全破坏了你的功能,请原谅。如果是这样,请随时改进我所做的!再次感谢您的贡献。我已经对不同的答案进行了微基准比较。如果我把你的答案写成函数,请原谅我。如果是这样,请随时改进我所做的!再次感谢你的贡献。
dta <- data.frame(foo=c(1,NA,3,4), fooZ=c(4,NA,5,NA), fooQ2=c(7,0,9,NA))
dta$sum1 <- rowSums(dta[, c('fooZ', 'fooQ2') ], na.rm=TRUE) * ifelse(
      rowSums(is.na(dta[, c('fooZ', 'fooQ2') ])) == 
               ncol(dta[, c('fooZ', 'fooQ2') ]), NA, 1)
dta
# >   foo fooZ fooQ2 sum1
# > 1   1    4     7   11
# > 2  NA   NA     0    0
# > 3   3    5     9   14
# > 4   4   NA    NA   NA
# install.packages(c("dplyr", "ggplot2"), dependencies = TRUE)
require(dplyr)
dta$sum2 = dta %>% select(fooZ, fooQ2) %>% rowSums(., na.rm = TRUE)
dta
# >   foo fooZ fooQ2 sum1 sum2
# > 1   1    4     7   11   11
# > 2  NA   NA     0    0    0
# > 3   3    5     9   14   14
# > 4   4   NA    NA   NA    0
set.seed(667)
n <- 1e5+22
dta <- data.frame(
  foo = sample(c(1:10, NA), n, replace = TRUE),
  fooZ = sample(c(1:10, NA), n, replace = TRUE),
  fooQ2 = sample(c(1:10, NA), n, replace = TRUE))

slice <- c(902:907,979:984)
dta[slice,]
#>     foo fooZ fooQ2
#> 902  10    7     2
#> 903  10   10     9
#> 904  NA   NA     8
#> 905   6    4     3
#> 906   8    9    10
#> 907   1    5    NA
#> 979  NA    1     1
#> 980  10    2    NA
#> 981   7   NA    NA
#> 982   3    7     7
#> 983  NA    9     6
#> 984   7   10     7


# `baseline' solution
baseline <- function(z, ...) {W  <- z[, c(...)]; W <- rowSums(W, na.rm=TRUE) * ifelse(rowSums(is.na(W)) == ncol(W), NA, 1); W}

# install.packages(c("dplyr", "ggplot2"), dependencies = TRUE)
require(dplyr)
# G. G.Gro's dplyr solution
G.Gro_dplyr1 <- function(z, ...) z %>% mutate(sum2 = select(., ...) %>% { rowSums(., na.rm = TRUE) + ifelse(apply(is.na(.), 1, all), NA, 0) })

# G. G.Gro's Variation 1a solution
G.Gro_dplyr1a <- function(z, ...) z %>% mutate(sum2 = select(., fooZ, fooQ2) %>% apply(1, . %>% { sum(., na.rm = TRUE) + if (all(is.na(.))) NA else 0}))

# G. G.Gro's base solution
G.Gro_base <- function(z, ...) {W  <- z[, c(...)]; S = {X <- dta[, c("fooZ", "fooQ2")]; rowSums(X, na.rm = TRUE) + ifelse(apply(is.na(X), 1, all), NA, 0)}; S}

# Thierry's solution
Thierry_my_sum <- function(z, ...){z <- select(z, ...); sums <- rowSums(z, na.rm = TRUE); sums[apply(is.na(z), 1, all)] <- NA; sums}

# lmo's solution
lmo <- function(z, ...) {W  <- z[, c(...)]; rowSums(W, na.rm=TRUE) * (NA^(rowSums(is.na(W)) == ncol(W)))}

# Benjamin's solution
Benjamin <- function(..., na.rm = FALSE, all.na = NA){v <- list(...); all_na <- lapply(v, is.na); all_na <- Reduce(`&`, all_na); all_na; if (na.rm){v <- lapply(v, function(x) {x[is.na(x)] <- 0; x}); }; v <- Reduce(`+`, v); v[all_na] <- all.na; v;}

# Aramis7d's solution
Aramis7d <- function(z, ...) {z %>% select(...) %>% mutate(sum = rowSums(., na.rm=TRUE)) %>% mutate(s2 = rowSums(is.na(.))) %>% mutate(sum = if_else(s2 < 2, sum, as.double(NA))) %>%  select(sum) }

# Fail's solution combining from all
Fail <- function(z, ...){z <- select(z, ...); zTF <- rowMeans(is.na(z)) == 1; replace(rowSums(z, na.rm = TRUE), zTF, NA)}

# install.packages("microbenchmark", dependencies = TRUE)
require(microbenchmark)

# run test
res <- microbenchmark(
baseline(dta, c("fooZ", "fooQ2")),
Thierry_my_sum(dta, fooZ, fooQ2),
G.Gro_dplyr1(dta, fooZ, fooQ2)[,ncol(dta)+1],
G.Gro_dplyr1a(dta, fooZ, fooQ2)[, ncol(dta) + 1],
G.Gro_base(dta, c("fooZ", "fooQ2")),
(dta %>% mutate(sum99 = Benjamin(fooZ, fooQ2, na.rm = TRUE)))[,ncol(dta)+1],
lmo(dta, c("fooZ", "fooQ2")),
Aramis7d(dta, fooZ, fooQ2)[,1],
Fail(dta, fooZ, fooQ2),
 times = 25)

# clean up
levels(res[[1]]) <- c('baseline', 'Thierry', 'G.Gro1', 'G.Gro1a', 'G.Gro2', 'Benjamin', 'lmo', 'Aramis7d', 'Fail')

## Print results:
print(res)

 print(res)
#> Unit: milliseconds
#>      expr        min         lq        mean     median          uq        max neval cld
#>  baseline  12.729803  15.691060   31.141114  23.299101   48.694436   72.83702    25   a  
#>   Thierry 215.541035 241.795764  298.319826 263.822553  363.066476  494.90875    25   b 
#>    G.Gro1 226.761181 242.617099  295.413437 264.911513  307.339115  591.28424    25   b 
#>   G.Gro1a 935.176542 985.329298 1088.300741 997.788858 1030.085839 1736.51506    25   c
#>    G.Gro2 219.650080 227.464694  292.898566 246.188189  320.789036  505.08154    25   b 
#>  Benjamin   6.227054   9.327364   15.583907  11.230079   14.345366   55.44653    25   a  
#>       lmo   4.138434   5.970850    9.329506   6.851132    8.406799   39.40295    25   a  
#>  Aramis7d  33.966101  38.737671   60.777304  66.663967   72.686939  100.72799    25   a  
#>      Fail  11.464254  13.932386   20.476011  14.865245   25.156740   58.37730    25   a  

### Plot results:
boxplot(res)
rowSums(dta[-1], na.rm=TRUE) * (NA^(rowSums(is.na(dta[-1])) == ncol(dta[-1])))
[1] 11  8 14 NA
rowSumsNA <- function(dat, ...) {
    W <- data.matrix(dat[...])
    rowSums(W, na.rm=TRUE) * (NA^(rowSums(is.na(W)) == ncol(W)))
}
dta %>%
    mutate(sum2 = select(., fooZ, fooQ2) %>%
                  { rowSums(., na.rm = TRUE) + ifelse(apply(is.na(.), 1, all), NA, 0) })
  foo fooZ fooQ2 sum2
1   1    4     7   11
2  NA   NA     8    8
3   3    5     9   14
4   4   NA    NA   NA
dta %>%
    mutate(sum2 = select(., fooZ, fooQ2) %>%
        apply(1, . %>% { sum(., na.rm = TRUE) + if (all(is.na(.))) NA else 0}))
transform(dta, sum2 = { 
      X <- data.frame(fooZ, fooQ2)
      rowSums(X, na.rm = TRUE) + ifelse(apply(is.na(X), 1, all), NA, 0)
})
library(data.table)
DT <- as.data.table(dta)
DT[, sum2 := rowSums(.SD, na.rm = TRUE) + ifelse(apply(is.na(.SD), 1, all), NA, 0) , .SDcols = c("fooZ", "fooQ2")]
dta %>%
  select(-foo) %>%
  mutate(sum1 = rowSums(., na.rm=TRUE)) %>%
  mutate(s2 = rowSums(is.na(.))) %>%  
  mutate(sum1 = if_else(s2 < 2, sum1, as.double(NA))) %>%
  bind_cols(dta) %>%
  select(foo, fooZ, fooQ2, sum1)
  foo fooZ fooQ2 sum1
1   1    4     7   11
2  NA   NA     8    8
3   3    5     9   14
4   4   NA    NA   NA
dta %>% 
  mutate(all_na = Reduce(`&`, lapply(list(fooZ, fooQ2), is.na)),
         sum1 = Reduce(`+`, lapply(list(fooZ, fooQ2), function(x) {x[is.na(x)] <- 0; x})),
         sum1 = ifelse(all_na, NA, sum1)) %>% 
  select(-all_na)
rsum <- function(..., na.rm = FALSE, all.na = NA){

  v <- list(...)

  all_na <- lapply(v, is.na)
  all_na <- Reduce(`&`, all_na)
  all_na

  if (na.rm){
    v <- lapply(v, function(x) {x[is.na(x)] <- 0; x})
  }

  v <- Reduce(`+`, v)

  v[all_na] <- all.na
  v
}

dta %>% 
  mutate(sum1 = rsum(fooZ, fooQ2, na.rm = TRUE))
library(dplyr)
dta <- data.frame(foo=c(1,NA,3,4), fooZ=c(4,NA,5,NA), fooQ2=c(7,0,9,NA))
my_sum <- function(z, ...){
  z <- select(z, ...)
  sums <- rowSums(z, na.rm = TRUE)
  sums[apply(is.na(z), 1, all)] <- NA
  sums
}

dta %>%
  mutate(
    sum1 = my_sum(., fooZ, fooQ2),
    sum2 = my_sum(., foo, fooQ2),
    sum3 = my_sum(., foo, fooZ)
  )