R rowSums()正在生成一个奇怪的输出。我做错了什么?

R rowSums()正在生成一个奇怪的输出。我做错了什么?,r,R,我有一个名为tmp的data.frame。总结如下: > summary(tmp) Organization Advance Monthly Sales Other Homeownership Rate Length:2460 Min. : 0 Min. : 0 Min. : 0 Class :character 1st Qu.: 0 1st Qu.:

我有一个名为tmp的data.frame。总结如下:

> summary(tmp)
 Organization       Advance Monthly Sales     Other       Homeownership Rate
 Length:2460        Min.   :  0           Min.   :    0   Min.   :   0      
 Class :character   1st Qu.:  0           1st Qu.:    0   1st Qu.:   0      
 Mode  :character   Median :  0           Median :    2   Median :   0      
                    Mean   :  1           Mean   :   53   Mean   :   3      
                    3rd Qu.:  0           3rd Qu.:   14   3rd Qu.:   0      
                    Max.   :637           Max.   :34622   Max.   :3272      
 New Residential Construction New Residential Sales Construction Spending
 Min.   :   0                 Min.   :   0          Min.   :    0        
 1st Qu.:   0                 1st Qu.:   0          1st Qu.:    0        
 Median :   0                 Median :   0          Median :    0        
 Mean   :  10                 Mean   :   1          Mean   :   83        
 3rd Qu.:   0                 3rd Qu.:   0          3rd Qu.:    0        
 Max.   :9078                 Max.   :1856          Max.   :60630        
 U.S. International Manufacturing and Trade Advance Report on Durable Goods
 Min.   :    0      Min.   :  0             Min.   :   0                   
 1st Qu.:    0      1st Qu.:  0             1st Qu.:   0                   
 Median :    0      Median :  0             Median :   0                   
 Mean   :   18      Mean   :  0             Mean   :   2                   
 3rd Qu.:    3      3rd Qu.:  0             3rd Qu.:   0                   
 Max.   :11992      Max.   :874             Max.   :4785                   
 Quarterly Financial Report Advance U.S. Intl Trades Monthly Wholesale Trade
 Min.   :  0                Min.   :  0              Min.   :  0            
 1st Qu.:  0                1st Qu.:  0              1st Qu.:  0            
 Median :  0                Median :  0              Median :  0            
 Mean   :  0                Mean   :  0              Mean   :  0            
 3rd Qu.:  0                3rd Qu.:  0              3rd Qu.:  0            
 Max.   :478                Max.   :849              Max.   :697            
 Quarterly Services Survey Business Formation Statistics     Total  
 Min.   :  0               Min.   :  0                   Min.   :0  
 1st Qu.:  0               1st Qu.:  0                   1st Qu.:0  
 Median :  0               Median :  0                   Median :0  
 Mean   :  0               Mean   :  0                   Mean   :0  
 3rd Qu.:  0               3rd Qu.:  0                   3rd Qu.:0  
 Max.   :423               Max.   :233                   Max.   :0
我正在使用以下命令创建列“Total”:

tmp$Total <- rowSums(tmp[, -1])

我知道这看起来不太好,但是你可以看到,这一行的总和应该是1,而我得到的却是一个极小的数值。我做错什么了吗?

***编辑***

> dput(head(tmp, 1))
structure(list(Organization = "VeriSign Infrastructure & Operations", 
    `Advance Monthly Sales` = structure(4.94065645841247e-324, class = "integer64"), 
    `New Residential Sales` = structure(0, class = "integer64"), 
    `U.S. International` = structure(0, class = "integer64"), 
    Other = structure(0, class = "integer64"), `New Residential Construction` = structure(0, class = "integer64"), 
    `Advance Report on Durable Goods` = structure(0, class = "integer64"), 
    `Homeownership Rate` = structure(0, class = "integer64"), 
    `Construction Spending` = structure(0, class = "integer64"), 
    `Manufacturing and Trade` = structure(0, class = "integer64"), 
    `Quarterly Financial Report` = structure(0, class = "integer64"), 
    `Advance U.S. Intl Trades` = structure(0, class = "integer64"), 
    `Monthly Wholesale Trade` = structure(0, class = "integer64"), 
    `Quarterly Services Survey` = structure(0, class = "integer64"), 
    `Business Formation Statistics` = structure(0, class = "integer64")), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))
***编辑2***

还有一些东西:

> tmp$"Advance Monthly Sales"
integer64
[1] 1   0   0   1   0   0   0   0   0   2   0   0   9   0   0   0   0   0  
[19] 0   0   0   0   1   0   0   0   0   1   0   0   0   8   0   0   0   1   
[37] 0   0   1   0   0   1   0   0   0   0   0   0   0   0   1   0   0   0  
[55] 1   0   0   0   0   0   1   0   0   0   4   0   0   0   0   0   0   0  
[73] 0   1   13  0   0   0   0   0   0   0   0   0   2   0   0   0   0   0  
[91] 0   0   14  0   0   0   1   0   9   0   0   0   0   0   1   0   0   0 

> tmp$"Advance Monthly Sales" %>% class()
[1] "integer64"
> tmp2 <- tmp
> tmp2$"Advance Monthly Sales" <- as.numeric(tmp2$"Advance Monthly Sales")
> tmp2$"Advance Monthly Sales" %>% class()
[1] "numeric"
> dput(head(tmp2, 1))
structure(list(Organization = "VeriSign Infrastructure & Operations", 
    `Advance Monthly Sales` = 1, `New Residential Sales` = structure(0, class = "integer64"), 
    `U.S. International` = structure(0, class = "integer64"), 
    Other = structure(0, class = "integer64"), `New Residential Construction` = structure(0, class = "integer64"), 
    `Advance Report on Durable Goods` = structure(0, class = "integer64"), 
    `Homeownership Rate` = structure(0, class = "integer64"), 
    `Construction Spending` = structure(0, class = "integer64"), 
    `Manufacturing and Trade` = structure(0, class = "integer64"), 
    `Quarterly Financial Report` = structure(0, class = "integer64"), 
    `Advance U.S. Intl Trades` = structure(0, class = "integer64"), 
    `Monthly Wholesale Trade` = structure(0, class = "integer64"), 
    `Quarterly Services Survey` = structure(0, class = "integer64"), 
    `Business Formation Statistics` = structure(0, class = "integer64")), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))

这是我对正在发生的事情的猜测

# Minimal reproduction: one integer64 column (x) next to an ordinary double
# column (y).
df <- data.frame(x = bit64::as.integer64(1), y = 0)
print(df)
#>   x y
#> 1 1 0
# The row prints as 1 and 0, yet rowSums() returns a tiny value (about 5e-324)
# instead of 1.
# NOTE(review): presumably rowSums() ignores the integer64 class and adds up
# the underlying storage as plain doubles -- confirm against the bit64 docs.
rowSums(df)
#> [1] 4.940656e-324

或者,您可以使用 as.numeric 将每一列转换为双精度数值(double)。

这是我对发生的事情的猜测

# Minimal reproduction: x is an integer64 column, y is an ordinary double
# column.
df <- data.frame(x = bit64::as.integer64(1), y = 0)
print(df)
#>   x y
#> 1 1 0
# Although the row prints as 1 and 0, rowSums() yields a tiny value
# (about 5e-324) rather than 1.
# NOTE(review): presumably rowSums() drops the integer64 class and sums the
# raw underlying doubles -- confirm against the bit64 docs.
rowSums(df)
#> [1] 4.940656e-324

或者,您可以使用 as.numeric 将每一列转换为双精度数值(double)。

我知道我必须做什么了:

# Report whether `x` is a bit64 integer64 vector.
#
# @param x Any R object, e.g. one column of a data frame.
# @return A single TRUE or FALSE.
#
# inherits() is used instead of `class(x) == "integer64"` because `==` yields
# one logical per class name: a column carrying several classes (such as
# POSIXct) would give a length-2 result and break sapply()-based column
# selection.
# NOTE(review): bit64 exports a function with this name; this definition masks
# it while it is in scope.
is.integer64 <- function(x) {
  inherits(x, "integer64")
}
> sel <- sapply(tmp, is.integer64)
> tmp[sel] <- lapply(tmp[sel], as.numeric)
> dput(head(tmp, 1))
structure(list(Organization = "VeriSign Infrastructure & Operations", 
    `Advance Monthly Sales` = 1, `New Residential Sales` = 0, 
    `U.S. International` = 0, Other = 0, `New Residential Construction` = 0, 
    `Advance Report on Durable Goods` = 0, `Homeownership Rate` = 0, 
    `Construction Spending` = 0, `Manufacturing and Trade` = 0, 
    `Quarterly Financial Report` = 0, `Advance U.S. Intl Trades` = 0, 
    `Monthly Wholesale Trade` = 0, `Quarterly Services Survey` = 0, 
    `Business Formation Statistics` = 0, Total = 1), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))
> tmp$Total <- rowSums(tmp[, -1])
> head(tmp2$Total,20)
 [1]     2    40     1     8     1 14085     8     1     7    41    21    25
[13]   129     1     1     1    37     1     1    81
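> is.integer64 <- function(x){
  class(x)=="integer64"
}
> sel <- sapply(tmp, is.integer64)
> tmp[sel] <- lapply(tmp[sel], as.numeric)
> dput(head(tmp, 1))
structure(list(Organization = "VeriSign Infrastructure & Operations", 
    `Advance Monthly Sales` = 1, `New Residential Sales` = 0, 
    `U.S. International` = 0, Other = 0, `New Residential Construction` = 0, 
    `Advance Report on Durable Goods` = 0, `Homeownership Rate` = 0, 
    `Construction Spending` = 0, `Manufacturing and Trade` = 0, 
    `Quarterly Financial Report` = 0, `Advance U.S. Intl Trades` = 0, 
    `Monthly Wholesale Trade` = 0, `Quarterly Services Survey` = 0, 
    `Business Formation Statistics` = 0, Total = 1), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))
> tmp$Total <- rowSums(tmp[, -1])
> head(tmp2$Total,20)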
[1]     2    40     1     8     1 14085     8     1     7    41    21    25
[13]   129     1     1     1    37     1     1    81

再次感谢大家

我明白了我该做什么:

# Report whether `x` is a bit64 integer64 vector.
#
# @param x Any R object, e.g. one column of a data frame.
# @return A single TRUE or FALSE.
#
# inherits() is used instead of `class(x) == "integer64"` because `==` yields
# one logical per class name: a column carrying several classes (such as
# POSIXct) would give a length-2 result and break sapply()-based column
# selection.
# NOTE(review): bit64 exports a function with this name; this definition masks
# it while it is in scope.
is.integer64 <- function(x) {
  inherits(x, "integer64")
}
> sel <- sapply(tmp, is.integer64)
> tmp[sel] <- lapply(tmp[sel], as.numeric)
> dput(head(tmp, 1))
structure(list(Organization = "VeriSign Infrastructure & Operations", 
    `Advance Monthly Sales` = 1, `New Residential Sales` = 0, 
    `U.S. International` = 0, Other = 0, `New Residential Construction` = 0, 
    `Advance Report on Durable Goods` = 0, `Homeownership Rate` = 0, 
    `Construction Spending` = 0, `Manufacturing and Trade` = 0, 
    `Quarterly Financial Report` = 0, `Advance U.S. Intl Trades` = 0, 
    `Monthly Wholesale Trade` = 0, `Quarterly Services Survey` = 0, 
    `Business Formation Statistics` = 0, Total = 1), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))
> tmp$Total <- rowSums(tmp[, -1])
> head(tmp2$Total,20)
 [1]     2    40     1     8     1 14085     8     1     7    41    21    25
[13]   129     1     1     1    37     1     1    81
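> is.integer64 <- function(x){
  class(x)=="integer64"
}
> sel <- sapply(tmp, is.integer64)
> tmp[sel] <- lapply(tmp[sel], as.numeric)
> dput(head(tmp, 1))
structure(list(Organization = "VeriSign Infrastructure & Operations", 
    `Advance Monthly Sales` = 1, `New Residential Sales` = 0, 
    `U.S. International` = 0, Other = 0, `New Residential Construction` = 0, 
    `Advance Report on Durable Goods` = 0, `Homeownership Rate` = 0, 
    `Construction Spending` = 0, `Manufacturing and Trade` = 0, 
    `Quarterly Financial Report` = 0, `Advance U.S. Intl Trades` = 0, 
    `Monthly Wholesale Trade` = 0, `Quarterly Services Survey` = 0, 
    `Business Formation Statistics` = 0, Total = 1), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))
> tmp$Total <- rowSums(tmp[, -1])
> head(tmp2$Total,20)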
[1]     2    40     1     8     1 14085     8     1     7    41    21    25
[13]   129     1     1     1    37     1     1    81

再次感谢大家

base::rowSums(tmp[,-1])
是否提供相同的值?您还可以将
dput(head(tmp,1))
添加到问题中。@FrsLry,这是合法的,请参见(包括
?[
?[
)并查找“负值”。好的,我现在很感兴趣。这看起来是R可以表示的最小值-请参见
?.Machine
:“在一个典型的R平台上,最小的正双精度大约是‘5e-324’”,正如这里所说的-bcstryker,你能添加来自
dput(head(tmp,1))
的输出吗?str(tmp)
的输出是什么?如果问题的所有内容都是正确的,那么我只能将变量类型视为一个可能的问题。
base::rowSums(tmp[,-1])
为您提供相同的值?您还可以将
dput(head(tmp,1))
添加到问题中。@FrsLry,这是合法的,请参见(包括
?[
?[
)并查找“负值”。好的,我现在很感兴趣。这看起来是R可以表示的最小值-请参见
?.Machine
:“在典型的R平台上,最小的正双精度约为“5e-324”,根据这里的说明-bcstryker,您可以添加
dput(head(tmp,1))
的输出吗?str(tmp)的输出是什么
?如果问题的所有内容都是正确的,那么我只能将变量类型视为一个可能的问题。嘿@pseudospin我尝试了您的建议,并获得了相同的最终输出。请参见问题二次编辑。tmp$“Advance Monthly Sales”绝对是整数64,但现在它是数字,我仍然从rowSums()中获得这些奇怪的输出.Hey@pseudospin我尝试了您推荐的方法,并获得了相同的最终输出。请参见问题二次编辑。tmp$“Advance Monthly Sales”绝对是整数64,但现在它是数字,我仍然从rowSums()获得这些奇怪的输出。
# Demonstration with two integer64 columns whose true row sum is 3.
library(bit64)
df <- data.frame(x = as.integer64(1), y = as.integer64(2))
# rowSums() returns a plain numeric result here; it prints as a tiny value
# (about 1.5e-323) rather than 3.
df$z <- rowSums(df)
print(df)
#>   x y             z
#> 1 1 2 1.482197e-323
# Re-tagging the very same values as integer64 makes z print as 3.
# NOTE(review): this suggests rowSums() added the raw integer64 storage as
# doubles and only the class attribute was lost -- confirm in the bit64 docs.
class(df$z) <- 'integer64'
df
#>   x y z
#> 1 1 2 3
# Report whether `x` is a bit64 integer64 vector.
#
# @param x Any R object, e.g. one column of a data frame.
# @return A single TRUE or FALSE.
#
# inherits() is used instead of `class(x) == "integer64"` because `==` yields
# one logical per class name: a column carrying several classes (such as
# POSIXct) would give a length-2 result and break sapply()-based column
# selection.
# NOTE(review): bit64 exports a function with this name; this definition masks
# it while it is in scope.
is.integer64 <- function(x) {
  inherits(x, "integer64")
}
> sel <- sapply(tmp, is.integer64)
> tmp[sel] <- lapply(tmp[sel], as.numeric)
> dput(head(tmp, 1))
structure(list(Organization = "VeriSign Infrastructure & Operations", 
    `Advance Monthly Sales` = 1, `New Residential Sales` = 0, 
    `U.S. International` = 0, Other = 0, `New Residential Construction` = 0, 
    `Advance Report on Durable Goods` = 0, `Homeownership Rate` = 0, 
    `Construction Spending` = 0, `Manufacturing and Trade` = 0, 
    `Quarterly Financial Report` = 0, `Advance U.S. Intl Trades` = 0, 
    `Monthly Wholesale Trade` = 0, `Quarterly Services Survey` = 0, 
    `Business Formation Statistics` = 0, Total = 1), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))
> tmp$Total <- rowSums(tmp[, -1])
> head(tmp2$Total,20)
 [1]     2    40     1     8     1 14085     8     1     7    41    21    25
[13]   129     1     1     1    37     1     1    81