Warning: file_get_contents(/data/phpspider/zhask/data//catemap/0/hadoop/6.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 填空的最佳方法_R - Fatal编程技术网

R 填空的最佳方法

R 填空的最佳方法,r,R,使用来自 我阅读的数据如下: heartatk4R <- read.csv(file="C:\\Users\\"heartatk4R.txt", header = TRUE, sep = "\t", colClasses = c("character", "factor", "factor", "factor", "factor", "numeric

使用来自

我按如下方式读取数据:

# Read the tab-separated heart-attack data set with explicit column types.
# The path literal previously contained a stray double quote in the middle,
# which made the whole call a syntax error.
# NOTE(review): "*" is declared as the missing-value marker so the numeric
# columns can be parsed; this presumes the local file matches the online
# copy of heartatk4R.txt -- confirm against the data.
heartatk4R <- read.csv(
  file = "C:\\Users\\heartatk4R.txt",
  header = TRUE,
  sep = "\t",
  colClasses = c(
    "character", "factor", "factor", "factor",
    "factor", "numeric", "numeric", "numeric"
  ),
  na.strings = "*"
)
# Show the first rows to check the import.
head(heartatk4R)
您可以使用 `aggregate` 计算每天的平均成本,例如:

# Read the tab-separated data; "*" entries are read as NA.
column_types <- c(
  "character", "factor", "factor", "factor",
  "factor", "numeric", "numeric", "numeric"
)
heartatk4R <- read.table(
  "http://statland.org/AP/R/heartatk4R.txt",
  header = TRUE,
  sep = "\t",
  colClasses = column_types,
  na.strings = "*"
)

# Rows with SEX == "F" and AGE strictly between 20 and 70.
is_selected <- heartatk4R$SEX == "F" & heartatk4R$AGE > 20 & heartatk4R$AGE < 70
selected <- heartatk4R[is_selected, ]

# Mean of CHARGES within each DIAGNOSIS, ignoring missing charges.
tt <- aggregate(
  data.frame(CostPerDay = selected$CHARGES),
  data.frame(DIAGNOSIS = selected$DIAGNOSIS),
  FUN = mean,
  na.rm = TRUE
)

# Print the groups from highest to lowest mean.
tt[order(-tt[, 2]), ]
#  DIAGNOSIS CostPerDay
#1     41001  11911.179
#3     41021  11165.837
#2     41011  10922.091
#5     41041  10366.557
#4     41031  10101.536
#8     41081   9641.175
#7     41071   9558.745
#9     41091   9528.740
#6     41051   9393.048

使用 `dplyr`,我们可以用 `filter` 筛选值,按 `DIAGNOSIS` 分组(`group_by`),并取 `CHARGES` 的平均值(`mean`)。

# Download the tab-separated data set.
heartatk4R <- read.csv(
  url("http://statland.org/AP/R/heartatk4R.txt"),
  sep = "\t"
)

library(dplyr)

# Mean CHARGES per DIAGNOSIS for rows with SEX == "F" and AGE strictly
# between 20 and 70, sorted from highest to lowest mean.
heartatk4R %>%
  filter(SEX == "F", AGE > 20, AGE < 70) %>%
  # Values that cannot be parsed as numbers become NA and are skipped by
  # na.rm = TRUE below.
  mutate(CHARGES = as.numeric(as.character(CHARGES))) %>%
  group_by(DIAGNOSIS) %>%
  summarise(AvgCostPERDAY = mean(CHARGES, na.rm = TRUE)) %>%
  arrange(desc(AvgCostPERDAY))

# DIAGNOSIS AvgCostPERDAY
#      <int>         <dbl>
#1     41001        11911.
#2     41021        11166.
#3     41011        10922.
#4     41041        10367.
#5     41031        10102.
#6     41081         9641.
#7     41071         9559.
#8     41091         9529.
#9     41051         9393.
# Restored snippet: the function and column names here had been
# machine-translated and the text between "<" and ">" had been stripped,
# leaving code that could not be parsed as R.
heartatk4R <- read.csv(url("http://statland.org/AP/R/heartatk4R.txt"), sep = "\t")

library(dplyr)

# Mean CHARGES per DIAGNOSIS for rows with SEX == "F" and AGE strictly
# between 20 and 70, sorted from highest to lowest mean.
heartatk4R %>%
   filter(SEX == "F" & AGE > 20 & AGE < 70) %>%
   mutate(CHARGES = as.numeric(as.character(CHARGES))) %>%
   group_by(DIAGNOSIS) %>%
   summarise(AvgCostPERDAY  = mean(CHARGES, na.rm = TRUE)) %>%
   arrange(desc(AvgCostPERDAY))

# DIAGNOSIS AvgCostPERDAY
#      <int>         <dbl>
#1     41001        11911.
#2     41021        11166.
#3     41011        10922.
#4     41041        10367.
#5     41031        10102.
#6     41081         9641.
#7     41071         9559.
#8     41091         9529.
#9     41051         9393.
# Download the tab-separated data set.
data_url <- "http://statland.org/AP/R/heartatk4R.txt"
heartatk4R <- read.csv(url(data_url), sep = "\t")

library(dplyr)

# Step 1: keep rows with SEX == "F" and AGE strictly between 20 and 70.
selected <- filter(heartatk4R, SEX == "F" & AGE > 20 & AGE < 70)

# Step 2: coerce CHARGES to numeric; unparseable values become NA.
selected <- mutate(selected, CHARGES = as.numeric(as.character(CHARGES)))

# Step 3: mean CHARGES within each DIAGNOSIS, ignoring NA.
avg_by_diagnosis <- summarise(
  group_by(selected, DIAGNOSIS),
  AvgCostPERDAY = mean(CHARGES, na.rm = TRUE)
)

# Step 4: print the groups from highest to lowest mean.
arrange(avg_by_diagnosis, desc(AvgCostPERDAY))

# DIAGNOSIS AvgCostPERDAY
#      <int>         <dbl>
#1     41001        11911.
#2     41021        11166.
#3     41011        10922.
#4     41041        10367.
#5     41031        10102.
#6     41081         9641.
#7     41071         9559.
#8     41091         9529.
#9     41051         9393.