R 填空的最佳方法
我使用的数据按如下方式读取:
# Read the tab-separated heart-attack data set from a local file.
# Columns are typed positionally: one character column, four factors, then
# three numeric columns.
heartatk4R <- read.csv(
  # The original path had a stray quote in the middle of the string, which
  # left the literal unterminated; the path is now a single valid string.
  file = "C:\\Users\\heartatk4R.txt",
  header = TRUE,
  sep = "\t",
  colClasses = c(
    "character", "factor", "factor", "factor",
    "factor", "numeric", "numeric", "numeric"
  ),
  # Treat "*" as missing so the numeric columns can be parsed; "NA" keeps its
  # default meaning.
  # NOTE(review): assumes "*" is the only non-numeric marker in the file.
  na.strings = c("NA", "*")
)

# Show the first rows to check the import.
head(heartatk4R)
您可以使用 aggregate 计算每天的平均成本,例如:
# Load the tab-separated data set, typing the eight columns positionally and
# reading "*" as NA.
heartatk4R <- read.table(
  "http://statland.org/AP/R/heartatk4R.txt",
  header = TRUE,
  sep = "\t",
  colClasses = c("character", rep("factor", 4), rep("numeric", 3)),
  na.strings = "*"
)

# Mean of CHARGES per DIAGNOSIS, restricted to rows where SEX is "F" and
# AGE lies strictly between 20 and 70. Missing CHARGES are ignored.
tt <- with(
  heartatk4R[with(heartatk4R, SEX == "F" & AGE > 20 & AGE < 70), ],
  aggregate(
    data.frame(CostPerDay = CHARGES),
    by = data.frame(DIAGNOSIS),
    FUN = mean,
    na.rm = TRUE
  )
)

# Print the groups from the highest mean to the lowest.
tt[order(-tt$CostPerDay), ]
# DIAGNOSIS CostPerDay
#1 41001 11911.179
#3 41021 11165.837
#2 41011 10922.091
#5 41041 10366.557
#4 41031 10101.536
#8 41081 9641.175
#7 41071 9558.745
#9 41091 9528.740
#6 41051 9393.048
使用 dplyr,我们可以先筛选数据,再按 DIAGNOSIS 分组,然后取 CHARGES 的平均值:
library(dplyr)

# Load the tab-separated data set. Declaring "*" as a missing-value marker
# lets CHARGES be parsed as numeric at read time, so no lossy
# as.numeric(as.character()) repair (and its coercion warning) is needed.
# NOTE(review): assumes "*" is the only non-numeric marker in the file.
heartatk4R <- read.csv(
  "http://statland.org/AP/R/heartatk4R.txt",
  sep = "\t",
  na.strings = c("NA", "*")
)

# Mean CHARGES per DIAGNOSIS for rows where SEX is "F" and AGE lies strictly
# between 20 and 70, sorted from the highest mean to the lowest.
heartatk4R %>%
  filter(SEX == "F", AGE > 20, AGE < 70) %>%
  group_by(DIAGNOSIS) %>%
  summarise(AvgCostPERDAY = mean(CHARGES, na.rm = TRUE)) %>%
  arrange(desc(AvgCostPERDAY))
# DIAGNOSIS AvgCostPERDAY
# <int> <dbl>
#1 41001 11911.
#2 41021 11166.
#3 41011 10922.
#4 41041 10367.
#5 41031 10102.
#6 41081 9641.
#7 41071 9559.
#8 41091 9529.
#9 41051 9393.
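heartatk4R %>%
filter(SEX == "F" & AGE > 20 & AGE < 70) %>%
mutate(CHARGES = as.numeric(as.character(CHARGES))) %>%
group_by(DIAGNOSIS) %>%
summarise(AvgCostPERDAY = mean(CHARGES, na.rm = TRUE)) %>%
arrange(desc(AvgCostPERDAY))
# DIAGNOSIS AvgCostPERDAY
# <int> <dbl>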
#1 41001 11911.
#2 41021 11166.
#3 41011 10922.
#4 41041 10367.
#5 41031 10102.
#6 41081 9641.
#7 41071 9559.
#8 41091 9529.
#9 41051 9393.
library(dplyr)

# Load the tab-separated data set. Declaring "*" as a missing-value marker
# lets CHARGES be parsed as numeric at read time, so no lossy
# as.numeric(as.character()) repair (and its coercion warning) is needed.
# NOTE(review): assumes "*" is the only non-numeric marker in the file.
heartatk4R <- read.csv(
  "http://statland.org/AP/R/heartatk4R.txt",
  sep = "\t",
  na.strings = c("NA", "*")
)

# Mean CHARGES per DIAGNOSIS for rows where SEX is "F" and AGE lies strictly
# between 20 and 70, sorted from the highest mean to the lowest.
heartatk4R %>%
  filter(SEX == "F", AGE > 20, AGE < 70) %>%
  group_by(DIAGNOSIS) %>%
  summarise(AvgCostPERDAY = mean(CHARGES, na.rm = TRUE)) %>%
  arrange(desc(AvgCostPERDAY))
# DIAGNOSIS AvgCostPERDAY
# <int> <dbl>
#1 41001 11911.
#2 41021 11166.
#3 41011 10922.
#4 41041 10367.
#5 41031 10102.
#6 41081 9641.
#7 41071 9559.
#8 41091 9529.
#9 41051 9393.