R 如何在列表中存储函数的输出
这是我的数据帧的一个子集R 如何在列表中存储函数的输出,r,list,automation,R,List,Automation,这是我的数据帧的一个子集 library(arsenal) library(tidyverse) mydata2=structure(list(Hospital = structure(c(1L, 1L, 2L, 3L, 4L, 5L, 4L, 2L, 4L, 3L, 2L, 1L, 3L, 3L, 1L, 4L, 4L, 4L, 5L, 5L, 1L, 4L, 4L, 1L, 1L, 5L, 2L, 4L, 4L, 5L, 1L, 2L, 2L, 5L, 1L, 5L, 1L, 1
library(arsenal)
library(tidyverse)
# Example data (dput output): a data frame with 109 rows and two factor
# columns, `Hospital` (levels "A"-"E") and `ZipCode` (64 distinct codes).
# Each row pairs one hospital with one zip code.
mydata2 <- structure(list(Hospital = structure(c(1L, 1L, 2L, 3L, 4L, 5L,
4L, 2L, 4L, 3L, 2L, 1L, 3L, 3L, 1L, 4L, 4L, 4L, 5L, 5L, 1L, 4L,
4L, 1L, 1L, 5L, 2L, 4L, 4L, 5L, 1L, 2L, 2L, 5L, 1L, 5L, 1L, 1L,
4L, 2L, 4L, 4L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 4L, 2L,
2L, 4L, 4L, 1L, 3L, 3L, 5L, 4L, 2L, 5L, 1L, 4L, 1L, 4L, 4L, 2L,
2L, 2L, 2L, 3L, 1L, 3L, 1L, 3L, 5L, 5L, 4L, 3L, 3L, 2L, 3L, 1L,
1L, 1L, 5L, 3L, 1L, 1L, 2L, 1L, 2L, 4L, 3L, 3L, 1L, 2L, 5L, 1L,
3L, 2L, 2L, 1L, 1L, 1L, 2L), .Label = c("A", "B", "C", "D", "E"
), class = "factor"), ZipCode = structure(c(1L, 5L, 16L, 23L,
42L, 18L, 46L, 49L, 36L, 61L, 33L, 28L, 58L, 60L, 3L, 40L, 8L,
45L, 35L, 37L, 55L, 45L, 14L, 15L, 59L, 41L, 42L, 44L, 42L, 41L,
17L, 49L, 43L, 34L, 55L, 41L, 52L, 63L, 42L, 38L, 8L, 45L, 49L,
7L, 13L, 26L, 63L, 39L, 59L, 38L, 59L, 50L, 8L, 49L, 38L, 45L,
43L, 53L, 24L, 22L, 34L, 48L, 33L, 29L, 62L, 42L, 32L, 48L, 33L,
19L, 49L, 49L, 38L, 25L, 4L, 51L, 30L, 57L, 47L, 35L, 9L, 23L,
51L, 12L, 58L, 63L, 59L, 27L, 37L, 57L, 2L, 54L, 38L, 56L, 49L,
64L, 11L, 20L, 56L, 49L, 34L, 21L, 23L, 49L, 49L, 10L, 31L, 59L,
6L), .Label = c("27000", "45490", "72470", "75011", "75015",
"75018", "76480", "77270", "77340", "77350", "77380", "77440",
"77580", "77C01", "78125", "78200", "80000", "91090", "91100",
"91130", "91160", "91200", "91210", "91270", "91350", "91410",
"91540", "91700", "92000", "92220", "92310", "92350", "93000",
"93100", "93110", "93120", "93130", "93150", "93200", "93220",
"93230", "93270", "93290", "93300", "93420", "93440", "93500",
"93600", "93700", "94110", "94190", "94200", "94230", "94240",
"94250", "94270", "94290", "94310", "94400", "94480", "94520",
"94550", "94800", "95190"), class = "factor")), row.names = c(NA,
-109L), class = "data.frame")
我创建了一个函数,用于找出每家医院中按频数从高到低排列、累计占住院人数 60% 的前 n 个邮政编码(ZipCode)。然后,我想把每家医院对应的邮政编码保存到一个由向量组成的列表中。
#' Zip codes whose cumulative share of one hospital's rows stays within 60%
#'
#' Keeps the rows of `data` belonging to the hospital `choice`, tabulates
#' their zip codes with `arsenal::freqlist()`, sorts that table in decreasing
#' order and returns the zip codes whose `cumPercent` is at most 60.
#'
#' @param choice Hospital identifier; rows are kept where `Hospital == choice`.
#' @param data Data frame with `Hospital` and `ZipCode` columns. Defaults to
#'   the global `mydata2`, so existing `Zone(choice)` calls behave as before.
#' @return The `Var1` column (the zip codes) of the filtered frequency table.
Zone <- function(choice, data = mydata2) {
  # Use a distinct local name instead of shadowing the global `mydata2`.
  hospital_rows <- data %>% filter(Hospital == choice)

  zip_counts <- table(hospital_rows$ZipCode, useNA = "ifany")
  zip_freq <- freqlist(zip_counts, na.options = "include", digits.pct = 1)

  # Sort before summarising so that the cumulative percentage accumulates
  # from the most frequent zip code downwards. `TRUE` is spelled out because
  # `T` is an ordinary variable that can be reassigned.
  zip_summary <- summary(sort(zip_freq, decreasing = TRUE))

  zip_table <- as.data.frame(zip_summary$object)
  zip_table <- zip_table %>% filter(cumPercent <= 60)
  zip_table$Var1
}
所以我想对所有医院都执行这一操作。目标是把所有输出存储在一个列表中,以便以后能够提取每家医院中累计占住院人数 60% 的邮政编码。我需要一个函数来自动完成这一切,因为我的完整数据集中有数百家医院(这里只是一个子集)。
您的代码无法复现,因为您没有说明 `freqlist` 来自哪个包,但下面的代码应该可以做到:
# Call Zone() once per distinct hospital and collect the results in a list.
lapply(X = unique(mydata2[["Hospital"]]), FUN = Zone)
要保留医院ID,请执行以下操作:
# Run Zone() for every distinct hospital and name each list element after
# the hospital it belongs to.
ids <- unique(mydata2[["Hospital"]])
result <- setNames(lapply(X = ids, FUN = Zone), ids)
我使用的是 data.table 库。我试着理解了您的函数,但这里可能有误(例如,我不明白您为什么要对频数表进行排序)。另外,我注意到您的预期结果与此处输出中的邮政编码不一致。如果您有任何问题,请告诉我。
library(data.table)
setDT(mydata2) # convert mydata2 to a data.table by reference

# One-liner: count rows per (Hospital, ZipCode), turn the counts into a
# cumulative share within each hospital, and keep the zip codes whose
# cumulative share is at most 60%.
# NOTE(review): the cumulative sum follows the order in which the groups are
# returned, not decreasing count as Zone() does; insert
# `[order(Hospital, -N)]` before the cumsum step if the most frequent zip
# codes are intended -- TODO confirm with the author.
mydata2[, .N, by = .(Hospital, ZipCode)][
  , .(ZipCode, freq = cumsum(prop.table(N))), by = .(Hospital)][
  freq <= 0.6, ZipCode, by = .(Hospital)]

# Explanation of the one-liner above, split into steps. Each step is stored
# under its own name so that the raw `mydata2` is not overwritten and the
# code can be re-run (and Zone() still used) afterwards.
zip_counts <- mydata2[, .N, by = .(Hospital, ZipCode)] # number of rows for each zip code in each hospital
zip_cum_freq <- zip_counts[, .(ZipCode, freq = cumsum(prop.table(N))), by = .(Hospital)] # cumulative share of those counts within each hospital
zip_cum_freq[freq <= 0.6, ZipCode, by = .(Hospital)] # zip codes with a cumulative share of at most 60% in each hospital
抱歉 @Limey,`freqlist` 来自 arsenal 软件包。您的代码可以运行,但医院名称丢失了,它们被数字取代了。
# Build the per-hospital list of zip codes and label every element with the
# hospital it was computed for.
ids <- unique(mydata2[["Hospital"]])
result <- setNames(lapply(X = ids, FUN = Zone), ids)
library(data.table)
setDT(mydata2) # convert mydata2 to a data.table by reference

# One-liner: count rows per (Hospital, ZipCode), turn the counts into a
# cumulative share within each hospital, and keep the zip codes whose
# cumulative share is at most 60%.
# NOTE(review): the cumulative sum follows the order in which the groups are
# returned, not decreasing count as Zone() does; insert
# `[order(Hospital, -N)]` before the cumsum step if the most frequent zip
# codes are intended -- TODO confirm with the author.
mydata2[, .N, by = .(Hospital, ZipCode)][
  , .(ZipCode, freq = cumsum(prop.table(N))), by = .(Hospital)][
  freq <= 0.6, ZipCode, by = .(Hospital)]

# Explanation of the one-liner above, split into steps. Each step is stored
# under its own name so that the raw `mydata2` is not overwritten and the
# code can be re-run (and Zone() still used) afterwards.
zip_counts <- mydata2[, .N, by = .(Hospital, ZipCode)] # number of rows for each zip code in each hospital
zip_cum_freq <- zip_counts[, .(ZipCode, freq = cumsum(prop.table(N))), by = .(Hospital)] # cumulative share of those counts within each hospital
zip_cum_freq[freq <= 0.6, ZipCode, by = .(Hospital)] # zip codes with a cumulative share of at most 60% in each hospital
# Hospital ZipCode
# 1: A 27000
# 2: A 75015
# 3: A 91700
# 4: A 72470
# 5: A 94250
# 6: A 78125
# 7: A 94400
# 8: A 80000
# 9: A 94200
#10: A 94800
#11: A 76480
#12: A 91410
#13: B 78200
#14: B 93700
#15: B 93000
#16: B 93270
#17: C 91210
#18: C 94520
#19: C 94310
#20: C 94480
#21: C 91270
#22: C 91200
#23: D 93270
#24: D 93440
#25: D 93120
#26: D 93220
#27: D 77270
#28: E 91090
#29: E 93110
#30: E 93130
# Hospital ZipCode