如何为R中环境中的所有数据帧将函数应用于特定列

如何为R中环境中的所有数据帧将函数应用于特定列,r,dataframe,R,Dataframe,我有很多相同列的数据帧。我想要的是将分位数(15%和80%)函数应用于我环境中所有数据帧的第三列(“cpm”),并将结果作为新列添加到每个数据帧 环境中的所有数据帧都是相同的,下面是它们的示例: BD.ios = structure(list(geo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "BD", class = "factor"), os = structure(c(1L, 1L, 1L, 1L,

我有很多相同列的数据帧。我想要的是将分位数(15%和80%)函数应用于我环境中所有数据帧的第三列(“cpm”),并将结果作为新列添加到每个数据帧 环境中的所有数据帧都是相同的,下面是它们的示例:

BD.ios = structure(list(geo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = "BD", class = "factor"), os = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "ios", class = "factor"), 
    cpm = c(0.00026978417266187, 0.000276497695852535, 0.00442228161827238, 
    0.00396317260301814, 0.0191772698764066, 0.700811773637797, 
    0.00482934642627173, 0.00201429499675114, 0.00021494623655914, 
    0.0000520855057351408)), row.names = c(12925L, 13011L, 15189L, 
18469L, 19494L, 22385L, 22594L, 29467L, 31907L, 38037L), class = "data.frame")

AE.mac = structure(list(geo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = "AE", class = "factor"), os = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "mac", class = "factor"), 
    cpm = c(0.000353264424964019, 0.00390138781055901, 0.000893105609526794, 
    0.0099634872417983, 0.00119375573921028, 0.00535134321942833, 
    0.00318471337579618, 0.000983284169124877, 0.116180371352785
    )), row.names = c(2622L, 6483L, 6898L, 9383L, 25280L, 25923L, 
29649L, 37977L, 40411L), class = "data.frame")

AF.android = structure(list(geo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = "AF", class = "factor"), os = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "android", class = "factor"), 
    cpm = c(0.193592767295597, 0.153727276424417, 0.30376596601237, 
    0.43615845874945, 0.552450120363948, 0.214786723495654, 0.206123674204523, 
    0.0250727462779332, 0.157723828668625)), row.names = c(955L, 
7975L, 8899L, 9297L, 11223L, 14963L, 17452L, 19883L, 20555L), class = "data.frame")
我相信,解决方案很简单,需要使用
eapply
函数,但我就是想不出来

env = .GlobalEnv
eapply(env, quantile, probs = c(.15,.8))
此命令导致错误:

    Error in `[.data.frame`(x, order(x, na.last = na.last, decreasing = decreasing)) :
   undefined columns selected
编辑 为了说明这一点,以下是我所做的以及我所需要的:

我有这样的数据

data = structure(list(geo = structure(c(15L, 1L, 3L, 16L, 1L, 9L, 17L, 
23L, 29L, 52L, 26L, 55L, 34L, 46L, 25L, 52L, 17L, 15L, 27L, 35L, 
45L, 8L, 21L, 24L, 6L, 16L, 52L, 31L, 14L, 38L, 21L, 5L, 41L, 
16L, 34L, 52L, 27L, 16L, 7L, 13L, 10L, 35L, 52L, 44L, 27L, 19L, 
35L, 6L, 42L, 25L, 40L, 31L, 43L, 33L, 13L, 2L, 4L, 12L, 30L, 
44L, 51L, 38L, 35L, 28L, 52L, 32L, 20L, 19L, 34L, 56L, 51L, 53L, 
54L, 22L, 49L, 18L, 4L, 36L, 34L, 4L, 47L, 11L, 25L, 9L, 6L, 
46L, 39L, 25L, 12L, 50L, 27L, 39L, 48L, 27L, 23L, 9L, 19L, 9L, 
44L, 37L), .Label = c("AE", "AR", "AT", "AU", "AZ", "BD", "BG", 
"BO", "CA", "CD", "CH", "CO", "DK", "DZ", "EC", "EG", "ES", "FI", 
"FR", "GA", "GB", "GE", "HK", "HU", "ID", "IE", "IN", "IR", "IT", 
"KE", "KR", "LB", "LY", "MX", "MY", "NL", "PE", "PH", "PK", "PL", 
"PT", "QA", "RO", "RU", "RW", "SE", "SG", "SK", "SY", "TH", "TR", 
"US", "UY", "VN", "YE", "ZA"), class = "factor"), os = structure(c(3L, 
2L, 1L, 1L, 1L, 6L, 4L, 1L, 1L, 4L, 6L, 1L, 1L, 1L, 6L, 7L, 1L, 
4L, 1L, 3L, 1L, 6L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 
6L, 1L, 1L, 1L, 1L, 4L, 6L, 1L, 1L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 
1L, 6L, 1L, 1L, 1L, 4L, 4L, 1L, 3L, 1L, 5L, 1L, 6L, 6L, 1L, 3L, 
1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 1L, 6L, 4L, 2L, 
6L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 6L, 3L, 3L, 1L, 1L, 1L, 1L, 6L, 
4L, 3L, 1L), .Label = c("android", "blackberry", "ios", "mac", 
"other", "windows", "windows_phone"), class = "factor"), cpm = c(0.259529602595296, 
0.008325, 0.664507018855387, 0.000646161798914448, 0.117647058823529, 
0.630132741077424, 0.00398838150289017, 0.0986788005043583, 0.483832900637243, 
0.631904877252478, 0.00499783423573511, 0.408063887806778, 0.0916731378464372, 
1.3325069724202, 0.0112485708069297, 0.00171537666632221, 0.0129665435458787, 
0.00296443300606869, 0.22941417451864, 0.000426580184572523, 
0.206888580674988, 0.000622490272373541, 0.016084968041569, 0.119169168392267, 
0.0216352172946694, 0.0552526416330796, 0.0150883006745904, 0.324403186817902, 
0.188053932659688, 0.00389006342494715, 0.0625410833224263, 0.00111134385665529, 
0.000198831231813773, 0.00551511140525039, 1.02902374670185, 
0.574300071787509, 0.371022474579782, 0.111970606352996, 0.0000313953488372093, 
0.380035469977198, 0.0159468438538206, 0.0274524158125915, 0.237448482577744, 
0.083452302337827, 0.371352785145889, 0.129754756459319, 0.0261164794985636, 
0.602409638554217, 0.0157611216101295, 0.347620654741816, 0.130193264668441, 
0.34434946165254, 0.0693131695022054, 0.673575129533679, 0.0272002127093858, 
0.0295980803571429, 0.482425913163336, 0.00235336471280429, 0.00508469886782341, 
0.0000840689365279529, 0.236539258503618, 0.0799443865137296, 
0.296296296296296, 0.0236127508854782, 0.0152198636822762, 0.00339285714285714, 
0.150753768844221, 0.0859481582537517, 0.000587920688617856, 
0.00127715231788079, 0.150836862270619, 0.0849810111668886, 0.279757646414598, 
0.00113308871141809, 0.996427153632394, 0.00269808881394042, 
0.374087591240876, 0.228267072474796, 0.0516169572925784, 0.00902986826347305, 
0.000207365145228216, 0.244244977712646, 0.169128424850603, 0.573023255813954, 
0.0152944175375988, 1.11731843575419, 0.426646706586826, 0.0544090571844687, 
0.271433919880195, 0.0271570068233128, 0.00445611403693561, 0.00160892057026477, 
0.671800318640467, 0.0216794334441393, 0.00285318261516391, 0.295866741619575, 
0.0843108504398827, 1.60302577359969, 0.0132230143658259, 0.00246752277351996
)), row.names = c(6L, 22L, 25L, 28L, 31L, 41L, 43L, 45L, 47L, 
59L, 68L, 70L, 71L, 72L, 73L, 80L, 94L, 95L, 96L, 101L, 115L, 
117L, 121L, 123L, 125L, 140L, 144L, 149L, 151L, 165L, 169L, 170L, 
179L, 182L, 186L, 189L, 190L, 206L, 207L, 208L, 221L, 238L, 239L, 
259L, 271L, 275L, 276L, 280L, 281L, 294L, 303L, 308L, 311L, 315L, 
318L, 345L, 354L, 355L, 362L, 374L, 377L, 383L, 384L, 385L, 386L, 
394L, 405L, 407L, 408L, 419L, 422L, 424L, 425L, 427L, 442L, 445L, 
454L, 455L, 465L, 466L, 482L, 484L, 485L, 487L, 496L, 506L, 510L, 
513L, 517L, 518L, 523L, 528L, 544L, 548L, 552L, 557L, 570L, 579L, 
586L, 596L), class = "data.frame")
使用
split
函数获取数据帧列表,将geo+os组合彼此分离,并将其记录在数据帧列表中:

X <- split(data, list(data$geo,data$os))
如果geo+os是独一无二的,并且有一定的分位数15,分位数80,我强烈建议不要把它们留在全球环境中。我链接到的答案应该可以帮助您理解为什么列表更好,并展示如何从一开始就创建列表,而不是使用“查找所有数据帧并将其放入列表”的方法

eapply
很难,因为没有内置的东西可以让您只应用于数据帧。而
eapply
列表的形式返回结果,因此将列添加到现有数据帧没有多大意义

df_names = ls()[sapply(mget(ls()), is.data.frame)]
df_list = mget(df_names)
result_list = lapply(df_list, function(d) d$new_col = <code for new column>)

编辑根据您的编辑,让我们直接使用
数据
。我们不需要
拆分
,当然也不需要
拆分后的
列表2env
。使用
dplyr
data.table
按组添加列非常简单有效。例如:

library(dplyr)
data %>%
  group_by(geo, os) %>%
  summarize(quantile_15 = quantile(cpm, .15),
            quantile_80 = quantile(cpm, 0.8))
# # A tibble: 81 x 4
# # Groups:   geo [?]
#    geo   os         quantile_15 quantile_80
#    <fct> <fct>            <dbl>       <dbl>
#  1 AE    android      0.118       0.118    
#  2 AE    blackberry   0.00833     0.00833  
#  3 AR    mac          0.0296      0.0296   
#  4 AT    android      0.665       0.665    
#  5 AU    android      0.482       0.482    
#  6 AU    ios          0.374       0.374    
#  7 AU    mac          0.00903     0.00903  
# ...
我强烈建议不要把它们留在全球环境中。我链接到的答案应该可以帮助您理解为什么列表更好,并展示如何从一开始就创建列表,而不是使用“查找所有数据帧并将其放入列表”的方法

eapply
很难,因为没有内置的东西可以让您只应用于数据帧。而
eapply
列表的形式返回结果,因此将列添加到现有数据帧没有多大意义

df_names = ls()[sapply(mget(ls()), is.data.frame)]
df_list = mget(df_names)
result_list = lapply(df_list, function(d) d$new_col = <code for new column>)

编辑根据您的编辑,让我们直接使用
数据
。我们不需要
拆分
,当然也不需要
拆分后的
列表2env
。使用
dplyr
data.table
按组添加列非常简单有效。例如:

library(dplyr)
data %>%
  group_by(geo, os) %>%
  summarize(quantile_15 = quantile(cpm, .15),
            quantile_80 = quantile(cpm, 0.8))
# # A tibble: 81 x 4
# # Groups:   geo [?]
#    geo   os         quantile_15 quantile_80
#    <fct> <fct>            <dbl>       <dbl>
#  1 AE    android      0.118       0.118    
#  2 AE    blackberry   0.00833     0.00833  
#  3 AR    mac          0.0296      0.0296   
#  4 AT    android      0.665       0.665    
#  5 AU    android      0.482       0.482    
#  6 AU    ios          0.374       0.374    
#  7 AU    mac          0.00903     0.00903  
# ...

你为什么不一个一个地运行你的代码呢?您只有3个数据框架您的代码有什么问题?为什么它不能给你想要的?如果它抛出了一个错误,那么错误是什么?将
分位数
应用于全局环境中的所有对象(不仅仅是数据.frames)可能会导致错误,因为并非所有对象都是数字向量。
probs
应该是
probs=c(.15,.8)
。我想说,在环境中的所有对象之间循环可能不是一个好主意。如果环境中的一个对象不是数据帧怎么办?我建议显式地创建一个需要修改的所有数据帧的列表。需要帮忙吗<代码>列表(BD.ios,AE.mac,AF.android)%>%map(~mutate(.x,quantile=quantile(cpm,probs=seq(.15.8)))
(你需要
dplyr
purrr
)你为什么不逐个运行你的代码呢?您只有3个数据框架您的代码有什么问题?为什么它不能给你想要的?如果它抛出了一个错误,那么错误是什么?将
分位数
应用于全局环境中的所有对象(不仅仅是数据.frames)可能会导致错误,因为并非所有对象都是数字向量。
probs
应该是
probs=c(.15,.8)
。我想说,在环境中的所有对象之间循环可能不是一个好主意。如果环境中的一个对象不是数据帧怎么办?我建议显式地创建一个需要修改的所有数据帧的列表。需要帮忙吗<代码>列表(BD.ios,AE.mac,AF.android)%>%map(~mutate(.x,quantile=quantile(cpm,probs=seq(.15.8)))
(你需要
dplyr
purrr
)现在它已经尽可能完美了。我的方法在很多方面都是错误的。现在这已经是完美的了。我的方法在很多方面都是错误的。
library(dplyr)
data %>%
  group_by(geo, os) %>%
  summarize(quantile_15 = quantile(cpm, .15),
            quantile_80 = quantile(cpm, 0.8))
# # A tibble: 81 x 4
# # Groups:   geo [?]
#    geo   os         quantile_15 quantile_80
#    <fct> <fct>            <dbl>       <dbl>
#  1 AE    android      0.118       0.118    
#  2 AE    blackberry   0.00833     0.00833  
#  3 AR    mac          0.0296      0.0296   
#  4 AT    android      0.665       0.665    
#  5 AU    android      0.482       0.482    
#  6 AU    ios          0.374       0.374    
#  7 AU    mac          0.00903     0.00903  
# ...
library(data.table)
setDT(data)
data[, as.list(quantile(cpm, c(0.15, 0.8))), by = .(geo, os)]
 #    geo            os          15%          80%
 # 1:  EC           ios 2.595296e-01 2.595296e-01
 # 2:  AE    blackberry 8.325000e-03 8.325000e-03
 # 3:  AT       android 6.645070e-01 6.645070e-01
 # 4:  EG       android 1.702811e-02 8.928342e-02
 # 5:  AE       android 1.176471e-01 1.176471e-01
 # 6:  CA       windows 6.301327e-01 6.301327e-01