r中的子选择和创建_R - Fatal编程技术网

r中的子选择和创建

r中的子选择和创建,r,R,假设此数据集： household_id person_id age_group 1 1 5 1 2 3 1 3 2 2 1 3 2 2 5 2 3 1 2 4 1 我想创建一个新字段，指示家庭是否包括任何年龄组

假设此数据集：

household_id person_id age_group  
1            1         5  
1            2         3  
1            3         2  
2            1         3  
2            2         5
2            3         1
2            4         1

我想创建一个新字段，指示家庭是否包括任何年龄组=1的人，如下所示：

household_id person_id age_group age_group1  
1            1         5         0  
1            2         3         0
1            3         2         0
2            1         3         1
2            2         5         1
2            3         1         1
2            4         1         1

我感谢你的帮助

读取数据后

# Read the example data: one row per person, identified by household_id and
# person_id. `header = TRUE` is written out in full: the short form `head=T`
# depends on partial argument matching and on `T`, which is an ordinary
# variable that can be reassigned.
dat <- read.table(text = 'household_id person_id age_group  
1            1         5  
1            2         3  
1            3         2  
2            1         3  
2            2         5
2            3         1
2            4         1', header = TRUE)

plyr 解决方案：

# plyr solution: add a 0/1 column that is 1 for every member of a household
# containing at least one person with age_group == 1.
# library() stops with an error when plyr is not installed; require() would
# only return FALSE and let the script fail later at the ddply() call.
library(plyr)

# Example data: one row per person.
df <- data.frame(
  household_id = c(1L, 1L, 1L, 2L, 2L, 2L, 2L),
  person_id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L),
  age_group = c(5L, 3L, 2L, 3L, 5L, 1L, 1L)
)

# Split by household and transform() each piece. any() gives one logical per
# household; `0 +` turns it into numeric 0/1, recycled over the group's rows.
ddply(df, .(household_id), transform, age_group1 = 0 + any(age_group == 1))

#   household_id person_id age_group age_group1
# 1            1         1         5          0
# 2            1         2         3          0
# 3            1         3         2          0
# 4            2         1         3          1
# 5            2         2         5          1
# 6            2         3         1          1
# 7            2         4         1          1

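ave(t$age_group, t$household_id, FUN=function(x) 1 %in% x)
[1] 0 0 0 1 1 1 1
> t$age_group1 <- with(t, ave(age_group, household_id, FUN=function(x) 1 %in% x))
> t
  household_id person_id age_group age_group1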
1            1         1         5          0
2            1         2         3          0
3            1         3         2          0
4            2         1         3          1
5            2         2         5          1
6            2         3         1          1
7            2         4         1          1

对于这类问题，我更喜欢使用 `sql`，因为很多人已经熟悉它，它可以跨语言使用（SAS 有 `proc sql;`），而且非常直观 :)

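# 将数据读入名为 `x` 的对象

# 加载 sqldf 库
library(sqldf)

# 创建一个新的家庭级表，其中仅包含
# 住户 id 和一个 0/1 指标，
# 表示家庭中是否有人符合您的要求
households <- 
    sqldf( 'select household_id , max( age_group == 1 ) as age_group1 from x group by household_id' )

这里有另一个不需要安装任何软件包的选项 ;)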
# 将数据帧读入 `x`
x <- read.table( text = "household_id person_id age_group  
1            1         5  
1            2         3  
1            3         2  
2            1         3  
2            2         5
2            3         1
2            4         1" , head=TRUE)

@Arun 我想现在看起来好多了。在这里提供一个 data.table 解决方案怎么样 :)？

+1！谢谢你，真的很简洁！我试图在不创建键的情况下执行此操作，但出现了强制转换错误 “Type of RHS ('double') must match LHS ('logical')…”。甚至在 dt[, age_group1 := any(age_group1 == 1), by=household_id] 上我也遇到了错误。

我看到了我的错误。我先执行了 dt[, age_group1 := 0 + any(age_group == 1), by=household_id]，然后又执行了 dt[, age_group1 := any(age_group == 1), by=household_id]，所以在 `[.data.table`(dt, `:=`(age_group1, any(age_group == 1)), by = household_id) 中出现了错误：…

它在我的待办事项列表中！我至少看过一次手册！也许我的第一个 R 包将会用到 data.table。

# plyr approach: flag each person with 1 when anyone in the same household
# has age_group == 1, otherwise 0.
# Use library() rather than require(): a missing package then raises an error
# immediately instead of returning FALSE and failing later at ddply().
library(plyr)

# Sample data, one row per person.
df <- data.frame(
  household_id = c(1L, 1L, 1L, 2L, 2L, 2L, 2L),
  person_id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L),
  age_group = c(5L, 3L, 2L, 3L, 5L, 1L, 1L)
)

# ddply() splits df by household_id and applies transform() to each piece;
# `0 + any(...)` converts the per-household logical into numeric 0/1.
ddply(df, .(household_id), transform, age_group1 = 0 + any(age_group == 1))

#   household_id person_id age_group age_group1
# 1            1         1         5          0
# 2            1         2         3          0
# 3            1         3         2          0
# 4            2         1         3          1
# 5            2         2         5          1
# 6            2         3         1          1
# 7            2         4         1          1

# data.table solution. library() raises an error if the package is missing;
# require() would only return FALSE and defer the failure to the next line.
library(data.table)

# Convert the example data frame `df` (built in the plyr snippet) to a
# data.table keyed by household.
dt <- data.table(df, key="household_id")

# `:=` adds age_group1 to dt in place, computed once per household.
# `0 + any(...)` stores the flag as numeric 0/1 rather than logical.
dt[, age_group1 := 0 + any(age_group == 1), by=household_id]

# Base R alternative: ave() applies FUN to age_group within each household_id
# and repeats the group's result on every row of that group. `1 %in% x` is
# TRUE when the household contains someone with age_group == 1; the printed
# result below shows it as 0/1.
# NOTE(review): assumes `t` is the person-level data frame; it is not defined
# in the visible snippets.
ave(t$age_group, t$household_id, FUN=function(x) 1 %in% x)
[1] 0 0 0 1 1 1 1

> t$age_group1 <- with(t, ave(age_group, household_id, FUN=function(x) 1 %in% x))
> t
  household_id person_id age_group age_group1
1            1         1         5          0
2            1         2         3          0
3            1         3         2          0
4            2         1         3          1
5            2         2         5          1
6            2         3         1          1
7            2         4         1          1

# Prerequisite: the person-level data frame must already exist as `x`.

# sqldf lets the grouping step be written as plain SQL against `x`.
library(sqldf)

# Collapse to one row per household: household_id plus a 0/1 indicator of
# whether anyone in that household has age_group equal to 1.
indicator_query <-
  'select household_id , max( age_group == 1 ) as age_group1 from x group by household_id'
households <- sqldf(indicator_query)

# Attach the household-level indicator to every person row; merge() joins on
# the column the two tables share (household_id).
x <- merge(x, households)

# Show the final table.
x

# Base R solution using aggregate() + merge(); no extra packages required.

# Read the example data frame into `x`. `header` is spelled out in full
# instead of relying on partial argument matching (`head=`).
x <- read.table( text = "household_id person_id age_group  
1            1         5  
1            2         3  
1            3         2  
2            1         3  
2            2         5
2            3         1
2            4         1" , header = TRUE)


# For each household, take the maximum of the logical `age_group == 1`.
# max() of TRUE/FALSE values gives 1 if any member matches, otherwise 0.
hhold <- aggregate( age_group == 1 ~ household_id , FUN = max , data = x )

# The aggregated column is named after the formula's left-hand side, so give
# it a usable name.
names( hhold )[ 2 ] <- 'age_group1'

# Merge the household-level indicator back onto the person-level rows;
# merge() joins on the shared column household_id.
x <- merge( x , hhold )

# Look at the result.
x