R 按子组为最常见的特征指定一个值
如果某人没有住房类型(类型1、类型2、类型3、类型4 这四列全为 0),我想为他指定其所在城市最常见的住房类型。我尝试了下面的代码,但没有成功。(标签:r, function, loops, assign)
# Find the dummy column(s) in which the value 1 occurs most often.
#
# Args:
#   df: data frame holding the dummy (0/1) columns.
#   columnsNames: character vector naming the dummy columns to compare.
#
# Returns:
#   The name of the column with the largest sum. When several columns tie for
#   the largest sum, all of their names are returned. An empty `columnsNames`
#   yields `character(0)`.
mostCommon <- function(df, columnsNames) {
  if (length(columnsNames) == 0) {
    return(character(0))
  }

  unknown <- setdiff(unlist(columnsNames), names(df))
  if (length(unknown) > 0) {
    stop(
      "undefined columns selected: ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  # Keep the counts numeric. Storing them in a character vector would make
  # max() compare text, where "9" sorts after "10".
  counts <- vapply(
    columnsNames,
    function(col) sum(df[[col]], na.rm = TRUE),
    numeric(1),
    USE.NAMES = FALSE
  )

  columnsNames[which(counts == max(counts))]
}
# Asker's original attempt, kept exactly as posted; it does not run.
# - `%>%` passes the grouped data frame as the first argument, so this call
#   hands mostCommon() three arguments although it is defined with only two
#   (df, columnsNames).
# - `Type` and `i` are not defined anywhere in the visible code, and R vectors
#   are indexed from 1, so `[0]` selects nothing.
abc <- data_object%>%
group_by(municipality) %>% mostCommon(data_object, c("Type1", "Type2", "Type3","Type4")) %>% Type(i)[0]<-1
让我们使用以下输入数据(例如保存为 CSV 文件)进行测试:
community;type1;type2;type3;type4
A;0;0;1;0
B;0;0;0;1
A;0;0;1;0
A;0;1;0;0
B;1;0;0;1
。。。然后把它放在一个叫做datahouse的data.frame中。现在
# Sum each housing-type dummy column (columns 2 to 5) within every community;
# the grouping column of the result is named `comm`.
tmp <- aggregate(
  x = datahouse[, 2:5],
  by = list(comm = datahouse$community),
  FUN = sum
)
给定一个社区,我们可以从中找出最常见的住房类型。例如
# Position (and column name) of the most frequent housing type in community
# "A"; rows and type columns are selected in a single indexing step.
which.max(tmp[tmp$comm == "A", 2:5])
结果表明,“3类住房”在“A社区”中最为常见
请注意,最大值不一定唯一,即可能存在多种同样常见的住房类型;which.max() 只返回其中第一个。
请不要将您的数据作为图像发布:这样很难用它来测试我们的代码。请改用 dput(),并将其输出粘贴到您的问题中。非常感谢。
# Work on a copy so `data_object` itself is left untouched.
df <- data_object

# Give a housing type to every row that has none.
#
# A row is "blank" when its four dummy columns Type1..Type4 are all 0. Each
# blank row gets a 1 in the type that is most common in its municipality
# (largest column sum within the group). When several types tie for the
# largest sum, one of them is drawn at random, independently for each blank
# row.
#
# Args:
#   df: data frame with a `municipality` column and the 0/1 dummy columns
#       `Type1`, `Type2`, `Type3` and `Type4`.
#
# Returns:
#   The data, still grouped by `municipality`, with the blank rows filled in
#   and all helper columns removed. Rows containing NA in a type column are
#   left as they are.
maketype.f <- function(df) {
  type_cols <- c("Type1", "Type2", "Type3", "Type4")
  flag_cols <- c("T1", "T2", "T3", "T4")

  df_helper <- df %>%
    group_by(municipality) %>%
    mutate(
      # T# first holds the per-municipality total of each type ...
      T1 = sum(Type1, na.rm = TRUE),
      T2 = sum(Type2, na.rm = TRUE),
      T3 = sum(Type3, na.rm = TRUE),
      T4 = sum(Type4, na.rm = TRUE),
      # TRUE for rows without any housing type.
      HelperCheck = (Type1 + Type2 + Type3 + Type4) == 0,
      helpermax = max(T1, T2, T3, T4),
      # ... and is then turned into a 0/1 flag: is this type (one of) the
      # most common in the municipality?
      T1 = as.numeric(T1 == helpermax),
      T2 = as.numeric(T2 == helpermax),
      T3 = as.numeric(T3 == helpermax),
      T4 = as.numeric(T4 == helpermax)
    )

  # Edit plain vectors and write them back once; changing a grouped tibble
  # cell by cell inside the loop is needlessly slow.
  types <- lapply(type_cols, function(col) df_helper[[col]])
  flags <- lapply(flag_cols, function(col) df_helper[[col]])

  # which() also skips rows whose HelperCheck is NA (a type value is missing).
  for (row_i in which(df_helper$HelperCheck)) {
    tied <- which(vapply(flags, function(flag) flag[row_i] == 1, logical(1)))
    # One-hot vector in random order: exactly one of the tied types gets the 1.
    randomwinner <- sample(c(1, rep(0, length(tied) - 1)), replace = FALSE)
    for (k in seq_along(tied)) {
      types[[tied[k]]][row_i] <- randomwinner[k]
    }
  }

  for (k in seq_along(type_cols)) {
    df_helper[[type_cols[k]]] <- types[[k]]
  }

  df_helper %>%
    select(-T1, -T2, -T3, -T4, -HelperCheck, -helpermax)
}

df <- maketype.f(df)
# Work on a copy so `data_object` itself is left untouched.
df <- data_object

# Give a housing type to every row that has none.
#
# A row is "blank" when its four dummy columns Type1..Type4 are all 0. Each
# blank row gets a 1 in the type that is most common in its municipality
# (largest column sum within the group). When several types tie for the
# largest sum, one of them is drawn at random, independently for each blank
# row.
#
# Args:
#   df: data frame with a `municipality` column and the 0/1 dummy columns
#       `Type1`, `Type2`, `Type3` and `Type4`.
#
# Returns:
#   The data, still grouped by `municipality`, with the blank rows filled in
#   and all helper columns removed. Rows containing NA in a type column are
#   left as they are.
maketype.f <- function(df) {
  type_cols <- c("Type1", "Type2", "Type3", "Type4")
  flag_cols <- c("T1", "T2", "T3", "T4")

  df_helper <- df %>%
    group_by(municipality) %>%
    mutate(
      # T# first holds the per-municipality total of each type ...
      T1 = sum(Type1, na.rm = TRUE),
      T2 = sum(Type2, na.rm = TRUE),
      T3 = sum(Type3, na.rm = TRUE),
      T4 = sum(Type4, na.rm = TRUE),
      # TRUE for rows without any housing type.
      HelperCheck = (Type1 + Type2 + Type3 + Type4) == 0,
      helpermax = max(T1, T2, T3, T4),
      # ... and is then turned into a 0/1 flag: is this type (one of) the
      # most common in the municipality?
      T1 = as.numeric(T1 == helpermax),
      T2 = as.numeric(T2 == helpermax),
      T3 = as.numeric(T3 == helpermax),
      T4 = as.numeric(T4 == helpermax)
    )

  # Edit plain vectors and write them back once; changing a grouped tibble
  # cell by cell inside the loop is needlessly slow.
  types <- lapply(type_cols, function(col) df_helper[[col]])
  flags <- lapply(flag_cols, function(col) df_helper[[col]])

  # which() also skips rows whose HelperCheck is NA (a type value is missing).
  for (row_i in which(df_helper$HelperCheck)) {
    tied <- which(vapply(flags, function(flag) flag[row_i] == 1, logical(1)))
    # One-hot vector in random order: exactly one of the tied types gets the 1.
    randomwinner <- sample(c(1, rep(0, length(tied) - 1)), replace = FALSE)
    for (k in seq_along(tied)) {
      types[[tied[k]]][row_i] <- randomwinner[k]
    }
  }

  for (k in seq_along(type_cols)) {
    df_helper[[type_cols[k]]] <- types[[k]]
  }

  df_helper %>%
    select(-T1, -T2, -T3, -T4, -HelperCheck, -helpermax)
}

df <- maketype.f(df)