R 按子组为最常见的特征指定一个值

R 按子组为最常见的特征指定一个值,r,function,loops,assign,R,Function,Loops,Assign,如果个人没有住房类型(每列0:类型1、类型2、类型3、类型4),我想为他指定其所在城市最常见的住房类型。 我尝试了这个代码,但没有成功 mostCommon <- function(df,columnsNames){ # INPUTS : df le dataframe, columnsNames une liste de str des colonnes à checker. CE SONT DES DUMMIES ! # OUTPUT : le nom de la colonn

如果个人没有住房类型(每列0:类型1、类型2、类型3、类型4),我想为他指定其所在城市最常见的住房类型。 我尝试了这个代码,但没有成功

mostCommon <- function(df, columnsNames) {
  # Find which dummy column contains the value 1 most often.
  #
  # INPUTS : df, the data frame; columnsNames, the names of the columns to
  #          check. THESE MUST BE DUMMY (0/1) COLUMNS!
  # OUTPUT : the name of the column with the largest sum. Every tied name is
  #          returned when several columns share that largest sum.

  # Nothing to compare: return an empty result instead of calling max() on
  # an empty vector.
  if (length(columnsNames) == 0) {
    return(character(0))
  }

  # Keep the totals numeric. Storing them in a character vector would make
  # max() compare strings, so that "9" would rank above "10".
  totals <- vapply(
    columnsNames,
    function(col) as.numeric(sum(df[[col]])),
    numeric(1),
    USE.NAMES = FALSE
  )

  columnsNames[which(totals == max(totals))]
}


# NOTE(review): this is the attempt that does not run as written.
#  - `%>%` already passes the grouped data as the first argument, so
#    `mostCommon()` is called with three arguments although it is defined
#    with two (df, columnsNames).
#  - Neither `Type()` nor `i` is defined in the code shown here.
#  - R indexes from 1, so `[0]` selects no element.
#  - The pipe chain ending in `Type(i)[0]` is used as the target of `<-`,
#    which is not a valid assignment target.
abc <- data_object%>%
  group_by(municipality) %>% mostCommon(data_object, c("Type1", "Type2", "Type3","Type4")) %>% Type(i)[0]<-1 


让我们使用以下输入数据(例如保存为 csv 文件)进行测试:

community;type1;type2;type3;type4
A;0;0;1;0
B;0;0;0;1
A;0;0;1;0
A;0;1;0;0
B;1;0;0;1
……然后将其读入一个名为 datahouse 的 data.frame 中。现在执行:

# Sum each housing-type dummy (columns 2 to 5) within every community.
tmp <- aggregate(
  x = datahouse[, 2:5],
  by = list(comm = datahouse$community),
  FUN = sum
)
给定一个社区,我们可以从中找出最常见的住房类型。例如

# Position (and name) of the largest per-type total for community "A".
# which.max() reports only the first maximum when several types tie.
which.max(tmp[tmp$comm == "A", 2:5])
结果表明,“3类住房”在“A社区”中最为常见


请注意,最大值通常不是唯一的,即可能存在同样常见的住房类型。

请不要将您的数据以图片形式发布:这样我们很难用它来测试代码。请改用 dput(),并将其输出粘贴到您的问题中。非常感谢。
df <- data_object


# helper df
# maketype.f(): give every individual without a housing type (Type1..Type4
# all 0) the most common type of their municipality. When several types tie
# for most common, one of them is drawn at random for each such individual.
#
# INPUT  : df, a data frame with a `municipality` column and the numeric
#          dummy columns Type1, Type2, Type3 and Type4 (no NA allowed).
# OUTPUT : the same data with the blank rows filled in. The result is still
#          grouped by `municipality`, because no ungroup() is applied.
maketype.f <- function(df) {
  type_cols <- c("Type1", "Type2", "Type3", "Type4")

  # Fail early with a readable message instead of an obscure error from
  # inside mutate() or the row loop.
  missing_cols <- setdiff(c("municipality", type_cols), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "maketype.f(): missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  has_na <- vapply(type_cols, function(col) anyNA(df[[col]]), logical(1))
  if (any(has_na)) {
    stop(
      "maketype.f(): NA found in column(s): ",
      paste(type_cols[has_na], collapse = ", "),
      call. = FALSE
    )
  }

  df_helper <- df %>%
    group_by(municipality) %>%
    mutate(
      # T1..T4 first hold the per-municipality total of each type ...
      T1 = sum(Type1),
      T2 = sum(Type2),
      T3 = sum(Type3),
      T4 = sum(Type4),
      # TRUE for rows that have no housing type at all.
      HelperCheck = (Type1 + Type2 + Type3 + Type4) == 0,
      helpermax = max(T1, T2, T3, T4),
      # ... and are then turned into 1/0 flags marking the type(s) whose
      # total reaches the municipality maximum.
      T1 = as.numeric(T1 == helpermax),
      T2 = as.numeric(T2 == helpermax),
      T3 = as.numeric(T3 == helpermax),
      T4 = as.numeric(T4 == helpermax),
      # Number of types tied for the maximum.
      sumHelp = T1 + T2 + T3 + T4
    )

  # Work on plain vectors and write each column back once, instead of
  # modifying the grouped tibble element by element inside the loop.
  tied <- cbind(df_helper$T1, df_helper$T2, df_helper$T3, df_helper$T4) == 1
  filled <- lapply(type_cols, function(col) df_helper[[col]])

  for (row_i in which(df_helper$HelperCheck)) {
    winners <- which(tied[row_i, ])
    # A single 1 shuffled among the tied types picks one of them at random.
    draw <- sample(c(1, rep(0, df_helper$sumHelp[row_i] - 1)), replace = FALSE)
    for (k in seq_along(winners)) {
      filled[[winners[k]]][row_i] <- draw[k]
    }
  }

  for (j in seq_along(type_cols)) {
    df_helper[[type_cols[j]]] <- filled[[j]]
  }

  # Drop the helper columns before returning.
  df_helper %>%
    select(-T1, -T2, -T3, -T4, -HelperCheck, -helpermax, -sumHelp)
}
df <- maketype.f(df)

df <- data_object


# helper df
# maketype.f(): give every individual without a housing type (Type1..Type4
# all 0) the most common type of their municipality. When several types tie
# for most common, one of them is drawn at random for each such individual.
#
# INPUT  : df, a data frame with a `municipality` column and the numeric
#          dummy columns Type1, Type2, Type3 and Type4 (no NA allowed).
# OUTPUT : the same data with the blank rows filled in. The result is still
#          grouped by `municipality`, because no ungroup() is applied.
maketype.f <- function(df) {
  type_cols <- c("Type1", "Type2", "Type3", "Type4")

  # Fail early with a readable message instead of an obscure error from
  # inside mutate() or the row loop.
  missing_cols <- setdiff(c("municipality", type_cols), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "maketype.f(): missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  has_na <- vapply(type_cols, function(col) anyNA(df[[col]]), logical(1))
  if (any(has_na)) {
    stop(
      "maketype.f(): NA found in column(s): ",
      paste(type_cols[has_na], collapse = ", "),
      call. = FALSE
    )
  }

  df_helper <- df %>%
    group_by(municipality) %>%
    mutate(
      # T1..T4 first hold the per-municipality total of each type ...
      T1 = sum(Type1),
      T2 = sum(Type2),
      T3 = sum(Type3),
      T4 = sum(Type4),
      # TRUE for rows that have no housing type at all.
      HelperCheck = (Type1 + Type2 + Type3 + Type4) == 0,
      helpermax = max(T1, T2, T3, T4),
      # ... and are then turned into 1/0 flags marking the type(s) whose
      # total reaches the municipality maximum.
      T1 = as.numeric(T1 == helpermax),
      T2 = as.numeric(T2 == helpermax),
      T3 = as.numeric(T3 == helpermax),
      T4 = as.numeric(T4 == helpermax),
      # Number of types tied for the maximum.
      sumHelp = T1 + T2 + T3 + T4
    )

  # Work on plain vectors and write each column back once, instead of
  # modifying the grouped tibble element by element inside the loop.
  tied <- cbind(df_helper$T1, df_helper$T2, df_helper$T3, df_helper$T4) == 1
  filled <- lapply(type_cols, function(col) df_helper[[col]])

  for (row_i in which(df_helper$HelperCheck)) {
    winners <- which(tied[row_i, ])
    # A single 1 shuffled among the tied types picks one of them at random.
    draw <- sample(c(1, rep(0, df_helper$sumHelp[row_i] - 1)), replace = FALSE)
    for (k in seq_along(winners)) {
      filled[[winners[k]]][row_i] <- draw[k]
    }
  }

  for (j in seq_along(type_cols)) {
    df_helper[[type_cols[j]]] <- filled[[j]]
  }

  # Drop the helper columns before returning.
  df_helper %>%
    select(-T1, -T2, -T3, -T4, -HelperCheck, -helpermax, -sumHelp)
}
df <- maketype.f(df)