R 为因子的每个水平添加频率计数列，并重塑数据框_R

R 为因子的每个水平添加频率计数列，并重塑数据框

R 为系数级别的每个级别添加频率计数列，并重塑数据帧,r,R,我有这样一个数据帧： df<- data.frame(region = c("1","1","1","1","1","2","2"), loc = c("104","104","104","105","105","106","107"), plntsp = c("A","A", "B", "C", "C", "E", "F"), lepsp = c("Z","Z", "Y", "W", "X", "T", "T"

我有这样一个数据帧：

 # Example data: one row per observation, giving the region, the location
 # within it, the plant species (plntsp) and the lepidoptera species (lepsp).
 df <- data.frame(
   region = c("1", "1", "1", "1", "1", "2", "2"),
   loc    = c("104", "104", "104", "105", "105", "106", "107"),
   plntsp = c("A", "A", "B", "C", "C", "E", "F"),
   lepsp  = c("Z", "Z", "Y", "W", "X", "T", "T")
 )

我试过：

# Attempt from the question, kept verbatim. It does not run as written:
#   * group_by() is given no data frame -- its first argument here is `region`;
#   * `plantsp` is not a column of df (the data defines `plntsp`);
#   * the parentheses of the summarise() call are never closed, and nothing
#     pipes its result into mutate();
#   * `&` is the elementwise logical AND, so it cannot stack two columns
#     into one.
df<- 
group_by(region,loc) %>%
summarise(freq1= length(unique(plantsp), freq2= length(unique(lepsp))
mutate(sp= df$plantsp &df$lepsp, freq= df$freq1 &df$freq2)

df %>%
summarise(freq1= length(unique(plantsp), freq2= length(unique(lepsp))
mutate(sp= df$plantsp &df$lepsp, freq= df$freq1 &df$freq2)

使用 aggregate 将是一种选择：

# Count rows per (region, loc, species) separately for each species column,
# then stack the two count tables. The values being aggregated are just row
# indices; FUN = length turns each group into its row count.
do.call(
  rbind,
  lapply(c("plntsp", "lepsp"), function(species_col) {
    aggregate(
      list(freq = seq_len(nrow(df))),
      by = list(region = df$region, loc = df$loc, sp = df[[species_col]]),
      FUN = length
    )
  })
)
#   region loc sp freq
#1       1 104  A    2
#2       1 104  B    1
#3       1 105  C    2
#4       2 106  E    1
#5       2 107  F    1
#6       2 106  T    1
#7       2 107  T    1
#8       1 105  W    1
#9       1 105  X    1
#10      1 104  Y    1
#11      1 104  Z    2

或者使用

aggregate

library(reshape2)
# Stack every non-id column into a single `value` column (long format).
opt <- melt(df, id.vars = c("region", "loc"))
#Warning message:
#attributes are not identical across measure variables; they will be dropped 
# Count how often each value occurs within every region/loc combination.
aggregate(
  list(freq = opt$value),
  by = opt[c("region", "loc", "value")],
  FUN = length
)
#   region loc value freq
#1       1 104     A    2
#2       1 104     B    1
#3       1 105     C    2
#4       2 106     E    1
#5       2 107     F    1
#6       2 106     T    1
#7       2 107     T    1
#8       1 105     W    1
#9       1 105     X    1
#10      1 104     Y    1
#11      1 104     Z    2

aggregate

将是一个选项

# Count rows per (region, loc, species) separately for each species column,
# then stack the two count tables. The values being aggregated are just row
# indices; FUN = length turns each group into its row count.
do.call(
  rbind,
  lapply(c("plntsp", "lepsp"), function(species_col) {
    aggregate(
      list(freq = seq_len(nrow(df))),
      by = list(region = df$region, loc = df$loc, sp = df[[species_col]]),
      FUN = length
    )
  })
)
#   region loc sp freq
#1       1 104  A    2
#2       1 104  B    1
#3       1 105  C    2
#4       2 106  E    1
#5       2 107  F    1
#6       2 106  T    1
#7       2 107  T    1
#8       1 105  W    1
#9       1 105  X    1
#10      1 104  Y    1
#11      1 104  Z    2

或者使用

aggregate

library(reshape2)
# Stack every non-id column into a single `value` column (long format).
opt <- melt(df, id.vars = c("region", "loc"))
#Warning message:
#attributes are not identical across measure variables; they will be dropped 
# Count how often each value occurs within every region/loc combination.
aggregate(
  list(freq = opt$value),
  by = opt[c("region", "loc", "value")],
  FUN = length
)
#   region loc value freq
#1       1 104     A    2
#2       1 104     B    1
#3       1 105     C    2
#4       2 106     E    1
#5       2 107     F    1
#6       2 106     T    1
#7       2 107     T    1
#8       1 105     W    1
#9       1 105     X    1
#10      1 104     Y    1
#11      1 104     Z    2

使用

tidyverse

：

library(tidyverse)
# Stack the two species columns into one `sp` column, then count the rows
# for every region/loc/sp combination.
#   * pivot_longer() replaces the superseded gather().
#   * count() takes the grouping columns directly and names the count
#     column via `name`, so no group_by()/rename() is needed and the result
#     is returned ungrouped.
df %>%
  pivot_longer(c(plntsp, lepsp), names_to = "key", values_to = "sp") %>%
  count(region, loc, sp, name = "x")

   region    loc    sp     x
 1      1    104     A     2
 2      1    104     B     1
 3      1    104     Y     1
 4      1    104     Z     2
 5      1    105     C     2
 6      1    105     W     1
 7      1    105     X     1
 8      2    106     E     1
 9      2    106     T     1
10      2    107     F     1
11      2    107     T     1

使用

tidyverse

：

library(tidyverse)
# Stack the two species columns into one `sp` column, then count the rows
# for every region/loc/sp combination.
#   * pivot_longer() replaces the superseded gather().
#   * count() takes the grouping columns directly and names the count
#     column via `name`, so no group_by()/rename() is needed and the result
#     is returned ungrouped.
df %>%
  pivot_longer(c(plntsp, lepsp), names_to = "key", values_to = "sp") %>%
  count(region, loc, sp, name = "x")

   region    loc    sp     x
 1      1    104     A     2
 2      1    104     B     1
 3      1    104     Y     1
 4      1    104     Z     2
 5      1    105     C     2
 6      1    105     W     1
 7      1    105     X     1
 8      2    106     E     1
 9      2    106     T     1
10      2    107     F     1
11      2    107     T     1

这个 data.table 解决方案首先将数据重塑为长格式，然后统计出现次数。

将数据从宽格式重塑为长格式的

melt（）

函数可从两个软件包获得：

reshape2

和

data.table

。出于性能原因和简洁的语法，我更喜欢后者：

library(data.table)
# Identifier columns; every other column of df is melted into `sp`.
id_vars <- c("region", "loc")
# Reshape to long format, then count rows (.N) per region/loc/sp group.
# Note: setDT() converts df to a data.table by reference.
melt(
  setDT(df),
  id.vars = id_vars,
  value.name = "sp"
)[, .(freq = .N), by = c(id_vars, "sp")]

请注意，列已根据 OP 的要求进行了重命名。为了与目前为止发布的其他答案进行比较，下面是不重命名列时更为精简的代码：

# Same count without renaming: the melted values stay in melt()'s `value`
# column and the count column keeps data.table's default name.
melt(setDT(df), id.vars = id_vars)[
  , .N, by = c(id_vars, "value")
]

这个 data.table 解决方案首先将数据重塑为长格式，然后统计出现次数。

将数据从宽格式重塑为长格式的

melt（）

函数可从两个软件包获得：

reshape2

和

data.table

。出于性能原因和简洁的语法，我更喜欢后者：

library(data.table)
# Identifier columns; every other column of df is melted into `sp`.
id_vars <- c("region", "loc")
# Reshape to long format, then count rows (.N) per region/loc/sp group.
# Note: setDT() converts df to a data.table by reference.
melt(
  setDT(df),
  id.vars = id_vars,
  value.name = "sp"
)[, .(freq = .N), by = c(id_vars, "sp")]

请注意，列已根据 OP 的要求进行了重命名。为了与目前为止发布的其他答案进行比较，下面是不重命名列时更为精简的代码：

# Same count without renaming: the melted values stay in melt()'s `value`
# column and the count column keeps data.table's default name.
melt(setDT(df), id.vars = id_vars)[
  , .N, by = c(id_vars, "value")
]

首先将其折叠到长格式，然后按

region/loc/newspcolumn

将其分组备份，首先将其折叠到长格式，然后按

region/loc/newspcolumn

将其分组备份，如果您只为

by=

参数选择

opt

df

的列，则您的

aggregate 调用可以缩短很多 - aggregate(list(freq=opt$value), opt[c("region","loc","value")], FUN = length)
例如，在我看来，这对解决方案并不重要，但OP明确要求如何命名这些列。您是否介意相应地修改您的解决方案，以便与其他答案进行比较？如果您只选择opt
/的列，您的聚合可能会缩短很多e> df
对于by=
参数 - aggregate(list(freq=opt$value), opt[c("region","loc","value")], FUN = length)
例如，在我看来，这对解决方案并不重要，但OP明确要求列的名称。您是否介意相应地修改您的解决方案，以便与其他答案进行比较？在我看来，这对解决方案并不重要，但OP明确要求列的名称命名。你介意相应地修改你的解决方案，只是为了与其他答案进行比较吗？在我看来，这对解决方案并不重要，但OP明确要求如何命名这些列。你介意相应地修改你的解决方案，只是为了与其他答案进行比较吗？