R 如何按组查找唯一的因子名称

R 如何按组查找唯一的因子名称,r,dplyr,R,Dplyr,我在寻找独特的价值:这里的物种代码是SPID,前四个字母来自属名,前四个字母来自物种名——在我的数据框架的不同组中:栖息地有3个栖息地变量hab,命名为TA、TB和TC 以下是我的数据输出: library(dplyr) brk%>% + dplyr::select(spid,hab)%>% + dplyr::sample_n(20)%>% + dput() structure(list(spid = structure(c(127L, 78L, 33L, 162

我想在数据框的不同分组中查找唯一值:这里的物种代码是 spid(由属名的前四个字母和种名的前四个字母组成);分组变量是栖息地 hab,共有 3 个栖息地,分别命名为 TA、TB 和 TC。

以下是我的数据输出:

library(dplyr)

brk%>%
+   dplyr::select(spid,hab)%>%
+   dplyr::sample_n(20)%>%
+   dput()
structure(list(spid = structure(c(127L, 78L, 33L, 162L, 81L, 
72L, 72L, 196L, 196L, 241L, 240L, 238L, 190L, 181L, 188L, 162L, 
214L, 13L, 178L, 78L), .Label = c("ACROEMER", "ACROMEGA", "AEROSUBPM", 
"AMAZDIPL", "ANASAURI", "ANASPILI", "ANDRABER", "ANDRBILO", "ANEULATI", 
"BAZZDECR", "BAZZDECRM", "BAZZMASC", "BAZZNITI", "BAZZPRAE", 
"BAZZROCA", "BRACEURY", "BUCKMEMB", "CALYARGU", "CALYFISS", "CALYMASC", 
"CALYPALI", "CALYPERU", "CAMPARCTM", "CAMPAURE", "CAMPCRAT", 
"CAMPFLEX", "CAMPJAME", "CAMPROBI", "CAMPTHWA", "CEPHVAGI", "CERABELA", 
"CERACORN", "CERAZENK", "CHEICAME", "CHEICORDI", "CHEIDECU", 
"CHEIMONT", "CHEISERP", "CHEISURR", "CHEITRIF", "CHEIUSAM", "CHEIXANT", 
"COLOCEAT", "COLOHASK", "COLOHILD", "COLOOBLI", "COLOPEPO", "COLOTANZ", 
"COLOZENK", "COLUBENO", "COLUCALY", "COLUDIGI", "COLUHUMB", "COLUOBES", 
"COLUTENU", "CONOTRAP", "CRYPMART", "CUSPCONT", "CYCLBORB", "CYCLBREV", 
"CYLIKIAE", "DALTANGU", "DALTLATI", "DENDBORB", "DICRBILLB", 
"DIPLCAVI", "DIPLCOGO", "DIPLCORN", "DREPCULT", "DREPHELE", "DREPMADA", 
"DREPPHYS", "ECTRREGU", "ECTRVALE", "FISSASPL", "FISSMEGAH", 
"FISSSCIO", "FRULAPIC", "FRULAPICU", "FRULBORB", "FRULCAPE", 
"FRULGROS", "FRULHUMB", "FRULLIND", "FRULREPA", "FRULSCHI", "FRULSERR", 
"FRULUSAMR", "FRULVARI", "FUSCCONN", "GOTTNEES", "GOTTSCHI", 
"GOTTSPHA", "GROULAXO", "HAPLSTIC", "HERBDICR", "HERBJUNI", "HERBMAUR", 
"HETEDUBI", "HETESPLE", "HETESPN", "HOLOBORB", "HOLOCYLI", "HYPNCUPR", 
"ISOPCHRY", "ISOPCITR", "ISOPINTO", "ISOTAUBE", "JAEGSOLI", "JAEGSOLIR", 
"KURZCAPI", "KURZCAPIS", "LEJEALAT", "LEJEANIS", "LEJECONF", 
"LEJEECKL", "LEJEFLAV", "LEJELOMA", "LEJEOBTU", "LEJERAMO", "LEJETABU", 
"LEJETUBE", "LEJEVILL", "LEPIAFRI", "LEPICESP", "LEPIDELE", "LEPIHIRS", 
"LEPISTUH", "LEPISTUHP", "LEPTFLEX", "LEPTINFU", "LEPTMACU", 
"LEUCANGU", "LEUCBIFI", "LEUCBORY", "LEUCCANDI", "LEUCCAPI", 
"LEUCCINC", "LEUCDELI", "LEUCGRAN", "LEUCHILD", "LEUCISLE", "LEUCLEPE", 
"LEUCMAYO", "LEUCSEYC", "LOPHBORB", "LOPHCOAD", "LOPHCONC", "LOPHDIFF", 
"LOPHEULO", "LOPHMULT", "LOPHMURI", "LOPHNIGR", "LOPHSUBF", "MACRACID", 
"MACRMAUR", "MACRMICR", "MACRPALL", "MACRSERP", "MACRSULC", "MACRTENU", 
"MASTDICL", "METZCONS", "METZFURC", "METZLEPT", "METZMADA", "MICRAFRI", 
"MICRANKA", "MICRDISP", "MICRINFL", "MICRKAME", "MICROBLO", "MICRSTRA", 
"MITTLIMO", "MNIOFUSC", "PAPICOMP", "PLAGANGU", "PLAGDREP", "PLAGPECT", 
"PLAGRENA", "PLAGREPA", "PLAGRODR", "PLAGTERE", "PLEUGIGA", "PLICHIRT", 
"POLYCOMM", "POROELON", "POROMADA", "POROUSAG", "PRIOGRAT", "PSEUDECI", 
"PTYCSTRI", "PYRRSPIN", "RACOAFRI", "RADUANKE", "RADUAPPR", "RADUBORB", 
"RADUBORY", "RADUCOMO", "RADUEVEL", "RADUFULV", "RADUMADA", "RADUSTEN", 
"RADUTABU", "RADUVOLU", "RHAPCRIS", "RHAPGRAC", "RHAPRUBR", "RICCAMAZ", 
"RICCEROS", "RICCFAST", "RICCLIMB", "RICCLONG", "SCHLBADI", "SCHLMICRO", 
"SCHLOANGU", "SCHLSQUA", "SEMACRAS", "SEMASCHI", "SEMASUBP", 
"SERPCYRT", "SOLEBORG", "SOLEONRA", "SOLESPHA", "SPHATUMI", "SPHEMINU", 
"SYRRAFRI", "SYRRAPER", "SYRRDIMO", "SYRRGAUD", "SYRRHISP", "SYRRPOTT", 
"SYRRPROL", "SYRRPROLA", "SYZYPURP", "TAXICONFO", "TELACOAC", 
"TELADIAC", "TELANEMA", "TRICADHA", "TRICDEBE", "TRICPERV", "ULOTFULV", 
"WARBLEPT", "ZYGOINTE", "ZYGOREIN"), class = "factor"), hab = structure(c(1L, 
3L, 3L, 2L, 3L, 2L, 1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 1L, 3L, 
2L, 3L, 2L), .Label = c("TA", "TB", "TC"), class = "factor")), row.names = c(NA, 
-20L), class = "data.frame")
我试过:


dplyr::select(spid,hab)%>%
 dplyr::group_by(hab)%>%
 dplyr::summarise(n_distinct(spid))
显然,这并没有给出我想要的结果(它只返回每个栖息地中不同物种的数量)。我该如何按栖息地找出唯一物种的名称?

感谢您的帮助,

您可以尝试:

df <- dplyr::select(brk, spid, hab)

lapply(split(df, df$hab), unique)
根据澄清进行编辑:

df2 <- as.data.frame(table(df$spid, df$hab)[rowSums(table(df$spid, df$hab))==1,])
df2[df2$Freq != 0,]
#>        Var1 Var2 Freq
#> 4  LEPIHIRS   TA    1
#> 6  PLAGREPA   TA    1
#> 12 TRICDEBE   TA    1
#> 13 BAZZNITI   TB    1
#> 19 POROMADA   TB    1
#> 22 TELADIAC   TB    1
#> 23 TRICADHA   TB    1
#> 26 CERAZENK   TC    1
#> 27 FRULCAPE   TC    1
#> 29 PLAGDREP   TC    1
#> 32 PRIOGRAT   TC    1
#> 33 SCHLBADI   TC    1


这里有一个修改后的解决方案,只显示与1个hab相关的spid

dplyr解决方案:

brk %>% 
  distinct(spid, hab) %>% 
  arrange(hab)

              spid hab
1  LEPIHIRS  TA
2  DREPPHYS  TA
3  TRICDEBE  TA
4  PLAGREPA  TA
5  MASTDICL  TA
6  MASTDICL  TB
7  DREPPHYS  TB
8  RADUAPPR  TB
9  TRICADHA  TB
10 TELADIAC  TB
11 POROMADA  TB
12 BAZZNITI  TB
13 FRULAPIC  TB
14 FRULAPIC  TC
15 CERAZENK  TC
16 FRULCAPE  TC
17 RADUAPPR  TC
18 PRIOGRAT  TC
19 SCHLBADI  TC
20 PLAGDREP  TC
根据澄清进行编辑


编辑以匹配……这是一个非常朴素的解决方案,但有时表格是很好的展示方式。

table(your_data$spid, your_data$hab)[rowSums(table(your_data$spid, your_data$hab)) == 1, ]
#>
#>            TA TB TC
#>   BAZZNITI  0  1  0
#>   CERAZENK  0  0  1
#>   FRULCAPE  0  0  1
#>   LEPIHIRS  1  0  0
#>   PLAGDREP  0  0  1
#>   PLAGREPA  1  0  0
#>   POROMADA  0  1  0
#>   PRIOGRAT  0  0  1
#>   SCHLBADI  0  0  1
#>   TELADIAC  0  1  0
#>   TRICADHA  0  1  0
#>   TRICDEBE  1  0  0
谢谢你的回答。是的,这基本上就是我想要的,但是你知道为什么有的物种会同时出现在两个栖息地的结果中吗?我想展示的是只存在于一个栖息地的所有物种。也许是我没有解释清楚,如果是这样的话,很抱歉。

啊,好的。请参阅我更新后的答案。这就是你想要的吗?是的,那正是我想要的,谢谢!顺便说一下,我不确定是否完全理解了代码中的某些语句。dplyr::mutate(n = n()) 是一种类似 count 函数的计数方法吗?另外,最后的 select(-n) 有什么用?再次感谢您的帮助!mutate(n = n()) 统计给定 spid 值出现的次数,因为我是按 spid 分组的。select(-n) 只是删除在 mutate 调用中创建的列 n;我这样做是因为我们已经知道所有行的 n 都是 1。是的,mutate(n = n()) 类似于 count,但与 count 等价的写法是 summarise(n = n())。谢谢!现在明白了。你好,我又来找你了,因为我意识到我们之前得出的方案并不是我真正需要的。事实上,在您的代码中,一个 spid 可能在某个栖息地中出现 40 次,并且只存在于该栖息地,却不会被选中。我们该如何修改代码,使得即使某个物种在该栖息地中出现不止一次,也能选出这个栖息地特有的 spid?再次感谢您抽出时间!我们可以按 spid 分组,检查每个 spid 对应多少个不同的 hab,并只保留恰好对应一个 hab 的那些:brk %>% group_by(spid) %>% mutate(n_unique = length(unique(hab))) %>% filter(n_unique == 1)。是的,这正是我要找的!非常感谢。
> brk %>% 
    group_by(spid) %>% 
    summarize(nn = n_distinct(hab)) %>% 
    filter(nn == 1) %>%
    ungroup()

# A tibble: 12 x 2
   spid        nn
   <fct>    <int>
 1 BAZZNITI     1
 2 CERAZENK     1
 3 FRULCAPE     1
 4 LEPIHIRS     1
 5 PLAGDREP     1
 6 PLAGREPA     1
 7 POROMADA     1
 8 PRIOGRAT     1
 9 SCHLBADI     1
10 TELADIAC     1
11 TRICADHA     1
12 TRICDEBE     1
brk %>% 
  distinct(spid, hab) %>% 
  arrange(hab)

              spid hab
1  LEPIHIRS  TA
2  DREPPHYS  TA
3  TRICDEBE  TA
4  PLAGREPA  TA
5  MASTDICL  TA
6  MASTDICL  TB
7  DREPPHYS  TB
8  RADUAPPR  TB
9  TRICADHA  TB
10 TELADIAC  TB
11 POROMADA  TB
12 BAZZNITI  TB
13 FRULAPIC  TB
14 FRULAPIC  TC
15 CERAZENK  TC
16 FRULCAPE  TC
17 RADUAPPR  TC
18 PRIOGRAT  TC
19 SCHLBADI  TC
20 PLAGDREP  TC
brk %>% 
  group_by(spid) %>% 
  mutate(n = n()) %>% 
  filter(n == 1) %>% 
  select(-n)

# A tibble: 12 x 2
# Groups:   spid [12]
   spid     hab  
   <fct>    <fct>
 1 LEPIHIRS TA   
 2 CERAZENK TC   
 3 FRULCAPE TC   
 4 TRICDEBE TA   
 5 TRICADHA TB   
 6 TELADIAC TB   
 7 PRIOGRAT TC   
 8 PLAGREPA TA   
 9 POROMADA TB   
10 SCHLBADI TC   
11 BAZZNITI TB   
12 PLAGDREP TC