R 如何按组查找唯一的因子名称
我想在数据框的不同组中查找唯一值——这里的值是物种代码SPID(由属名的前四个字母加上种名的前四个字母组成),分组是栖息地变量hab,它有3个取值:TA、TB和TC。以下是我的数据输出:R 如何按组查找唯一的因子名称,r,dplyr,R,Dplyr,我想在数据框的不同组中查找唯一值——这里的值是物种代码SPID(由属名的前四个字母加上种名的前四个字母组成),分组是栖息地变量hab,它有3个取值:TA、TB和TC。以下是我的数据输出: library(dplyr) brk%>% + dplyr::select(spid,hab)%>% + dplyr::sample_n(20)%>% + dput() structure(list(spid = structure(c(127L, 78L, 33L, 162
library(dplyr)
brk%>%
+ dplyr::select(spid,hab)%>%
+ dplyr::sample_n(20)%>%
+ dput()
structure(list(spid = structure(c(127L, 78L, 33L, 162L, 81L,
72L, 72L, 196L, 196L, 241L, 240L, 238L, 190L, 181L, 188L, 162L,
214L, 13L, 178L, 78L), .Label = c("ACROEMER", "ACROMEGA", "AEROSUBPM",
"AMAZDIPL", "ANASAURI", "ANASPILI", "ANDRABER", "ANDRBILO", "ANEULATI",
"BAZZDECR", "BAZZDECRM", "BAZZMASC", "BAZZNITI", "BAZZPRAE",
"BAZZROCA", "BRACEURY", "BUCKMEMB", "CALYARGU", "CALYFISS", "CALYMASC",
"CALYPALI", "CALYPERU", "CAMPARCTM", "CAMPAURE", "CAMPCRAT",
"CAMPFLEX", "CAMPJAME", "CAMPROBI", "CAMPTHWA", "CEPHVAGI", "CERABELA",
"CERACORN", "CERAZENK", "CHEICAME", "CHEICORDI", "CHEIDECU",
"CHEIMONT", "CHEISERP", "CHEISURR", "CHEITRIF", "CHEIUSAM", "CHEIXANT",
"COLOCEAT", "COLOHASK", "COLOHILD", "COLOOBLI", "COLOPEPO", "COLOTANZ",
"COLOZENK", "COLUBENO", "COLUCALY", "COLUDIGI", "COLUHUMB", "COLUOBES",
"COLUTENU", "CONOTRAP", "CRYPMART", "CUSPCONT", "CYCLBORB", "CYCLBREV",
"CYLIKIAE", "DALTANGU", "DALTLATI", "DENDBORB", "DICRBILLB",
"DIPLCAVI", "DIPLCOGO", "DIPLCORN", "DREPCULT", "DREPHELE", "DREPMADA",
"DREPPHYS", "ECTRREGU", "ECTRVALE", "FISSASPL", "FISSMEGAH",
"FISSSCIO", "FRULAPIC", "FRULAPICU", "FRULBORB", "FRULCAPE",
"FRULGROS", "FRULHUMB", "FRULLIND", "FRULREPA", "FRULSCHI", "FRULSERR",
"FRULUSAMR", "FRULVARI", "FUSCCONN", "GOTTNEES", "GOTTSCHI",
"GOTTSPHA", "GROULAXO", "HAPLSTIC", "HERBDICR", "HERBJUNI", "HERBMAUR",
"HETEDUBI", "HETESPLE", "HETESPN", "HOLOBORB", "HOLOCYLI", "HYPNCUPR",
"ISOPCHRY", "ISOPCITR", "ISOPINTO", "ISOTAUBE", "JAEGSOLI", "JAEGSOLIR",
"KURZCAPI", "KURZCAPIS", "LEJEALAT", "LEJEANIS", "LEJECONF",
"LEJEECKL", "LEJEFLAV", "LEJELOMA", "LEJEOBTU", "LEJERAMO", "LEJETABU",
"LEJETUBE", "LEJEVILL", "LEPIAFRI", "LEPICESP", "LEPIDELE", "LEPIHIRS",
"LEPISTUH", "LEPISTUHP", "LEPTFLEX", "LEPTINFU", "LEPTMACU",
"LEUCANGU", "LEUCBIFI", "LEUCBORY", "LEUCCANDI", "LEUCCAPI",
"LEUCCINC", "LEUCDELI", "LEUCGRAN", "LEUCHILD", "LEUCISLE", "LEUCLEPE",
"LEUCMAYO", "LEUCSEYC", "LOPHBORB", "LOPHCOAD", "LOPHCONC", "LOPHDIFF",
"LOPHEULO", "LOPHMULT", "LOPHMURI", "LOPHNIGR", "LOPHSUBF", "MACRACID",
"MACRMAUR", "MACRMICR", "MACRPALL", "MACRSERP", "MACRSULC", "MACRTENU",
"MASTDICL", "METZCONS", "METZFURC", "METZLEPT", "METZMADA", "MICRAFRI",
"MICRANKA", "MICRDISP", "MICRINFL", "MICRKAME", "MICROBLO", "MICRSTRA",
"MITTLIMO", "MNIOFUSC", "PAPICOMP", "PLAGANGU", "PLAGDREP", "PLAGPECT",
"PLAGRENA", "PLAGREPA", "PLAGRODR", "PLAGTERE", "PLEUGIGA", "PLICHIRT",
"POLYCOMM", "POROELON", "POROMADA", "POROUSAG", "PRIOGRAT", "PSEUDECI",
"PTYCSTRI", "PYRRSPIN", "RACOAFRI", "RADUANKE", "RADUAPPR", "RADUBORB",
"RADUBORY", "RADUCOMO", "RADUEVEL", "RADUFULV", "RADUMADA", "RADUSTEN",
"RADUTABU", "RADUVOLU", "RHAPCRIS", "RHAPGRAC", "RHAPRUBR", "RICCAMAZ",
"RICCEROS", "RICCFAST", "RICCLIMB", "RICCLONG", "SCHLBADI", "SCHLMICRO",
"SCHLOANGU", "SCHLSQUA", "SEMACRAS", "SEMASCHI", "SEMASUBP",
"SERPCYRT", "SOLEBORG", "SOLEONRA", "SOLESPHA", "SPHATUMI", "SPHEMINU",
"SYRRAFRI", "SYRRAPER", "SYRRDIMO", "SYRRGAUD", "SYRRHISP", "SYRRPOTT",
"SYRRPROL", "SYRRPROLA", "SYZYPURP", "TAXICONFO", "TELACOAC",
"TELADIAC", "TELANEMA", "TRICADHA", "TRICDEBE", "TRICPERV", "ULOTFULV",
"WARBLEPT", "ZYGOINTE", "ZYGOREIN"), class = "factor"), hab = structure(c(1L,
3L, 3L, 2L, 3L, 2L, 1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 1L, 3L,
2L, 3L, 2L), .Label = c("TA", "TB", "TC"), class = "factor")), row.names = c(NA,
-20L), class = "data.frame")
我试过:
dplyr::select(spid,hab)%>%
dplyr::group_by(hab)%>%
dplyr::summarise(n_distinct(spid))
显然,这并没有给出我想要的结果。我该如何按栖息地找出唯一物种的名称?
感谢您的帮助!
您可以尝试:
df <- dplyr::select(brk, spid, hab)
lapply(split(df, df$hab), unique)
根据澄清进行编辑:
df2 <- as.data.frame(table(df$spid, df$hab)[rowSums(table(df$spid, df$hab))==1,])
df2[df2$Freq != 0,]
#> Var1 Var2 Freq
#> 4 LEPIHIRS TA 1
#> 6 PLAGREPA TA 1
#> 12 TRICDEBE TA 1
#> 13 BAZZNITI TB 1
#> 19 POROMADA TB 1
#> 22 TELADIAC TB 1
#> 23 TRICADHA TB 1
#> 26 CERAZENK TC 1
#> 27 FRULCAPE TC 1
#> 29 PLAGDREP TC 1
#> 32 PRIOGRAT TC 1
#> 33 SCHLBADI TC 1
这里有一个修改后的解决方案,只显示仅与1个hab相关的spid。dplyr解决方案:
brk %>%
distinct(spid, hab) %>%
arrange(hab)
spid hab
1 LEPIHIRS TA
2 DREPPHYS TA
3 TRICDEBE TA
4 PLAGREPA TA
5 MASTDICL TA
6 MASTDICL TB
7 DREPPHYS TB
8 RADUAPPR TB
9 TRICADHA TB
10 TELADIAC TB
11 POROMADA TB
12 BAZZNITI TB
13 FRULAPIC TB
14 FRULAPIC TC
15 CERAZENK TC
16 FRULCAPE TC
17 RADUAPPR TC
18 PRIOGRAT TC
19 SCHLBADI TC
20 PLAGDREP TC
根据澄清进行编辑
编辑以匹配……一个非常朴素的解决方案,但有时表格是很好的显示方式:
table(your_data$spid, your_data$hab)[rowSums(table(your_data$spid, your_data$hab)) == 1, ]
#>          TA TB TC
#> BAZZNITI  0  1  0
#> CERAZENK  0  0  1
#> FRULCAPE  0  0  1
#> LEPIHIRS  1  0  0
#> PLAGDREP  0  0  1
#> PLAGREPA  1  0  0
#> POROMADA  0  1  0
#> PRIOGRAT  0  0  1
#> SCHLBADI  0  0  1
#> TELADIAC  0  1  0
#> TRICADHA  0  1  0
#> TRICDEBE  1  0  0
谢谢你的回答。是的,这正是我想要的,但是你知道为什么有的物种会在两个栖息地中都被列为唯一吗?我想展示的是只存在于一个栖息地的所有物种,也许我没有解释清楚,如果是这样的话,很抱歉。 啊,好的。请参阅我更新后的答案。这就是你想要的吗? 是的,那正是我想要的,谢谢!顺便说一下,我不确定是否完全理解代码中的某些语句。这样写:dplyr::mutate(n = n())——这是一种像count函数那样进行计数的方法吗?关于最后一部分,select(-n)有什么作用?再次感谢您的帮助! mutate(n = n())统计给定spid值出现的次数,因为我按spid分组。select(-n)只是删除在mutate调用中创建的列n,我这样做是因为我们已经知道n对于所有行都是1。 是的,mutate(n = n())类似于count,但与count等价的是summarise(n = n())。 谢谢!现在说得通了。 你好,我回来找你,因为我意识到我们得出的方案并不是我真正需要的。事实上,在您的代码中,一个spid可以在一个栖息地中出现40次,并且是该栖息地所独有的,但它不会被选中。我们如何修改代码来选出某个栖息地特有的spid,即使这个物种在该栖息地中出现超过1次?再次感谢您抽出时间! 我们可以按spid分组,检查每个spid有多少个唯一的hab,并且只保留那些只有一个唯一hab的:brk %>% group_by(spid) %>% mutate(n_unique = length(unique(hab))) %>% filter(n_unique == 1) 是的,这也是我要找的!非常感谢。
> brk %>%
group_by(spid) %>%
summarize(nn = n_distinct(hab)) %>%
filter(nn == 1) %>%
ungroup()
# A tibble: 12 x 2
spid nn
<fct> <int>
1 BAZZNITI 1
2 CERAZENK 1
3 FRULCAPE 1
4 LEPIHIRS 1
5 PLAGDREP 1
6 PLAGREPA 1
7 POROMADA 1
8 PRIOGRAT 1
9 SCHLBADI 1
10 TELADIAC 1
11 TRICADHA 1
12 TRICDEBE 1
brk %>%
distinct(spid, hab) %>%
arrange(hab)
spid hab
1 LEPIHIRS TA
2 DREPPHYS TA
3 TRICDEBE TA
4 PLAGREPA TA
5 MASTDICL TA
6 MASTDICL TB
7 DREPPHYS TB
8 RADUAPPR TB
9 TRICADHA TB
10 TELADIAC TB
11 POROMADA TB
12 BAZZNITI TB
13 FRULAPIC TB
14 FRULAPIC TC
15 CERAZENK TC
16 FRULCAPE TC
17 RADUAPPR TC
18 PRIOGRAT TC
19 SCHLBADI TC
20 PLAGDREP TC
brk %>%
group_by(spid) %>%
mutate(n = n()) %>%
filter(n == 1) %>%
select(-n)
# A tibble: 12 x 2
# Groups: spid [12]
spid hab
<fct> <fct>
1 LEPIHIRS TA
2 CERAZENK TC
3 FRULCAPE TC
4 TRICDEBE TA
5 TRICADHA TB
6 TELADIAC TB
7 PRIOGRAT TC
8 PLAGREPA TA
9 POROMADA TB
10 SCHLBADI TC
11 BAZZNITI TB
12 PLAGDREP TC