R 如何计算两种状态之间的跃迁频率?
我的数据集如下所示:R 如何计算两种状态之间的跃迁频率?,r,frequency,R,Frequency,我的数据集如下所示: Data <- read.table(header=TRUE, text=" itemset aac, cca, bab, caa, aba, abb, cab, bcc, aca, bab, cca, cac, baa, baa, abc, abb, cbb, baa, cba, acb, ccb, bbc, aac, bac, abb, bba, bca, acc, caa, cca") Data此代码创建您要查找的22x22矩阵 Data <- read
# Example sequence of observed states, one per row, in the column `itemset`.
# NOTE: the fields are whitespace-delimited, so every trailing comma stays
# part of its value ("aac,"); only the final entry ("cca") has no comma.
Data <- read.table(
  text = "
itemset
aac,
cca,
bab,
caa,
aba,
abb,
cab,
bcc,
aca,
bab,
cca,
cac,
baa,
baa,
abc,
abb,
cbb,
baa,
cba,
acb,
ccb,
bbc,
aac,
bac,
abb,
bba,
bca,
acc,
caa,
cca",
  header = TRUE
)
Data
此代码创建您要查找的22x22矩阵:
# Example sequence of observed states, one per row, in the column `itemset`.
# NOTE: the fields are whitespace-delimited, so every trailing comma stays
# part of its value ("aac,"); only the final entry ("cca") has no comma.
Data <- read.table(
  text = "
itemset
aac,
cca,
bab,
caa,
aba,
abb,
cab,
bcc,
aca,
bab,
cca,
cac,
baa,
baa,
abc,
abb,
cbb,
baa,
cba,
acb,
ccb,
bbc,
aac,
bac,
abb,
bba,
bca,
acc,
caa,
cca",
  header = TRUE
)
# Count how often each state (`itemset`) is immediately followed by each
# other state (`second_state`) and show the counts as a wide table:
# one row per current state, one column per following state.
library(dplyr)  # %>%, filter(), count()
library(tidyr)  # pivot_wider()

# Pair every observation with its successor; the last one has none (NA).
Data$second_state <- c(as.character(Data$itemset[-1]), NA)

Data %>%
  # Drop incomplete pairs (in particular the final row).
  filter(!is.na(itemset) & !is.na(second_state)) %>%
  # count() with explicit columns returns an ungrouped result.
  count(itemset, second_state) %>%
  # Transitions that never occur are filled with 0 instead of NA; columns are
  # sorted by name, matching the ordering spread() used to produce.
  pivot_wider(
    names_from = second_state,
    values_from = n,
    values_fill = 0L,
    names_sort = TRUE
  )
Data %>%
group_by(itemset,second_state) %>%
filter(!is.na(second_state) & !is.na(itemset)) %>%
count() %>% spread(second_state,n)
你在找这样的东西吗?
# Relative frequency of every observed transition itemset -> next_itemset.
library(tidyverse)

df <- data.frame(Data, stringsAsFactors = FALSE) %>%
  # Remove the commas from the state labels (literal match, not a regex).
  mutate(itemset = gsub(",", "", itemset, fixed = TRUE)) %>%
  # Pair each state with the one that follows it; the last row gets NA.
  mutate(next_itemset = lead(itemset, n = 1))

# xtabs() omits the final row because its next_itemset is NA, so the table
# holds nrow(df) - 1 transitions. Dividing by nrow(df) would make the
# frequencies sum to less than 1; prop.table() normalises by the number of
# transitions actually counted. Use prop.table(..., margin = 1) instead for
# row-wise transition probabilities.
prop.table(xtabs(~ itemset + next_itemset, data = df))
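library(tidyverse)
df <- data.frame( Data, stringsAsFactors = FALSE ) %>%
#remove the comma
mutate( itemset = gsub( ",", "", itemset) ) %>%
#create colum with next state
mutate( next_itemset = lead( itemset, n = 1 ) )
xtabs( ~ itemset + next_itemset, df) / nrow(df)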
另一种方法是使用 reshape2,生成 21x21 概率转移矩阵:
# Row-normalised transition matrix: entry (i, j) is the share of occurrences
# of state i that are immediately followed by state j.
library(reshape2)

Data <- data.frame(Data, stringsAsFactors = FALSE)
# Strip commas from the labels so that e.g. "cca," and "cca" are one state;
# otherwise the terminal state forms its own all-zero row (0 / 0 = NaN).
Data$itemset <- gsub(",", "", as.character(Data$itemset), fixed = TRUE)
# Successor of every observation; the last observation has none (NA).
Data$nextitem <- c(Data$itemset[-1], NA)
# Each row contributes one transition to the count.
Data$value <- 1

# Cast only complete pairs. This avoids creating an "NA" column that would
# have to be dropped by position, and guarantees every row has >= 1 transition.
df <- dcast(
  Data[!is.na(Data$nextitem), ],
  itemset ~ nextitem,
  value.var = "value",
  fun.aggregate = sum,
  fill = 0
)
# Column 1 holds the state label; divide the counts by their row totals.
df[-1] <- df[-1] / rowSums(df[-1])
df
# itemset aac aba abb abc aca acb acc baa bab bac bba bbc bca bcc caa cab cac cba cbb cca ccb
#1 aac 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.5 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#2 aba 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#3 abb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.3333333 0 0 0 0.0 0.3333333 0.0 0.0000000 0.3333333 0.0 0
#4 abc 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#5 aca 0 0.0 0 0.0000000 0 0 0 0.0000000 1.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#6 acb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 1
#7 acc 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 1.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#8 baa 0 0.0 0 0.3333333 0 0 0 0.3333333 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.3333333 0.0000000 0.0 0
#9 bab 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.5 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#10 bac 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#11 bba 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 1 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#12 bbc 1 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#13 bca 0 0.0 0 0.0000000 0 0 1 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#14 bcc 0 0.0 0 0.0000000 1 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#15 caa 0 0.5 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#16 cab 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 1 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#17 cac 0 0.0 0 0.0000000 0 0 0 1.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#18 cba 0 0.0 0 0.0000000 0 1 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#19 cbb 0 0.0 0 0.0000000 0 0 0 1.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#20 cca 0 0.0 0 0.0000000 0 0 0 0.0000000 0.5 0.0 0.0000000 0 0 0 0.0 0.0000000 0.5 0.0000000 0.0000000 0.0 0
#21 ccb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 1 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
library(reshape2)
数据1。
不要命名任何对象t
。(保留为TRUE),2.
您的22x22矩阵是什么样子的?(显示您想要的结果!)似乎您正在寻找 table:
table(Data$itemset[-1], Data$itemset[-nrow(Data)])
是的,您是对的。表的大小只能分为1。@Nick 将 xtabs 除以 nrow(df),请参阅更新的回答。
我已经在测试数据 Data 上运行了代码,df['cca',] 中的所有值都是 NA。
# Row-normalised transition matrix: entry (i, j) is the share of occurrences
# of state i that are immediately followed by state j.
library(reshape2)

Data <- data.frame(Data, stringsAsFactors = FALSE)
# Strip commas from the labels so that e.g. "cca," and "cca" are one state;
# otherwise the terminal state forms its own all-zero row (0 / 0 = NaN).
Data$itemset <- gsub(",", "", as.character(Data$itemset), fixed = TRUE)
# Successor of every observation; the last observation has none (NA).
Data$nextitem <- c(Data$itemset[-1], NA)
# Each row contributes one transition to the count.
Data$value <- 1

# Cast only complete pairs. This avoids creating an "NA" column that would
# have to be dropped by position, and guarantees every row has >= 1 transition.
df <- dcast(
  Data[!is.na(Data$nextitem), ],
  itemset ~ nextitem,
  value.var = "value",
  fun.aggregate = sum,
  fill = 0
)
# Column 1 holds the state label; divide the counts by their row totals.
df[-1] <- df[-1] / rowSums(df[-1])
df
# itemset aac aba abb abc aca acb acc baa bab bac bba bbc bca bcc caa cab cac cba cbb cca ccb
#1 aac 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.5 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#2 aba 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#3 abb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.3333333 0 0 0 0.0 0.3333333 0.0 0.0000000 0.3333333 0.0 0
#4 abc 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#5 aca 0 0.0 0 0.0000000 0 0 0 0.0000000 1.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#6 acb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 1
#7 acc 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 1.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#8 baa 0 0.0 0 0.3333333 0 0 0 0.3333333 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.3333333 0.0000000 0.0 0
#9 bab 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.5 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#10 bac 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#11 bba 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 1 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#12 bbc 1 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#13 bca 0 0.0 0 0.0000000 0 0 1 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#14 bcc 0 0.0 0 0.0000000 1 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#15 caa 0 0.5 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#16 cab 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 1 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#17 cac 0 0.0 0 0.0000000 0 0 0 1.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#18 cba 0 0.0 0 0.0000000 0 1 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#19 cbb 0 0.0 0 0.0000000 0 0 0 1.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#20 cca 0 0.0 0 0.0000000 0 0 0 0.0000000 0.5 0.0 0.0000000 0 0 0 0.0 0.0000000 0.5 0.0000000 0.0000000 0.0 0
#21 ccb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 1 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0