data.table 解决方案确实帮了大忙。这是很棒的东西,对 data.table 解决方案的解释也帮了大忙。 df <- data.frame(id=1:7,link=I(list(c(2,3),c(1,3),c(1,2),NA,NA,7,6))) id
ta.table解决方案确实帮了大忙。这是很棒的东西,对数据的解释。table解决方案帮了大忙。 df <- data.frame(id=1:7,link=I(list(c(2,3),c(1,3),c(1,2),NA,NA,7,6))) id ,r,dataframe,duplicates,R,Dataframe,Duplicates,ta.table解决方案确实帮了大忙。这是很棒的东西,对数据的解释。table解决方案帮了大忙。 df <- data.frame(id=1:7,link=I(list(c(2,3),c(1,3),c(1,2),NA,NA,7,6))) id link 1 1 2, 3 2 2 1, 3 3 3 1, 2 4 4 NA 5 5 NA 6 6 7 7 7 6 id link 1 1 2, 3 2 4 NA 3 5 NA 4 6
data.table 解决方案确实帮了大忙。这是很棒的东西,
对 data.table 解决方案的解释也帮了大忙。
# Example data: one row per id; `link` is a list column (wrapped in I() so
# data.frame() stores the list as-is) holding the ids each row is linked to.
# A single NA marks a row without links.
df <- data.frame(
  id = seq_len(7),
  link = I(list(c(2, 3), c(1, 3), c(1, 2), NA, NA, 7, 6))
)
id link
1 1 2, 3
2 2 1, 3
3 3 1, 2
4 4 NA
5 5 NA
6 6 7
7 7 6
id link
1 1 2, 3
2 4 NA
3 5 NA
4 6 7
# Walk the rows of `df` in order and accumulate in `to_remove` every linked id
# that has not been collected yet.
# `to_remove` starts as a character vector, so ids appended with c() are
# coerced to strings; `%in%` still matches the numeric links against them.
to_remove <- character(0)
for (n in seq_len(nrow(df))) {
  links <- df$link[[n]]
  # Skip rows that have no links available.
  if (all(is.na(links))) {
    next
  }
  # Keep only the non-missing links that are not already collected.
  add <- links[!is.na(links) & !(links %in% to_remove)]
  if (length(add) > 0) {
    to_remove <- c(to_remove, add)
  }
}
# Example data: one row per id; `link` is a list column holding the ids each
# row is linked to (a single NA marks a row without links).
df <- data.frame(
  id = 1:7,
  link = I(list(c(2, 3), c(1, 3), c(1, 2), NA, NA, 7, 6))
)

# For every row, mark the linked rows whose id is larger than the row's own id
# as dropped; `df2` keeps the remaining rows.
keep <- rep(TRUE, nrow(df))
for (i in seq_len(nrow(df))) {
  idx <- df$link[[i]]
  # Drop missing links explicitly instead of relying on NA subscripts, and
  # compare against the row's id rather than its position in the data frame.
  idx_larger_than_me <- idx[!is.na(idx) & idx > df$id[i]]
  # Match on the id column so the result does not depend on id == row number.
  keep[df$id %in% idx_larger_than_me] <- FALSE
}
df2 <- df[keep, ]
library(data.table)
# Flatten the list column into long format: one row per (id, link) pair.
DT <- data.table(
  id = rep(df$id, lengths(df$link)),
  link = unlist(df$link)
)
# Per id, collect the row numbers (.I) of rows whose link is NA, or whose id
# group contains no link smaller than the id itself.
kept_rows <- DT[, .I[!any(id > link) | is.na(link)], by = id]$V1
# Subset to those rows and collapse the links of each id into one string;
# the result is auto-printed when run at top level.
DT[kept_rows][, .(link = toString(link)), by = id]
id link
1: 1 2, 3
2: 4 NA
3: 5 NA
4: 6 7
library(dplyr)
library(tidyr)
# Expand the list column to one row per (id, link) pair, keep rows whose link
# is NA or whose id group has no link smaller than the id, then collapse the
# remaining links of each id into a single comma-separated string.
df %>%
  unnest(link) %>%
  group_by(id) %>%
  filter(is.na(link) | all(id <= link)) %>%
  summarise(link = toString(link))
# A tibble: 4 × 2
id link
<int> <chr>
1 1 2, 3
2 4 NA
3 5 NA
4 6 7
# Flatten the list column into long format: one row per (id, link) pair.
dfn <- data.frame(
  id = rep(df$id, lengths(df$link)),
  link = unlist(df$link)
)
# Positions where the id is larger than its link; which() drops the NA
# comparisons produced by rows without links.
has_smaller_link <- which(dfn$id > dfn$link)
# Remove every row belonging to an id that has at least one such link.
dfn <- dfn[!(dfn$id %in% unique(dfn$id[has_smaller_link])), ]
# Collapse the links of each remaining id into one string; na.pass keeps the
# rows whose link is NA instead of omitting them.
aggregate(link ~ id, data = dfn, FUN = toString, na.action = na.pass)
library(dplyr)
library(tidyr)
# Expand the list column to one row per (id, link) pair, then keep a row when
# its link is NA, or when the link is larger than the id and is the first
# occurrence of that link value across the whole (ungrouped) table.
# Finally nest the surviving links back into a list column per id.
# NOTE(review): this nest(link, .key=link) call form looks like an older tidyr
# interface - confirm it against the installed tidyr version.
result <- df %>%
  unnest(link) %>%
  filter((link > id & !duplicated(link)) | is.na(link)) %>%
  group_by(id) %>%
  nest(link, .key=link)
print(result)
### A tibble: 4 x 3
## id link
## <int> <list>
##1 1 <tibble [2 x 1]>
##2 4 <tibble [1 x 1]>
##3 5 <tibble [1 x 1]>
##4 6 <tibble [1 x 1]>
# Show the nested per-id link tables stored in the `link` list column.
print(result[["link"]])
##[[1]]
### A tibble: 2 x 1
## link
## <dbl>
##1 2
##2 3
##
##[[2]]
### A tibble: 1 x 1
## link
## <dbl>
##1 NA
##
##[[3]]
### A tibble: 1 x 1
## link
## <dbl>
##1 NA
##
##[[4]]
### A tibble: 1 x 1
## link
## <dbl>
##1 7