迭代删除重复R函数中的行_R_Split_Dplyr

迭代删除重复R函数中的行

迭代删除重复R函数中的行,r,split,dplyr,R,Split,Dplyr,我有一个大型的面板数据集，对于许多参与者来说，生物标记物的有效观察值必须与无效观察值分开，每一个都形成自己的模式分布我希望将双峰函数的diptest应用于每一组观测值，我已经使用分割函数分离了这些观测值。我需要运行dip.test 1:n次，这取决于每个参与者观察中的模式数量，在参与者的模式，呃，集总间隔之外丢弃这些观察。（使用marker0变量可以清楚哪些观察结果属于参与者。）我通过依次使用Lappy和mutate函数，对所有参与者的所有观察结果应用diptest 困难在于在中心嵌套函数中重

我有一个大型的面板数据集，对于许多参与者来说，生物标记物的有效观察值必须与无效观察值分开，每一个都形成自己的模式分布

我希望将双峰函数的diptest应用于每一组观测值，我已经使用分割函数分离了这些观测值。我需要运行dip.test 1:n次，这取决于每个参与者观察中的模式数量，在参与者的模式，呃，集总间隔之外丢弃这些观察。（使用marker0变量可以清楚哪些观察结果属于参与者。）我通过依次使用Lappy和mutate函数，对所有参与者的所有观察结果应用diptest

困难在于在中心嵌套函数中重复diptest：我希望排除每次迭代显示不属于参与者的观察结果。但是，我还没有找到在重复函数中排除这些值的方法（现在我只是将它们重新编码为x=1。如果我只处理一个数据帧，这将是一个简单的情况——有人知道如何在每次迭代中限制我的观察值吗

# notional measures

y1 <- rnorm(80,mean=75, sd=3)
y2 <- rnorm(100,mean=100, sd=4)
y3 <- rnorm(40, mean=150, sd=2)
mark <- append(y1,y2)
marker <- append(mark,y3)
df_y <- as.data.frame(marker)
df_y$id <- 1
df_y$marker0 <-100
df_y$lump <- 0
df_y$other <-0

z1 <- rnorm(130,mean=50, sd=2)
z2 <- rnorm(110,mean=125, sd=5)
marker <- append(z1,z2)
df_z <- as.data.frame(marker)
df_z$id <- 2
df_z$marker0 <-130
df_z$lump <- 0
df_z$other <-0

df <- rbind(df_y, df_z)

# my function

trim.others <- function(x,a,b,c) {
repeat {
  diptest <- dip.test(x, simulate.p.value = TRUE, B=500)
  if (diptest$p.value > 0.1) break
  hm <- classIntervals(x, n=2, style="jenks", method="complete")
  b[(a < hm$brks[2])] <- 1
  b[(a > hm$brks[2])] <- 2
  c[(b == 1) & (x > hm$brks[2])] <- 1
  c[(b == 2) & (x < hm$brks[2])] <- 1
  x <- x[c==0]
  }
  return(x)
}

#概念度量
y1我会使用purrr并在循环内进行拆分：
library(dplyr)
library(purrr)

trim.others2  <- function(mydf) {

  x  <- mydf$marker
  a  <- mydf$marker0
  b  <- mydf$lump
  c  <- mydf$other

  repeat {

    diptest <- dip.test(x, simulate.p.value = TRUE, B=500)

    if (diptest$p.value > 0.1) break

    hm <- classIntervals(x, n=2, style="jenks", method="complete")
    b[(a < hm$brks[2])] <- 1
    b[(a > hm$brks[2])] <- 2
    c[(b == 1) & (x > hm$brks[2])] <- 1
    c[(b == 2) & (x < hm$brks[2])] <- 1
    x <- x[c==0]

  }
  return(x)
}

all_ids  <- unique(df$id) 

list_of_results  <- map(all_ids, ~df %>% filter(id == .x) %>% trim.others2 ) %>%
    setNames(all_ids)

库（dplyr）
图书馆（purrr）
trim.others2包括a将使其他人更容易帮助你。会的，谢谢！非常感谢你，疯狂比利。我暂时放弃了这项任务，但我现在可以试试。抱歉耽搁了。听起来不错。如果它对你有效，请接受它作为回答！谢谢！
dfs <- split(df, f = df$id)

myfunct <- function(w,x,y,z) {
  repeat {
    diptest <- dip.test(x, simulate.p.value = TRUE, B=500)
    if (diptest$p.value > 0.1) break
    hm <- classIntervals(x, n=2, style="jenks", method="complete")
    w[(y < hm$brks[2])] <- 1
    w[(y > hm$brks[2])] <- 2
    x[(w == 1) & (x > hm$brks[2])] <- 1
    x[(w == 2) & (x < hm$brks[2])] <- 1
     }
}

library(dplyr)

anotherfunct <- function(ia) {
  mutate(ia, value = myfunct(lump,marker,marker0,other))
}

dothefunct <- lapply(df,function(i) {anotherfunct(i)})

library(dplyr)
library(purrr)

trim.others2  <- function(mydf) {

  x  <- mydf$marker
  a  <- mydf$marker0
  b  <- mydf$lump
  c  <- mydf$other

  repeat {

    diptest <- dip.test(x, simulate.p.value = TRUE, B=500)

    if (diptest$p.value > 0.1) break

    hm <- classIntervals(x, n=2, style="jenks", method="complete")
    b[(a < hm$brks[2])] <- 1
    b[(a > hm$brks[2])] <- 2
    c[(b == 1) & (x > hm$brks[2])] <- 1
    c[(b == 2) & (x < hm$brks[2])] <- 1
    x <- x[c==0]

  }
  return(x)
}

all_ids  <- unique(df$id) 

list_of_results  <- map(all_ids, ~df %>% filter(id == .x) %>% trim.others2 ) %>%
    setNames(all_ids)