R:在4000多张图像的数据集中筛选出完整的四张一组(ch1–ch4)的图像
我有一个包含4000多张图像的数据集。为了调试代码,我把一小批图像移到了另一个文件夹中。(标签:r, image-processing, filtering)这些文件如下所示:
folder
r01c01f01p01-ch3.tiff
r01c01f01p01-ch4.tiff
r01c01f02p01-ch1.tiff
r01c01f03p01-ch2.tiff
r01c01f03p01-ch3.tiff
r01c01f04p01-ch2.tiff
r01c01f04p01-ch4.tiff
r01c01f05p01-ch1.tiff
r01c01f05p01-ch2.tiff
r01c01f06p01-ch2.tiff
r01c01f06p01-ch4.tiff
r01c01f09p01-ch3.tiff
r01c01f09p01-ch4.tiff
r01c01f10p01-ch1.tiff
r01c01f10p01-ch4.tiff
r01c01f11p01-ch1.tiff
r01c01f11p01-ch2.tiff
r01c01f11p01-ch3.tiff
r01c01f11p01-ch4.tiff
r01c02f10p01-ch1.tiff
r01c02f10p01-ch2.tiff
r01c02f10p01-ch3.tiff
r01c02f10p01-ch4.tiff
# List the files of each channel found in the "complete" folder.
# `pattern` is a regular expression matched anywhere in the file name.
ch1 <- list.files(path = "/Desktop/cp/complete//", pattern = "ch1")
ch2 <- list.files(path = "/Desktop/cp/complete//", pattern = "ch2")
ch3 <- list.files(path = "/Desktop/cp/complete//", pattern = "ch3")
ch4 <- list.files(path = "/Desktop/cp/complete//", pattern = "ch4")
我不能删除-ch#之前的名称,因为该信息很重要
我想做的是过滤图像文件夹,并只移动/保留所有四个ch值(ch1-4)的集合(即:r01c02f10p01)
背景:我使用不同的脚本将图像合并到一个组合中,这取决于这样一个事实,即每个图像上都有一个完整的ch1、ch2、ch3和ch4后缀,并且它们分为四个批次。(此代码用于名为Fiji的图像处理软件,而不是R。)
在尝试筛选图像时,这个数据集最初还包含第五个通道(ch5)。我用下面的代码从原始文件夹中删除了所有带ch5的图像:
## Folder holding the image files. The original snippet mixed "/Desktop/Nov 5/"
## (in setwd) and "/Desktop/Nov5/"; a single spelling is used here -- adjust it
## to the real folder name. No setwd() is needed because full paths are built
## explicitly below.
image_dir <- "/Desktop/Nov5/"

## Create folder variable which has all image files (names only, no directory)
folder <- list.files(image_dir)

## Create final2 variable which has all image files whose name contains "ch5"
final2 <- list.files(path = image_dir, pattern = "ch5")

## Remove only the ch5 files. file.remove() deletes every path it is given, so
## passing `folder` as well (as the original did) would delete all files.
file.remove(file.path(image_dir, final2))
##创建包含所有图像文件的文件夹变量
setwd(“/Desktop/Nov 5/”)
folder=list.files(“/Desktop/Nov5/”)
文件夹%
分组依据(分组)%>%
过滤器(全部(通道集%in%ch))
要保存的文件
总结:我希望从没有完整ch值的随机组合(即:可能只有ch1和ch2,或ch3和ch4)中筛选文件,到只包含具有完整ch1、ch2、ch3和ch4图像的文件组合
注意:我对R还很陌生,如果问题描述中有用词不当的地方,我可以进一步澄清。
我的解决方案:
library(tidyr)
library(dplyr)
library(magrittr)
library(stringr)

# Input data: example file names, useful for trying the logic without files.
dirnames <- data.frame(flenames = c(
  "r01c01f01p01-ch3.tiff", "r01c01f01p01-ch4.tiff", "r01c01f02p01-ch1.tiff",
  "r01c01f03p01-ch2.tiff", "r01c01f03p01-ch3.tiff", "r01c01f04p01-ch2.tiff",
  "r01c01f04p01-ch4.tiff", "r01c01f05p01-ch1.tiff", "r01c01f05p01-ch2.tiff",
  "r01c01f06p01-ch2.tiff", "r01c01f06p01-ch4.tiff", "r01c01f09p01-ch3.tiff",
  "r01c01f09p01-ch4.tiff", "r01c01f10p01-ch1.tiff", "r01c01f10p01-ch4.tiff",
  "r01c01f11p01-ch1.tiff", "r01c01f11p01-ch2.tiff", "r01c01f11p01-ch3.tiff",
  "r01c01f11p01-ch4.tiff", "r01c02f10p01-ch1.tiff", "r01c02f10p01-ch2.tiff",
  "r01c02f10p01-ch3.tiff", "r01c02f10p01-ch4.tiff"
))

# To get actual input data in the format above.
# Presuming this script is being executed from the directory where the `tiff`
# files are stored. `pattern` is a regular expression (not a glob), and only
# names of the form <prefix>-ch<N>.tiff are listed, so files that do not
# follow the naming scheme are never considered for removal.
dirnames <- data.frame(
  flenames = list.files(path = getwd(), pattern = "-ch[0-9]+\\.tiff$")
)

# Channels that make up a complete image set.
required_channels <- paste0("ch", 1:4)

# Split every file name into its leading string (fc1) and channel (fc2), then
# flag each group of files sharing a leading string: "Y" when its channels are
# exactly ch1-ch4, "N" otherwise. Checking the channel names (rather than
# counting four files) rejects sets such as ch1/ch2/ch3/ch5.
dirnames <- dirnames %>%
  mutate(
    fc1 = str_extract(flenames, ".+(?=\\-)"),    # Leading string
    fc2 = str_extract(flenames, "(?<=\\-)\\w+")  # Channel string
  ) %>%
  group_by(fc1) %>%
  mutate(
    keep_val = if (setequal(fc2, required_channels)) "Y" else "N"
  ) %>%
  ungroup() %>%
  select(flename = flenames, keep_val)

# Iterate through the data frame and issue file.remove() where necessary.
# seq_len() keeps the loop from running when no files were found.
for (i in seq_len(nrow(dirnames))) {
  if (dirnames$keep_val[i] == "N") {
    cat("Removing file ", paste0(dirnames$flename[i]), "\n")
    file.remove(dirnames$flename[i])
  } else {
    cat("Keeping file ", paste0(dirnames$flename[i]), "\n")
  }
}
# Removing file r01c01f01p01-ch3.tiff
# Removing file r01c01f01p01-ch4.tiff
# Removing file r01c01f02p01-ch1.tiff
# Removing file r01c01f03p01-ch2.tiff
# Removing file r01c01f03p01-ch3.tiff
# Removing file r01c01f04p01-ch2.tiff
# Removing file r01c01f04p01-ch4.tiff
# Removing file r01c01f05p01-ch1.tiff
# Removing file r01c01f05p01-ch2.tiff
# Removing file r01c01f06p01-ch2.tiff
# Removing file r01c01f06p01-ch4.tiff
# Removing file r01c01f09p01-ch3.tiff
# Removing file r01c01f09p01-ch4.tiff
# Removing file r01c01f10p01-ch1.tiff
# Removing file r01c01f10p01-ch4.tiff
# Keeping file r01c01f11p01-ch1.tiff
# Keeping file r01c01f11p01-ch2.tiff
# Keeping file r01c01f11p01-ch3.tiff
# Keeping file r01c01f11p01-ch4.tiff
# Keeping file r01c02f10p01-ch1.tiff
# Keeping file r01c02f10p01-ch2.tiff
# Keeping file r01c02f10p01-ch3.tiff
# Keeping file r01c02f10p01-ch4.tiff
library(tidyr)
图书馆(dplyr)
图书馆(magrittr)
图书馆(stringr)
#输入数据
dirnames%#通道字符串
选择(-c(flenames))%>%
分组依据(fc1)%>%#分组
嵌套(fc3=fc2)%>%#嵌套
mutate(keep_val=if(length(unlist(fc3))=4){“Y”}否则{“N”})%>%#检查以保留/放弃
未测试(fc3)%>%#未测试以返回原始数据帧
mutate(flename=paste0(fc1,“-”,fc2,“.tiff”)%>%#恢复原始文件名
解组(%)%%>%
选择(flename,keep_val)#重新排列数据帧
#使循环遍历数据帧,并根据需要发出file.remove()命令
对于(i in 1:nrow(dirnames)){
if(dirnames$keep_val[i]=“N”){
cat(“删除文件”,粘贴0(dirnames$flename[i]),“\n”)
删除(dirnames$flename[i])
}否则{
cat(“保留文件”,粘贴0(dirnames$flename[i]),“\n”)
}
}
#正在删除文件r01c01f01p01-ch3.tiff
#正在删除文件r01c01f01p01-ch4.tiff
#正在删除文件r01c01f02p01-ch1.tiff
#正在删除文件r01c01f03p01-ch2.tiff
#正在删除文件r01c01f03p01-ch3.tiff
#正在删除文件r01c01f04p01-ch2.tiff
#正在删除文件r01c01f04p01-ch4.tiff
#正在删除文件r01c01f05p01-ch1.tiff
#正在删除文件r01c01f05p01-ch2.tiff
#正在删除文件r01c01f06p01-ch2.tiff
#正在删除文件r01c01f06p01-ch4.tiff
#正在删除文件r01c01f09p01-ch3.tiff
#正在删除文件r01c01f09p01-ch4.tiff
#正在删除文件r01c01f10p01-ch1.tiff
#正在删除文件r01c01f10p01-ch4.tiff
#保存文件r01c01f11p01-ch1.tiff
#保存文件r01c01f11p01-ch2.tiff
#保存文件r01c01f11p01-ch3.tiff
#保存文件r01c01f11p01-ch4.tiff
#保存文件r01c02f10p01-ch1.tiff
#保存文件r01c02f10p01-ch2.tiff
#保存文件r01c02f10p01-ch3.tiff
#保存文件r01c02f10p01-ch4.tiff
对代码的解释已写在代码内的注释中。
在实际使用时,您可以用类似 `dirnames <- data.frame(flenames = list.files(...))` 的代码获取输入文件列表。
我的解决方案:
library(tidyr)
library(dplyr)
library(magrittr)
library(stringr)

# Input data: example file names, useful for trying the logic without files.
dirnames <- data.frame(flenames = c(
  "r01c01f01p01-ch3.tiff", "r01c01f01p01-ch4.tiff", "r01c01f02p01-ch1.tiff",
  "r01c01f03p01-ch2.tiff", "r01c01f03p01-ch3.tiff", "r01c01f04p01-ch2.tiff",
  "r01c01f04p01-ch4.tiff", "r01c01f05p01-ch1.tiff", "r01c01f05p01-ch2.tiff",
  "r01c01f06p01-ch2.tiff", "r01c01f06p01-ch4.tiff", "r01c01f09p01-ch3.tiff",
  "r01c01f09p01-ch4.tiff", "r01c01f10p01-ch1.tiff", "r01c01f10p01-ch4.tiff",
  "r01c01f11p01-ch1.tiff", "r01c01f11p01-ch2.tiff", "r01c01f11p01-ch3.tiff",
  "r01c01f11p01-ch4.tiff", "r01c02f10p01-ch1.tiff", "r01c02f10p01-ch2.tiff",
  "r01c02f10p01-ch3.tiff", "r01c02f10p01-ch4.tiff"
))

# To get actual input data in the format above.
# Presuming this script is being executed from the directory where the `tiff`
# files are stored. `pattern` is a regular expression (not a glob), and only
# names of the form <prefix>-ch<N>.tiff are listed, so files that do not
# follow the naming scheme are never considered for removal.
dirnames <- data.frame(
  flenames = list.files(path = getwd(), pattern = "-ch[0-9]+\\.tiff$")
)

# Channels that make up a complete image set.
required_channels <- paste0("ch", 1:4)

# Split every file name into its leading string (fc1) and channel (fc2), then
# flag each group of files sharing a leading string: "Y" when its channels are
# exactly ch1-ch4, "N" otherwise. Checking the channel names (rather than
# counting four files) rejects sets such as ch1/ch2/ch3/ch5.
dirnames <- dirnames %>%
  mutate(
    fc1 = str_extract(flenames, ".+(?=\\-)"),    # Leading string
    fc2 = str_extract(flenames, "(?<=\\-)\\w+")  # Channel string
  ) %>%
  group_by(fc1) %>%
  mutate(
    keep_val = if (setequal(fc2, required_channels)) "Y" else "N"
  ) %>%
  ungroup() %>%
  select(flename = flenames, keep_val)

# Iterate through the data frame and issue file.remove() where necessary.
# seq_len() keeps the loop from running when no files were found.
for (i in seq_len(nrow(dirnames))) {
  if (dirnames$keep_val[i] == "N") {
    cat("Removing file ", paste0(dirnames$flename[i]), "\n")
    file.remove(dirnames$flename[i])
  } else {
    cat("Keeping file ", paste0(dirnames$flename[i]), "\n")
  }
}
# Removing file r01c01f01p01-ch3.tiff
# Removing file r01c01f01p01-ch4.tiff
# Removing file r01c01f02p01-ch1.tiff
# Removing file r01c01f03p01-ch2.tiff
# Removing file r01c01f03p01-ch3.tiff
# Removing file r01c01f04p01-ch2.tiff
# Removing file r01c01f04p01-ch4.tiff
# Removing file r01c01f05p01-ch1.tiff
# Removing file r01c01f05p01-ch2.tiff
# Removing file r01c01f06p01-ch2.tiff
# Removing file r01c01f06p01-ch4.tiff
# Removing file r01c01f09p01-ch3.tiff
# Removing file r01c01f09p01-ch4.tiff
# Removing file r01c01f10p01-ch1.tiff
# Removing file r01c01f10p01-ch4.tiff
# Keeping file r01c01f11p01-ch1.tiff
# Keeping file r01c01f11p01-ch2.tiff
# Keeping file r01c01f11p01-ch3.tiff
# Keeping file r01c01f11p01-ch4.tiff
# Keeping file r01c02f10p01-ch1.tiff
# Keeping file r01c02f10p01-ch2.tiff
# Keeping file r01c02f10p01-ch3.tiff
# Keeping file r01c02f10p01-ch4.tiff
library(tidyr)
图书馆(dplyr)
图书馆(magrittr)
图书馆(stringr)
#输入数据
dirnames%#通道字符串
选择(-c(flenames))%>%
分组依据(fc1)%>%#分组
嵌套(fc3=fc2)%>%#嵌套
mutate(keep_val=if(length(unlist(fc3))=4){“Y”}否则{“N”})%>%#检查以保留/放弃
未测试(fc3)%>%#未测试以返回原始数据帧
mutate(flename=paste0(fc1,“-”,fc2,“.tiff”)%>%#恢复原始文件名
解组(%)%%>%
选择(flename,keep_val)#重新排列数据帧
#使循环遍历数据帧,并根据需要发出file.remove()命令
对于(i in 1:nrow(dirnames)){
if(dirnames$keep_val[i]=“N”){
cat(“删除文件”,粘贴0(dirnames$flename[i]),“\n”)
删除(dirnames$flename[i])
}否则{
cat(“保留文件”,粘贴0(dirnames$flename[i]),“\n”)
}
}
#正在删除文件r01c01f01p01-ch3.tiff
#正在删除文件r01c01f01p01-ch4.tiff
#正在删除文件r01c01f02p01-ch1.tiff
#正在删除文件r01c01f03p01-ch2.tiff
#正在删除文件r01c01f03p01-ch3.tiff
#正在删除文件r01c01f04p01-ch2.tiff
#正在删除文件r01c01f04p01-ch4.tiff
#正在删除文件r01c01f05p01-ch1.tiff
#正在删除文件r01c01f05p01-ch2.tiff
#正在删除文件r01c01f06p01-ch2.tiff
#正在删除文件r01c01f06p01-ch4.tiff
#正在删除文件r01c01f09p01-ch3.tiff
#正在删除文件r01c01f09p01-ch4.tiff
#正在删除文件r01c01f10p01-ch1.tiff
#正在删除文件r01c01f10p01-ch4.tiff
#保存文件r01c01f11p01-ch1.tiff
#保存文件r01c01f11p01-ch2.tiff
#保存文件r01c01f11p01-ch3.tiff
#保存文件r01c01f11p01-ch4.tiff
#保存文件r01c02f10p01-ch1.tiff
#保存文件r01c02f10p01-ch2.tiff
#保存文件r01c02f10p01-ch3.tiff
#保存文件r01c02f10p01-ch4.tiff
对代码的解释已写在代码内的注释中。
实际使用时,您可以用类似 `dirnames <- data.frame(flenames = list.files(...))` 的代码获取输入文件列表。
评论:为了使问题保持最小化(请参阅)并把两个问题的关注点分开,您可以把这个问题编辑为:一开始就已经有一个需要移动的文件名列表,这样您就可以专注于筛选这一项任务。
回复:我已经编辑过了,试图把问题简化。但从功能上讲,这两项任务是紧密交织在一起的。我更关心的是如何正确地筛选内容,我的编辑也反映了这一点。
library(tidyr)
library(dplyr)
library(magrittr)
library(stringr)

# Input data: example file names, useful for trying the logic without files.
dirnames <- data.frame(flenames = c(
  "r01c01f01p01-ch3.tiff", "r01c01f01p01-ch4.tiff", "r01c01f02p01-ch1.tiff",
  "r01c01f03p01-ch2.tiff", "r01c01f03p01-ch3.tiff", "r01c01f04p01-ch2.tiff",
  "r01c01f04p01-ch4.tiff", "r01c01f05p01-ch1.tiff", "r01c01f05p01-ch2.tiff",
  "r01c01f06p01-ch2.tiff", "r01c01f06p01-ch4.tiff", "r01c01f09p01-ch3.tiff",
  "r01c01f09p01-ch4.tiff", "r01c01f10p01-ch1.tiff", "r01c01f10p01-ch4.tiff",
  "r01c01f11p01-ch1.tiff", "r01c01f11p01-ch2.tiff", "r01c01f11p01-ch3.tiff",
  "r01c01f11p01-ch4.tiff", "r01c02f10p01-ch1.tiff", "r01c02f10p01-ch2.tiff",
  "r01c02f10p01-ch3.tiff", "r01c02f10p01-ch4.tiff"
))

# To get actual input data in the format above.
# Presuming this script is being executed from the directory where the `tiff`
# files are stored. `pattern` is a regular expression (not a glob), and only
# names of the form <prefix>-ch<N>.tiff are listed, so files that do not
# follow the naming scheme are never considered for removal.
dirnames <- data.frame(
  flenames = list.files(path = getwd(), pattern = "-ch[0-9]+\\.tiff$")
)

# Channels that make up a complete image set.
required_channels <- paste0("ch", 1:4)

# Split every file name into its leading string (fc1) and channel (fc2), then
# flag each group of files sharing a leading string: "Y" when its channels are
# exactly ch1-ch4, "N" otherwise. Checking the channel names (rather than
# counting four files) rejects sets such as ch1/ch2/ch3/ch5.
dirnames <- dirnames %>%
  mutate(
    fc1 = str_extract(flenames, ".+(?=\\-)"),    # Leading string
    fc2 = str_extract(flenames, "(?<=\\-)\\w+")  # Channel string
  ) %>%
  group_by(fc1) %>%
  mutate(
    keep_val = if (setequal(fc2, required_channels)) "Y" else "N"
  ) %>%
  ungroup() %>%
  select(flename = flenames, keep_val)

# Iterate through the data frame and issue file.remove() where necessary.
# seq_len() keeps the loop from running when no files were found.
for (i in seq_len(nrow(dirnames))) {
  if (dirnames$keep_val[i] == "N") {
    cat("Removing file ", paste0(dirnames$flename[i]), "\n")
    file.remove(dirnames$flename[i])
  } else {
    cat("Keeping file ", paste0(dirnames$flename[i]), "\n")
  }
}
# Removing file r01c01f01p01-ch3.tiff
# Removing file r01c01f01p01-ch4.tiff
# Removing file r01c01f02p01-ch1.tiff
# Removing file r01c01f03p01-ch2.tiff
# Removing file r01c01f03p01-ch3.tiff
# Removing file r01c01f04p01-ch2.tiff
# Removing file r01c01f04p01-ch4.tiff
# Removing file r01c01f05p01-ch1.tiff
# Removing file r01c01f05p01-ch2.tiff
# Removing file r01c01f06p01-ch2.tiff
# Removing file r01c01f06p01-ch4.tiff
# Removing file r01c01f09p01-ch3.tiff
# Removing file r01c01f09p01-ch4.tiff
# Removing file r01c01f10p01-ch1.tiff
# Removing file r01c01f10p01-ch4.tiff
# Keeping file r01c01f11p01-ch1.tiff
# Keeping file r01c01f11p01-ch2.tiff
# Keeping file r01c01f11p01-ch3.tiff
# Keeping file r01c01f11p01-ch4.tiff
# Keeping file r01c02f10p01-ch1.tiff
# Keeping file r01c02f10p01-ch2.tiff
# Keeping file r01c02f10p01-ch3.tiff
# Keeping file r01c02f10p01-ch4.tiff