Dataframe:在 R 中处理 *.txt 文件并将其转换为数据框对象

Dataframe 在数据框对象中操纵和转换*txt,dataframe,yolo,r,text,data-manipulation,Dataframe,Yolo,R,Text,Data Manipulation,我想用中的培训结果v3模型。但在我的示例中,*.txt中有一个非常复杂的输出对象: 原始文件 现在,我想创建一个数据帧,我知道模型中的每个I操作都是以load:expression和before开始和结束的 这个表达式我有“1:799.219543,799.219543平均值,0.000000速率,654.661284秒,24个图像”总是以数字开头,并且: (表示当前培训迭代/批次), 但我需要一些规则(信息不是必需的,从区域开始,每24行出现一次),首先只用于分离特定的培训迭代/批处理结果,

我想使用 YOLO v3 模型的训练结果。但在我的示例中,
*.txt
文件中有一个非常复杂的输出对象:

原始文件

现在,我想创建一个数据框。我知道模型中的每次迭代都以
load:
表达式开始和结束,而在这个表达式之前有一行形如
“1: 799.219543, 799.219543 avg, 0.000000 rate, 654.661284 seconds, 24 images”
的内容,它总是以数字和冒号 “:” 开头
(表示当前训练迭代/批次)。但我需要一些规则(不需要的信息以
“区域”开头,每 24 行出现一次),首先只把各个训练迭代/批次的结果分离出来,如:

1: 799.219543, 799.219543 avg, 0.000000 rate, 654.661284 seconds, 24 images
2: 799.555359, 799.253113 avg, 0.000000 rate, 672.519735 seconds, 48 images
...
55: 1025.803833, 1181.399658 avg, 0.000000 rate, 919.132681 seconds, 1320 images
然后再经过一些操作,创建最终的数据框:

iteration  total_loss  loss_error  rate      time       n_images       
1          799.219543  799.219543  0.000000  654.661284 24
2          799.555359  799.253113  0.000000  672.519735 48
...
55         1025.803833 1181.399658 0.000000  919.132681  1320

请问,处理过这种类型文件的人有什么建议吗?

您可以使用
readLines
读取文件,并使用
grep
筛选出以数字开头、后跟冒号的行。之后,使用
strsplit
和
gsub
进行一些清理,再用
as.numeric
转换为数值,并用
setNames
设置列名。完成!

# Parse the per-iteration summary lines of the training log ("log.txt") into a
# numeric data frame `res` with one row per iteration. A summary line looks
# like:
#   1: 799.219543, 799.219543 avg, 0.000000 rate, 654.661284 seconds, 24 images
tmp <- readLines("log.txt")

# Keep only the summary lines: one or more digits followed by a colon at the
# start of the line. `\\d+` (rather than `\\d*`) so that a line beginning with
# a bare ":" is not mistaken for an iteration.
tmp <- tmp[grepl("^\\d+:", tmp)]
if (length(tmp) == 0L) {
  stop("no iteration summary lines found in log.txt", call. = FALSE)
}

# Split every line on ", " and stack the pieces into a character matrix
# (one row per iteration).
tmp <- do.call(rbind, strsplit(tmp, ", "))

# The first field still holds "<iteration>: <total_loss>"; split it in two and
# bind the remaining fields back on. `drop = FALSE` keeps the remainder a
# matrix even when only a single line matched.
tmp <- data.frame(do.call(rbind, strsplit(tmp[, 1], "\\: ")),
                  tmp[, -1, drop = FALSE],
                  stringsAsFactors = FALSE)

# Remove the trailing unit words ("avg", "rate", "seconds", "images"):
# everything from the first whitespace onwards is dropped.
tmp[] <- lapply(tmp, gsub, pattern = "\\s.+", replacement = "")
tmp[] <- lapply(tmp, as.numeric)
res <- setNames(tmp, c("iteration", "total_loss", "loss_error", "rate",
                       "time", "n_images"))

head(res)
#   iteration total_loss loss_error rate     time n_images
# 1         1   799.2195   799.2195    0 654.6613       24
# 2         2   799.5554   799.2531    0 672.5197       48
# 3         3   801.0438   799.4322    0 667.1184       72
# 4         4   799.9001   799.4790    0 647.3321       96
# 5         5   801.5366   799.6848    0 660.7798      120
# 6         6   799.3589   799.6522    0 683.4424      144

使用
tidyverse
包的做法:

# Parse the per-iteration summary lines of the training log ("log.txt") into a
# numeric tibble `mydf`, using the tidyverse.
library(tidyverse)
myfile <- read_lines("log.txt")

# Output column names, in the order the fields appear on a summary line.
names_col <- c("iteration", "total_loss", "loss_error", "rate", "time",
               "n_images")

mydf <- myfile %>%
  # Summary lines are the ones that end in "images".
  str_subset("images$") %>%
  # Character vector -> one-column tibble; the column is called `value`.
  enframe(name = NULL) %>%
  # Split each line on ":" or "," into the six named fields.
  separate(col = value, into = names_col, sep = "[:,]") %>%
  # Strip the unit words and convert every column to numeric.
  # `across()` replaces the superseded `mutate_all()`.
  mutate(across(everything(), parse_number))

head(as.data.frame(mydf))
#  iteration total_loss loss_error rate     time n_images
# 1         1   799.2195   799.2195    0 654.6613       24
# 2         2   799.5554   799.2531    0 672.5197       48
# 3         3   801.0438   799.4322    0 667.1184       72
# 4         4   799.9001   799.4790    0 647.3321       96
# 5         5   801.5366   799.6848    0 660.7798      120
# 6         6   799.3589   799.6522    0 683.4424      144
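library(tidyverse)
myfile <- read_lines("log.txt")

names_col <- c("iteration", "total_loss", "loss_error", "rate", "time",
"n_images")

mydf <- myfile %>%
  str_subset("images$") %>%
  enframe(name = NULL) %>%
  separate(col = value, into = names_col, sep = "[:,]") %>%
  mutate_all(parse_number)

head(as.data.frame(mydf))
#  iteration total_loss loss_error rate     time n_images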
# 1         1   799.2195   799.2195    0 654.6613       24
# 2         2   799.5554   799.2531    0 672.5197       48
# 3         3   801.0438   799.4322    0 667.1184       72
# 4         4   799.9001   799.4790    0 647.3321       96
# 5         5   801.5366   799.6848    0 660.7798      120
# 6         6   799.3589   799.6522    0 683.4424      144
# Parse the per-iteration summary lines of the training log ("log.txt") into a
# numeric data frame `res` with one row per iteration. A summary line looks
# like:
#   1: 799.219543, 799.219543 avg, 0.000000 rate, 654.661284 seconds, 24 images
tmp <- readLines("log.txt")

# Keep only the summary lines: one or more digits followed by a colon at the
# start of the line. `\\d+` (rather than `\\d*`) so that a line beginning with
# a bare ":" is not mistaken for an iteration.
tmp <- tmp[grepl("^\\d+:", tmp)]
if (length(tmp) == 0L) {
  stop("no iteration summary lines found in log.txt", call. = FALSE)
}

# Split every line on ", " and stack the pieces into a character matrix
# (one row per iteration).
tmp <- do.call(rbind, strsplit(tmp, ", "))

# The first field still holds "<iteration>: <total_loss>"; split it in two and
# bind the remaining fields back on. `drop = FALSE` keeps the remainder a
# matrix even when only a single line matched.
tmp <- data.frame(do.call(rbind, strsplit(tmp[, 1], "\\: ")),
                  tmp[, -1, drop = FALSE],
                  stringsAsFactors = FALSE)

# Remove the trailing unit words ("avg", "rate", "seconds", "images"):
# everything from the first whitespace onwards is dropped.
tmp[] <- lapply(tmp, gsub, pattern = "\\s.+", replacement = "")
tmp[] <- lapply(tmp, as.numeric)
res <- setNames(tmp, c("iteration", "total_loss", "loss_error", "rate",
                       "time", "n_images"))

head(res)
#   iteration total_loss loss_error rate     time n_images
# 1         1   799.2195   799.2195    0 654.6613       24
# 2         2   799.5554   799.2531    0 672.5197       48
# 3         3   801.0438   799.4322    0 667.1184       72
# 4         4   799.9001   799.4790    0 647.3321       96
# 5         5   801.5366   799.6848    0 660.7798      120
# 6         6   799.3589   799.6522    0 683.4424      144
# Parse the per-iteration summary lines of the training log ("log.txt") into a
# numeric tibble `mydf`, using the tidyverse.
library(tidyverse)
myfile <- read_lines("log.txt")

# Output column names, in the order the fields appear on a summary line.
names_col <- c("iteration", "total_loss", "loss_error", "rate", "time",
               "n_images")

mydf <- myfile %>%
  # Summary lines are the ones that end in "images".
  str_subset("images$") %>%
  # Character vector -> one-column tibble; the column is called `value`.
  enframe(name = NULL) %>%
  # Split each line on ":" or "," into the six named fields.
  separate(col = value, into = names_col, sep = "[:,]") %>%
  # Strip the unit words and convert every column to numeric.
  # `across()` replaces the superseded `mutate_all()`.
  mutate(across(everything(), parse_number))

head(as.data.frame(mydf))
#  iteration total_loss loss_error rate     time n_images
# 1         1   799.2195   799.2195    0 654.6613       24
# 2         2   799.5554   799.2531    0 672.5197       48
# 3         3   801.0438   799.4322    0 667.1184       72
# 4         4   799.9001   799.4790    0 647.3321       96
# 5         5   801.5366   799.6848    0 660.7798      120
# 6         6   799.3589   799.6522    0 683.4424      144