如何将 .HTM 文件作为数据框读入 R?
标签:r、data-science、data-manipulation、data-cleaning。我对我的一些交易进行回溯测试,我有非常大的 .HTM 本地文件。它们大约是250mb一块,我很难将它们读入R来对数据集进行一些数据分析。我曾尝试将本地文件中的数据复制并粘贴到excel中,然后读取到R中,但该文件太大,我无法在网页或excel崩溃之前将所有数据成功复制到excel中。我正试图进行以下工作:1.将.htm文件成功读入R;2.跳过页面顶部的文本和页面中部的图像,解析到下方的表格;3.将具有列和行的数据放入数据框中进行数据分析。有人有什么精明的想法来阅读这些数据吗?我尝试过使用包 RCurl、rlist 和 XML 以及 readLines,但没有成功。
.HTM
本地文件。它们大约是250mb一块,我很难将它们读入R来对数据集进行一些数据分析。我曾尝试将本地文件中的数据复制并粘贴到excel中,然后读取到R中,但该文件太大,我无法在网页或excel崩溃之前将所有数据成功复制到excel中
我正试图进行以下工作:
1.将.htm
文件成功读入R
2.跳过页面顶部的文本和页面中部的图像,解析到下方的表格
3.将具有列和行的数据放入数据框中进行数据分析
有人有什么精明的想法来阅读这些数据吗?我尝试过使用包RCurl
、rlist
和XML
以及 readLines,但没有成功
如有任何帮助/反馈,我将不胜感激
类似于:
require(dplyr)
require(plyer)
require(data.table)
require(jsonlite)
require(httr)
require(rvest)
require(XML)
require(plyr)
# Column names, in output order, used by getOptionChainAsx() to select the
# columns of the data frame it returns.
COLORDER <- c(
  "symbol", "code", "type", "expiry", "strike", "premium",
  "bid", "ask", "volume", "open.interest", "retrieved"
)
# AUSTRALIAN OPTIONS --------------------------------------------------------------------------------------------------
# ASX is the Australian Securities Exchange.
# sprintf() template for the option-prices page; `%s` receives the underlying code.
URLASX <- "http://www.asx.com.au/asx/markets/optionPrices.do?by=underlyingCode&underlyingCode=%s&expiryDate=&optionType=B"
#' Fetch the ASX option chain for one underlying symbol.
#'
#' Downloads the ASX option-prices page for `symbol`, takes the second
#' `table.options` table on that page, renames its columns, coerces the
#' numeric and date columns, sorts by type, strike and expiry, and returns
#' the columns listed in `COLORDER`.
#'
#' @param symbol Underlying code as a single non-empty value, e.g. "BHP".
#' @return A data frame holding the columns named in `COLORDER`.
getOptionChainAsx <- function(symbol) {
  # Fail early with a clear message rather than building a malformed URL.
  if (length(symbol) != 1L || is.na(symbol) || !nzchar(as.character(symbol))) {
    stop("`symbol` must be a single non-empty string", call. = FALSE)
  }

  # Percent-encode the symbol so reserved characters cannot alter the query.
  encoded_symbol <- utils::URLencode(as.character(symbol), reserved = TRUE)
  url <- sprintf(URLASX, encoded_symbol)
  html <- read_html(url)

  tables <- html %>%
    html_nodes("table.options") %>%
    html_table(header = TRUE)

  # Use the second element in the list (the first element gives data on the
  # underlying stock). Report a missing table explicitly instead of letting
  # `[[2]]` fail with "subscript out of bounds".
  if (length(tables) < 2L) {
    stop(
      "No option chain table found for symbol '", symbol, "' at ", url,
      call. = FALSE
    )
  }

  chain <- tables[[2]] %>%
    plyr::rename(c(
      "Bid" = "bid", "Offer" = "ask", "Openinterest" = "open.interest",
      "Volume" = "volume", "Expirydate" = "expiry", "P/C" = "type",
      "Margin Price" = "premium", "Exercise" = "strike", "Code" = "code"
    )) %>%
    transform(
      symbol = symbol,
      retrieved = Sys.time(),
      # Values that cannot be parsed as numbers become NA; the coercion
      # warnings are silenced on purpose. Thousands separators are stripped
      # from the count columns before conversion.
      open.interest = suppressWarnings(as.integer(gsub(",", "", open.interest))),
      premium = suppressWarnings(as.numeric(premium)),
      bid = suppressWarnings(as.numeric(bid)),
      ask = suppressWarnings(as.numeric(ask)),
      volume = suppressWarnings(as.integer(gsub(",", "", volume))),
      expiry = as.Date(expiry, format = "%d/%m/%Y")
    ) %>%
    dplyr::arrange(type, strike, expiry)

  chain[, COLORDER]
}
# Example call: request the option chain for the underlying code "BHP".
getOptionChainAsx("BHP")
require(dplyr)
需要(plyer)
要求(数据表)
require(jsonlite)
要求(httr)
要求(rvest)
需要(XML)
需要(plyr)
着色剂=c(“符号”、“代码”、“类型”、“到期日”、“罢工”、“溢价”、“出价”、“询问”、“数量”、“未结利息”、“已检索”)
#澳大利亚选择--------------------------------------------------------------------------------------------------
#澳大利亚证券交易所是澳大利亚证券交易所。
URLASX=http://www.asx.com.au/asx/markets/optionPrices.do?by=underlyingCode&underlyingCode=%s&expiryDate=&optionType=B'
getOptionChainAsx%html_表(header=TRUE))[[2]]%>%
plyr::重命名(c(“投标”=“投标”,“报价”=“询问”,“开放兴趣”=“开放兴趣”,“数量”=“数量”,“到期日”=“到期日”,
“P/C”=“类型”,“保证金价格”=“溢价”,“行权”=“行权”,“代码”=“代码”)))%>%
转化(
符号=符号,
代码=代码,
已检索=Sys.time(),
open.interest=suppressWarnings(作为.integer(gsub(“,”,“”,open.interest)),
高级=抑制警告(如.numeric(高级)),
bid=suppressWarnings(作为.numeric(bid)),
ask=抑制警告(如.numeric(ask)),
volume=suppressWarnings(作为.integer(gsub(“,”,“”,volume)),
到期日=截止日期(到期日,格式=“%d/%m/%Y”)
)%%>%dplyr::排列(类型、罢工、到期)
选项[,着色器]
}
getOptionChainAsx(“必和必拓”)
类似于:
require(dplyr)
require(plyer)
require(data.table)
require(jsonlite)
require(httr)
require(rvest)
require(XML)
require(plyr)
# Column names, in output order, used by getOptionChainAsx() to select the
# columns of the data frame it returns.
COLORDER <- c(
  "symbol", "code", "type", "expiry", "strike", "premium",
  "bid", "ask", "volume", "open.interest", "retrieved"
)
# AUSTRALIAN OPTIONS --------------------------------------------------------------------------------------------------
# ASX is the Australian Securities Exchange.
# sprintf() template for the option-prices page; `%s` receives the underlying code.
URLASX <- "http://www.asx.com.au/asx/markets/optionPrices.do?by=underlyingCode&underlyingCode=%s&expiryDate=&optionType=B"
#' Fetch the ASX option chain for one underlying symbol.
#'
#' Downloads the ASX option-prices page for `symbol`, takes the second
#' `table.options` table on that page, renames its columns, coerces the
#' numeric and date columns, sorts by type, strike and expiry, and returns
#' the columns listed in `COLORDER`.
#'
#' @param symbol Underlying code as a single non-empty value, e.g. "BHP".
#' @return A data frame holding the columns named in `COLORDER`.
getOptionChainAsx <- function(symbol) {
  # Fail early with a clear message rather than building a malformed URL.
  if (length(symbol) != 1L || is.na(symbol) || !nzchar(as.character(symbol))) {
    stop("`symbol` must be a single non-empty string", call. = FALSE)
  }

  # Percent-encode the symbol so reserved characters cannot alter the query.
  encoded_symbol <- utils::URLencode(as.character(symbol), reserved = TRUE)
  url <- sprintf(URLASX, encoded_symbol)
  html <- read_html(url)

  tables <- html %>%
    html_nodes("table.options") %>%
    html_table(header = TRUE)

  # Use the second element in the list (the first element gives data on the
  # underlying stock). Report a missing table explicitly instead of letting
  # `[[2]]` fail with "subscript out of bounds".
  if (length(tables) < 2L) {
    stop(
      "No option chain table found for symbol '", symbol, "' at ", url,
      call. = FALSE
    )
  }

  chain <- tables[[2]] %>%
    plyr::rename(c(
      "Bid" = "bid", "Offer" = "ask", "Openinterest" = "open.interest",
      "Volume" = "volume", "Expirydate" = "expiry", "P/C" = "type",
      "Margin Price" = "premium", "Exercise" = "strike", "Code" = "code"
    )) %>%
    transform(
      symbol = symbol,
      retrieved = Sys.time(),
      # Values that cannot be parsed as numbers become NA; the coercion
      # warnings are silenced on purpose. Thousands separators are stripped
      # from the count columns before conversion.
      open.interest = suppressWarnings(as.integer(gsub(",", "", open.interest))),
      premium = suppressWarnings(as.numeric(premium)),
      bid = suppressWarnings(as.numeric(bid)),
      ask = suppressWarnings(as.numeric(ask)),
      volume = suppressWarnings(as.integer(gsub(",", "", volume))),
      expiry = as.Date(expiry, format = "%d/%m/%Y")
    ) %>%
    dplyr::arrange(type, strike, expiry)

  chain[, COLORDER]
}
# Example call: request the option chain for the underlying code "BHP".
getOptionChainAsx("BHP")
require(dplyr)
需要(plyer)
要求(数据表)
require(jsonlite)
要求(httr)
要求(rvest)
需要(XML)
需要(plyr)
着色剂=c(“符号”、“代码”、“类型”、“到期日”、“罢工”、“溢价”、“出价”、“询问”、“数量”、“未结利息”、“已检索”)
#澳大利亚选择--------------------------------------------------------------------------------------------------
#澳大利亚证券交易所是澳大利亚证券交易所。
URLASX=http://www.asx.com.au/asx/markets/optionPrices.do?by=underlyingCode&underlyingCode=%s&expiryDate=&optionType=B'
getOptionChainAsx%html_表(header=TRUE))[[2]]%>%
plyr::重命名(c(“投标”=“投标”,“报价”=“询问”,“开放兴趣”=“开放兴趣”,“数量”=“数量”,“到期日”=“到期日”,
“P/C”=“类型”,“保证金价格”=“溢价”,“行权”=“行权”,“代码”=“代码”)))%>%
转化(
符号=符号,
代码=代码,
已检索=Sys.time(),
open.interest=suppressWarnings(作为.integer(gsub(“,”,“”,open.interest)),
高级=抑制警告(如.numeric(高级)),
bid=suppressWarnings(作为.numeric(bid)),
ask=抑制警告(如.numeric(ask)),
volume=suppressWarnings(作为.integer(gsub(“,”,“”,volume)),
到期日=截止日期(到期日,格式=“%d/%m/%Y”)
)%%>%dplyr::排列(类型、罢工、到期)
选项[,着色器]
}
getOptionChainAsx(“必和必拓”)
正如您提到的,您的.HTM
文件是本地文件,因此要读取本地文件,下面是代码:
# Read the local HTML file line by line, then join the lines back into one
# newline-separated string.
rawHTML <- paste0(
  readLines(con = "path/toYour/file.html"),
  collapse = "\n"
)
rawHTML
正如您所提到的,您的.HTM
文件是本地文件,因此要读取本地文件,下面是代码:
# Read the local HTML file line by line, then join the lines back into one
# newline-separated string.
rawHTML <- paste0(
  readLines(con = "path/toYour/file.html"),
  collapse = "\n"
)
# Show the resulting string at the console.
rawHTML