C# 如何将excel文件读取到数据框?
是否可以将 xls 文件中的数据直接读取到 Deedle 数据框(Frame)中?我发现 Deedle.Excel 有 openExcel 方法,但无法使用它。如果可能,请提供示例。(标签:c#, deedle)在安装 Deedle.Excel 和 ExcelDataReader.DataSet 之后,我使用了以下代码(我不确定这是不是最好的做法,但至少它可以工作):
using Deedle;
using ExcelDataReader;
// 此处省略了你的其他代码
...
// 下面是关键代码
Frame<int, string> mydf;
using (var stream = File.Open(myFilePath, FileMode.Open, FileAccess.Read))
{
    using (var reader = ExcelReaderFactory.CreateReader(stream))
    {
        var result = reader.AsDataSet(new ExcelDataSetConfiguration()
        {
            UseColumnDataType = true,
            ConfigureDataTable = (_) => new ExcelDataTableConfiguration()
            {
                UseHeaderRow = true, // 使用第一行作为标题
            }
        });
        DataTableReader rd = result.Tables[0].CreateDataReader(); // 只使用第一张工作表
        mydf = Frame.ReadReader(rd);
    }
}
// 在下面使用 mydf
...
不久前,我创建了一个库,可以读取 xlsb、xlsx、xls、csv 和 txt 文件,并将其转换为 Deedle 数据框(Frame);它也可以把 Deedle 数据框写回 Excel 电子表格。其中 xlsx 部分使用 EPPlus,因此不需要安装驱动程序。
以下是代码XLS加载部分的脚本版本(为了简洁起见,我省略了其他功能):
#r @"C:\Users\flavi\.nuget\packages\deedle\2.1.2\lib\net45\Deedle.dll"
#r @"C:\Users\flavi\.nuget\packages\epplus\5.0.4\lib\net45\EPPlus.dll"
open Deedle
module private Functions =
    let adjustStartCell (startRow,startCol) = max startRow 1,max startCol 1 // 防止输入小于 1 时出现错误
module private XLSX =
    open OfficeOpenXml
    open System.IO
    open Functions
    let load (fi : FileInfo) (sheetName:string) startLine startCol hasHeaders takeUntil skipCols =
        using (new ExcelPackage(fi)) (fun p ->
            let wsOpt = p.Workbook.Worksheets.[sheetName] |> Option.ofObj
            match wsOpt with
            |Some ws ->
                let adjStartLine,adjStartCol = adjustStartCell (startLine,startCol) // 防止输入小于 1 时出错
                let linePlusHeaders = adjStartLine + if hasHeaders then 1 else 0 // 如果有标题行,则跳过 1 行
                let lastRow = ws.Dimension.End.Row // 获取工作表的最后一行,以便知道何时停止读取数据
                let filteredSkipCols = skipCols |> List.filter (fun x -> x<>"")
                Seq.initInfinite (fun i -> (i+adjStartCol,string ws.Cells.[adjStartLine,adjStartCol+i].Value)) // 初始化列维度映射
                |> Seq.takeWhile (fun (_,elem) ->
                    match elem with
                    |null|"" -> false // 遇到第一个空单元格时停止读取
                    |txt when txt = takeUntil -> false // 或者遇到指定的终止值时停止
                    |_ -> true)
                |> (fun h -> if hasHeaders then h else h |> Seq.mapi (fun i (c,_) -> (c,string (i+1))))
                |> Seq.map (fun (col,header) ->
                    Seq.initInfinite (fun i -> // 开始读取行数据
                        i,header,ws.Cells.[i+linePlusHeaders,col].Value)
                    |> Seq.takeWhile (fun (row,_,_) -> row<=lastRow-linePlusHeaders)
                    |> (fun s -> if filteredSkipCols<>[] then s |> Seq.filter (fun (_,hd,_) -> filteredSkipCols |> List.contains hd |> not) else s)) // 过滤列
                |> Seq.concat |> Frame.ofValues |> Ok
            |None -> Result.Error("Worksheet not found: " + sheetName + "."))
module private Controller =
    open System.IO
    let load filePath sheetName startLine startCol hasHeaders takeUntil skipCols =
        try
            let file = new FileInfo(filePath)
            match file.Exists,file.Extension.ToLower() with
            | false,_ -> Result.Error ("The file does not exist: " + filePath)
            | _,".xlsx" | _,".xls" -> XLSX.load file sheetName startLine startCol hasHeaders takeUntil skipCols
            | _ -> Result.Error("Invalid file extension: " + file.Extension)
        with
        |ex -> Result.Error(ex.Message)
[<AbstractClass; Sealed>]
type API private() =
    static let getDefaultParams startcell hasHeaders takeUntil skipCols sheetName =
        let (startLine,startCol) = defaultArg startcell (1,1)
        let hasHeaders0 = defaultArg hasHeaders true
        let takeUntil0 = defaultArg takeUntil ""
        let skipCols0 = defaultArg skipCols []
        let sheetName0 = defaultArg sheetName "Sheet1"
        ((startLine,startCol),hasHeaders0,takeUntil0,skipCols0,sheetName0)
    static member safeLoad(filePath, ?sheetName, ?startcell, ?hasHeaders, ?takeUntil, ?skipCols) =
        let (startLine,startCol),hasHeaders0,takeUntil0,skipCols0,sheetName0 = getDefaultParams startcell hasHeaders takeUntil skipCols sheetName
        Controller.load filePath sheetName0 startLine startCol hasHeaders0 takeUntil0 skipCols0
    static member unsafeLoad(filePath, ?sheetName, ?startcell, ?hasHeaders, ?takeUntil, ?skipCols) =
        let (startLine,startCol),hasHeaders0,takeUntil0,skipCols0,sheetName0 = getDefaultParams startcell hasHeaders takeUntil skipCols sheetName
        match Controller.load filePath sheetName0 startLine startCol hasHeaders0 takeUntil0 skipCols0 with
        |Ok df -> df
        |Result.Error e -> failwith(e)
// 使用示例:
API.unsafeLoad(@"C:\someFolder\someFile.xlsx","SomeSheet")
评论:您对 Deedle.Excel 使用了什么连接字符串?您使用的是 JET 还是 ACE 驱动程序?您的 Excel 文件是 xls(Excel 2003)还是 xlsx(Excel 2007+)?回复:是 xls。我需要安装驱动程序吗?
using System.Data;
using System.IO;
using Deedle;
using ExcelDataReader;
// your code is omitted
...
// below is the key code
// Reads the first worksheet of the workbook at myFilePath into a Deedle frame.
Frame<int, string> mydf;
// The file is opened read-only; each using block disposes its resource when the block exits.
using (var stream = File.Open(myFilePath, FileMode.Open, FileAccess.Read))
{
    using (var reader = ExcelReaderFactory.CreateReader(stream))
    {
        var result = reader.AsDataSet(new ExcelDataSetConfiguration()
        {
            UseColumnDataType = true,
            ConfigureDataTable = (_) => new ExcelDataTableConfiguration()
            {
                UseHeaderRow = true, // use the first row as the header
            }
        });
        // Only the first sheet is used. DataTableReader is IDisposable, so it is wrapped
        // in its own using block instead of being left for the garbage collector.
        using (DataTableReader rd = result.Tables[0].CreateDataReader())
        {
            mydf = Frame.ReadReader(rd);
        }
    }
}
// use mydf below
...
#r @"C:\Users\flavi\.nuget\packages\deedle\2.1.2\lib\net45\Deedle.dll"
#r @"C:\Users\flavi\.nuget\packages\epplus\5.0.4\lib\net45\EPPlus.dll"
open Deedle
module private Functions =
    /// Clamps a (row, column) start position so that neither coordinate is below 1.
    /// Worksheet cell indices are 1-based, so smaller inputs are raised to 1.
    let adjustStartCell (startRow, startCol) =
        let atLeastOne n = if n < 1 then 1 else n
        (atLeastOne startRow, atLeastOne startCol)
module private XLSX =
    open OfficeOpenXml
    open System.IO
    open Functions

    /// Reads one worksheet of the workbook `fi` into a Deedle frame.
    ///
    /// - `sheetName`: name of the worksheet to read.
    /// - `startLine`, `startCol`: 1-based position of the first cell; values below 1 are raised to 1.
    /// - `hasHeaders`: when true the row at `startLine` supplies the column names and data starts on
    ///   the next row; when false the columns are named "1", "2", ... and data starts at `startLine`.
    /// - `takeUntil`: column discovery stops at the first cell of the start row equal to this text
    ///   (it also stops at the first empty cell).
    /// - `skipCols`: column names to leave out of the frame; empty strings in the list are ignored.
    ///
    /// Returns `Ok frame` whose row keys are 0-based offsets from the first data row, or
    /// `Result.Error message` when the worksheet is missing or contains no cells.
    let load (fi : FileInfo) (sheetName : string) startLine startCol hasHeaders takeUntil skipCols =
        using (new ExcelPackage(fi)) (fun p ->
            match p.Workbook.Worksheets.[sheetName] |> Option.ofObj with
            | None -> Result.Error("Worksheet not found: " + sheetName + ".")
            | Some ws when isNull ws.Dimension ->
                // A worksheet without any cells has no Dimension; report that explicitly
                // instead of failing with a NullReferenceException further down.
                Result.Error("Worksheet is empty: " + sheetName + ".")
            | Some ws ->
                let adjStartLine, adjStartCol = adjustStartCell (startLine, startCol)
                // The first data row sits one line below the start row when that row holds headers.
                let linePlusHeaders = adjStartLine + (if hasHeaders then 1 else 0)
                // Last used row of the sheet: tells the row scan where to stop.
                let lastRow = ws.Dimension.End.Row
                let filteredSkipCols = skipCols |> List.filter (fun x -> x <> "")
                // Walk the start row to the right, pairing each column index with its cell text.
                Seq.initInfinite (fun i -> (i + adjStartCol, string ws.Cells.[adjStartLine, adjStartCol + i].Value))
                |> Seq.takeWhile (fun (_, elem) ->
                    match elem with
                    | null | "" -> false                 // first empty cell ends the column range
                    | txt when txt = takeUntil -> false  // so does the caller-supplied terminator
                    | _ -> true)
                // Without a header row the columns are named by their 1-based position.
                |> (fun h -> if hasHeaders then h else (h |> Seq.mapi (fun i (c, _) -> (c, string (i + 1)))))
                // Drop skipped columns once per column rather than once per cell.
                |> Seq.filter (fun (_, header) -> not (List.contains header filteredSkipCols))
                |> Seq.map (fun (col, header) ->
                    // One (rowKey, columnName, value) triple per data row of this column.
                    Seq.initInfinite (fun i -> i, header, ws.Cells.[i + linePlusHeaders, col].Value)
                    |> Seq.takeWhile (fun (row, _, _) -> row <= lastRow - linePlusHeaders))
                |> Seq.concat
                |> Frame.ofValues
                |> Ok)
module private Controller =
    open System.IO

    /// Validates `filePath` and hands the file to `XLSX.load`.
    ///
    /// Returns `Result.Error` with a message when the file does not exist, when its extension
    /// (compared in lower case) is neither ".xlsx" nor ".xls", or when any exception is raised
    /// while loading; otherwise returns whatever `XLSX.load` returns.
    let load filePath sheetName startLine startCol hasHeaders takeUntil skipCols =
        try
            let file = new FileInfo(filePath)
            if not file.Exists then
                Result.Error ("The file does not exist: " + filePath)
            else
                match file.Extension.ToLower() with
                | ".xlsx"
                | ".xls" -> XLSX.load file sheetName startLine startCol hasHeaders takeUntil skipCols
                | _ -> Result.Error("Invalid file extension: " + file.Extension)
        with
        // Every exception is turned into an Error carrying only its message.
        | ex -> Result.Error(ex.Message)
[<AbstractClass; Sealed>]
type API private() =
    /// Replaces every omitted optional argument with its default and calls `Controller.load`.
    /// Defaults: sheet "Sheet1", start cell (1,1), headers present, no `takeUntil` text
    /// (empty string) and no skipped columns.
    static let loadWithDefaults filePath sheetName startcell hasHeaders takeUntil skipCols =
        let startLine, startCol = defaultArg startcell (1, 1)
        Controller.load
            filePath
            (defaultArg sheetName "Sheet1")
            startLine
            startCol
            (defaultArg hasHeaders true)
            (defaultArg takeUntil "")
            (defaultArg skipCols [])

    /// Loads a worksheet and returns the outcome as a `Result`: `Ok frame` on success,
    /// `Error message` on failure.
    static member safeLoad(filePath, ?sheetName, ?startcell, ?hasHeaders, ?takeUntil, ?skipCols) =
        loadWithDefaults filePath sheetName startcell hasHeaders takeUntil skipCols

    /// Loads a worksheet and returns the frame directly; a failure is raised through
    /// `failwith` with the error message.
    static member unsafeLoad(filePath, ?sheetName, ?startcell, ?hasHeaders, ?takeUntil, ?skipCols) =
        match loadWithDefaults filePath sheetName startcell hasHeaders takeUntil skipCols with
        | Ok frame -> frame
        | Result.Error message -> failwith message
// Usage example: loads the sheet named "SomeSheet" from the given workbook, leaving
// startcell, hasHeaders, takeUntil and skipCols at their defaults. unsafeLoad raises
// (via failwith) when loading fails; API.safeLoad returns the Result instead.
API.unsafeLoad(@"C:\someFolder\someFile.xlsx","SomeSheet")