Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/276.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 如何将excel文件读取到数据框?_C#_Deedle - Fatal编程技术网

C# 如何将excel文件读取到数据框?

C# 如何将excel文件读取到数据框?,c#,deedle,C#,Deedle,是否可以将xls文件中的数据直接读取到数据帧?我发现Deedle.Excel有openExcel方法,但无法使用它。如果可能,请提供示例。在安装deedle.excel和ExcelDataReader.DataSet后,我使用以下代码(我不确定这是否是最好的,但至少它可以工作): 使用Deedle; 使用ExcelDataReader; //你的代码被省略了 ... //下面是关键代码 框架mydf; 使用(var stream=File.Open(myFilePath,FileMode.Ope

是否可以将xls文件中的数据直接读取到Deedle数据框(Frame)中?我发现Deedle.Excel有openExcel方法,但没能用起来。如果可以,请提供示例。

安装Deedle.Excel和ExcelDataReader.DataSet之后,我使用了以下代码(不确定这是否是最佳做法,但至少可以正常工作):

using Deedle;
using ExcelDataReader;

// 此处省略你的其他代码
...
// 下面是关键代码
            Frame<int, string> mydf;

            using (var stream = File.Open(myFilePath, FileMode.Open, FileAccess.Read))
            {
                using (var reader = ExcelReaderFactory.CreateReader(stream))
                {
                    var result = reader.AsDataSet(new ExcelDataSetConfiguration()
                    {
                        UseColumnDataType = true,
                        ConfigureDataTable = (_) => new ExcelDataTableConfiguration()
                        {
                            UseHeaderRow = true,  // 使用第一行作为标题
                        }
                    });
                    DataTableReader rd = result.Tables[0].CreateDataReader();  // 只使用第一张工作表
                    mydf = Frame.ReadReader(rd);
                }
            }

// 在下面使用 mydf
...

不久前,我创建了一个库,可以读取xlsb、xlsx、xls、csv和txt文件,并将其转换为Deedle数据框(Frame),也可以把Deedle数据框写回Excel电子表格。其中xlsx部分使用EPPlus,因此不需要安装驱动程序。

以下是代码XLS加载部分的脚本版本(为了简洁起见,我省略了其他功能):

#r @"C:\Users\flavi\.nuget\packages\deedle\2.1.2\lib\net45\Deedle.dll"
#r @"C:\Users\flavi\.nuget\packages\epplus\5.0.4\lib\net45\EPPlus.dll"

open Deedle

module private Functions =
    let adjustStartCell (startRow,startCol) = max startRow 1,max startCol 1        // 防止输入小于1时出现错误

module private XLSX = 
    open OfficeOpenXml
    open System.IO
    open Functions

    let load (fi : FileInfo) (sheetName:string) startLine startCol hasHeaders takeUntil skipCols =
        using (new ExcelPackage(fi)) (fun p ->            
            let wsOpt = p.Workbook.Worksheets.[sheetName] |> Option.ofObj
            match wsOpt with
            |Some ws ->           
                let adjStartLine,adjStartCol = adjustStartCell (startLine,startCol)      // 防止输入小于1时出错
                let linePlusHeaders = adjStartLine + if hasHeaders then 1 else 0    // 如果有标题行,则跳过1行
                let lastRow = ws.Dimension.End.Row              // 获取给定工作表的最后一行,以便知道何时停止读取数据
                let filteredSkipCols = skipCols |> List.filter (fun x -> x<>"")
                Seq.initInfinite (fun i -> (i+adjStartCol,string ws.Cells.[adjStartLine,adjStartCol+i].Value))  // 初始化列维度映射
                |> Seq.takeWhile (fun (_,elem) ->
                                match elem with
                                |null|"" -> false                   // 遇到第一个空单元格时停止读取
                                |txt when txt = takeUntil -> false  // 或者遇到指定的特定值时停止
                                |_ -> true)
                |> (fun h -> if hasHeaders then h else h |> Seq.mapi (fun i (c,_) -> (c,string (i+1))))
                |> Seq.map (fun (col,header) ->
                    Seq.initInfinite (fun i ->      // 开始读取行数据
                        i,header,ws.Cells.[i+linePlusHeaders,col].Value)
                    |> Seq.takeWhile (fun (row,_,_) -> row<=lastRow-linePlusHeaders)
                    |> (fun s -> if filteredSkipCols<>[] then s |> Seq.filter (fun (_,hd,_) -> filteredSkipCols |> List.contains hd |> not) else s)) // 过滤列
                |> Seq.concat |> Frame.ofValues |> Ok                    
            |None -> Result.Error("Worksheet not found: " + sheetName + "."))

module private Controller =
    open System.IO

    let load filePath sheetName startLine startCol hasHeaders takeUntil skipCols =
        try 
            let file = new FileInfo(filePath)
            match file.Exists,file.Extension.ToLower() with
            | false,_ -> Result.Error ("The file does not exist: " + filePath)
            | _,".xlsx" | _,".xls" -> XLSX.load file sheetName startLine startCol hasHeaders takeUntil skipCols
            | _ -> Result.Error("Invalid file extension: " + file.Extension)
        with 
            |ex -> Result.Error(ex.Message)

[<AbstractClass; Sealed>]
type API private() =
    static let getDefaultParams startcell hasHeaders takeUntil skipCols sheetName =
        let (startLine,startCol) = defaultArg startcell (1,1)
        let hasHeaders0 = defaultArg hasHeaders true
        let takeUntil0 = defaultArg takeUntil ""
        let skipCols0 = defaultArg skipCols []
        let sheetName0 = defaultArg sheetName "Sheet1"
        ((startLine,startCol),hasHeaders0,takeUntil0,skipCols0,sheetName0)

    static member safeLoad(filePath, ?sheetName, ?startcell, ?hasHeaders, ?takeUntil, ?skipCols) =
        let (startLine,startCol),hasHeaders0,takeUntil0,skipCols0,sheetName0 = getDefaultParams startcell hasHeaders takeUntil skipCols sheetName
        Controller.load filePath sheetName0 startLine startCol hasHeaders0 takeUntil0 skipCols0

    static member unsafeLoad(filePath, ?sheetName, ?startcell, ?hasHeaders, ?takeUntil, ?skipCols) =
        let (startLine,startCol),hasHeaders0,takeUntil0,skipCols0,sheetName0 = getDefaultParams startcell hasHeaders takeUntil skipCols sheetName
        match Controller.load filePath sheetName0 startLine startCol hasHeaders0 takeUntil0 skipCols0 with
        |Ok df -> df
        |Result.Error e -> failwith(e)

// 使用示例:
API.unsafeLoad(@"C:\someFolder\someFile.xlsx","SomeSheet")

您对Deedle.Excel使用了什么连接字符串?您使用了JET或ACE驱动程序吗?您的Excel文件是xls(Excel 2003)还是xlsx(Excel 2007+)?
是xls。我需要安装什么驱动程序吗?
using Deedle;
using ExcelDataReader;

// your code is omit
...
// below is the key code
            // Frame built from the first worksheet of the workbook at myFilePath.
            // Row keys are ints and column keys are strings, per the declared type.
            Frame<int, string> mydf;

            // NOTE(review): the ExcelDataReader README says that on .NET Core the
            // code-page encoding provider must be registered before reading legacy
            // .xls files - confirm whether that applies to your target runtime.

            // Every object in this chain is IDisposable; stacking the using
            // statements guarantees the DataSet and the DataTableReader are released
            // as well as the stream and the Excel reader, even if reading throws.
            using (var stream = File.Open(myFilePath, FileMode.Open, FileAccess.Read))
            using (var reader = ExcelReaderFactory.CreateReader(stream))
            using (var result = reader.AsDataSet(new ExcelDataSetConfiguration()
            {
                UseColumnDataType = true,
                ConfigureDataTable = (_) => new ExcelDataTableConfiguration()
                {
                    UseHeaderRow = true,  // to use the first row as header
                }
            }))
            using (DataTableReader rd = result.Tables[0].CreateDataReader())  // just use the first sheet
            {
                // The frame is assigned before rd is disposed at the end of this block.
                mydf = Frame.ReadReader(rd);
            }

// use mydf below
...
#r @"C:\Users\flavi\.nuget\packages\deedle\2.1.2\lib\net45\Deedle.dll"
#r @"C:\Users\flavi\.nuget\packages\epplus\5.0.4\lib\net45\EPPlus.dll"

open Deedle

module private Functions =
    /// Clamps both coordinates of a (row, column) start cell to a minimum of 1,
    /// so that inputs smaller than 1 cannot cause errors further down.
    let adjustStartCell (startRow,startCol) =
        let atLeastOne n = max n 1
        atLeastOne startRow, atLeastOne startCol

module private XLSX = 
    open OfficeOpenXml
    open System.IO
    open Functions

    /// Reads one worksheet of the workbook `fi` into a Deedle frame.
    ///
    /// Parameters:
    ///   fi         - workbook file to open with EPPlus.
    ///   sheetName  - name of the worksheet to read.
    ///   startLine  - row of the first line to read; values below 1 are raised to 1.
    ///   startCol   - column of the first cell to read; values below 1 are raised to 1.
    ///   hasHeaders - when true, the cells of the start line become the column keys and
    ///                data starts on the next line; when false, columns are keyed
    ///                "1", "2", ... and data starts on the start line itself.
    ///   takeUntil  - column discovery stops at the first start-line cell whose text
    ///                equals this value (it also stops at the first empty cell).
    ///   skipCols   - column keys to leave out of the result; empty strings are ignored.
    ///
    /// Returns `Ok frame` (rows keyed by 0-based offset from the first data line), or
    /// `Error message` when the worksheet does not exist or contains no cells.
    ///
    /// NOTE(review): the script references EPPlus 5.x, whose documentation asks callers
    /// to set ExcelPackage.LicenseContext - confirm this is configured by the host.
    let load (fi : FileInfo) (sheetName:string) startLine startCol hasHeaders takeUntil skipCols =
        using (new ExcelPackage(fi)) (fun p ->
            match p.Workbook.Worksheets.[sheetName] |> Option.ofObj with
            | None -> Result.Error("Worksheet not found: " + sheetName + ".")
            | Some ws when isNull ws.Dimension ->
                // EPPlus reports no Dimension for a worksheet without cells; fail with a
                // clear message instead of a NullReferenceException on `.End.Row` below.
                Result.Error("Worksheet is empty: " + sheetName + ".")
            | Some ws ->
                let adjStartLine, adjStartCol = adjustStartCell (startLine, startCol)   // prevents errors if input is smaller than 1
                let firstDataRow = adjStartLine + (if hasHeaders then 1 else 0)         // skips 1 line in case there's a header
                let lastRow = ws.Dimension.End.Row                                      // last used row: where to stop reading data
                let filteredSkipCols = skipCols |> List.filter (fun x -> x <> "")
                let isKept header = not (List.contains header filteredSkipCols)

                // Discover (column index, column key) pairs by walking the start line
                // to the right until an empty cell or the takeUntil marker is found.
                let columns =
                    Seq.initInfinite (fun i -> (i + adjStartCol, string ws.Cells.[adjStartLine, adjStartCol + i].Value))
                    |> Seq.takeWhile (fun (_, cellText) ->
                        match cellText with
                        | null | "" -> false                    // stops at the first empty cell
                        | txt when txt = takeUntil -> false     // or when the stop value is reached
                        | _ -> true)
                    |> (fun found ->
                        if hasHeaders then found
                        else found |> Seq.mapi (fun i (c, _) -> (c, string (i + 1))))
                    // Skipped columns are dropped once per column, before any of their
                    // cells are read, rather than once per cell.
                    |> Seq.filter (fun (_, header) -> isKept header)

                // Emit one (row offset, column key, value) triple per cell, column by column.
                columns
                |> Seq.collect (fun (col, header) ->
                    seq { for row in firstDataRow .. lastRow ->
                            (row - firstDataRow, header, ws.Cells.[row, col].Value) })
                |> Frame.ofValues
                |> Ok)

module private Controller =
    open System.IO

    /// Validates `filePath` and hands the file to `XLSX.load`.
    ///
    /// Returns an `Error` when the file does not exist, when its extension is neither
    /// ".xlsx" nor ".xls" (compared after lower-casing), or when anything in the
    /// process throws - in that last case the error carries the exception message.
    ///
    /// NOTE(review): ".xls" files are sent to the same EPPlus-based loader as ".xlsx";
    /// confirm that EPPlus can actually open the legacy binary format.
    let load filePath sheetName startLine startCol hasHeaders takeUntil skipCols =
        try
            let info = FileInfo(filePath)
            if not info.Exists then
                Result.Error ("The file does not exist: " + filePath)
            else
                match info.Extension.ToLower() with
                | ".xlsx"
                | ".xls" -> XLSX.load info sheetName startLine startCol hasHeaders takeUntil skipCols
                | _ -> Result.Error("Invalid file extension: " + info.Extension)
        with
        | failure -> Result.Error(failure.Message)

/// Static entry points for loading a worksheet into a frame.
/// Abstract + sealed with a private constructor, so it cannot be instantiated.
[<AbstractClass; Sealed>]
type API private() =
    /// Replaces every omitted optional argument with its default:
    /// start cell (1,1), headers present, no stop value, no skipped columns, sheet "Sheet1".
    static let getDefaultParams startcell hasHeaders takeUntil skipCols sheetName =
        let startPosition = startcell |> Option.defaultValue (1,1)
        let headersPresent = hasHeaders |> Option.defaultValue true
        let stopValue = takeUntil |> Option.defaultValue ""
        let columnsToSkip = skipCols |> Option.defaultValue []
        let sheet = sheetName |> Option.defaultValue "Sheet1"
        (startPosition, headersPresent, stopValue, columnsToSkip, sheet)

    /// Loads the worksheet and returns the outcome of `Controller.load` as-is
    /// (a Result), so failures are reported as values rather than exceptions.
    static member safeLoad(filePath, ?sheetName, ?startcell, ?hasHeaders, ?takeUntil, ?skipCols) =
        let (row, col), headers, stopAt, skipped, sheet =
            getDefaultParams startcell hasHeaders takeUntil skipCols sheetName
        Controller.load filePath sheet row col headers stopAt skipped

    /// Loads the worksheet and returns the frame directly; an `Error` result is
    /// turned into an exception via `failwith` with the error text as its message.
    static member unsafeLoad(filePath, ?sheetName, ?startcell, ?hasHeaders, ?takeUntil, ?skipCols) =
        let (row, col), headers, stopAt, skipped, sheet =
            getDefaultParams startcell hasHeaders takeUntil skipCols sheetName
        let outcome = Controller.load filePath sheet row col headers stopAt skipped
        match outcome with
        | Ok frame -> frame
        | Result.Error message -> failwith message

// Example of how to use: load the worksheet "SomeSheet" from the given workbook.
// Only filePath and sheetName are supplied; the remaining optional arguments
// (startcell, hasHeaders, takeUntil, skipCols) fall back to their defaults.
// unsafeLoad raises an exception (via failwith) if loading fails; use
// API.safeLoad to receive a Result instead.
API.unsafeLoad(@"C:\someFolder\someFile.xlsx","SomeSheet")