C# 将Excel快速导入数据表
标签:c#, .net, excel, office-interop
我正在尝试将 Excel 文件读入一个 Data.DataTable 列表,但使用当前方法可能需要很长时间。我基本上是逐个工作表、逐个单元格地读取,这往往非常耗时。有没有更快的方法?这是我的代码:
// Reads every worksheet of the workbook WB into its own DataTable (all columns typed as
// string, named "1", "2", ...) and collects the tables in List.
List<DataTable> List = new List<DataTable>();
// Worksheets is indexed from 1 (see get_Item(count) below), so the last sheet sits at
// index Worksheets.Count and the upper bound must be inclusive.
int sheetCount = WB.Worksheets.Count;
for (int count = 1; count <= sheetCount; ++count)
{
    // Create a new DataTable for every Worksheet
    DATA.DataTable DT = new DataTable();
    WS = (EXCEL.Worksheet)WB.Worksheets.get_Item(count);
    textBox1.Text = count.ToString();
    // Get range of the worksheet
    Range = WS.UsedRange;
    // Each Count below is a COM call; fetch them once instead of on every loop iteration.
    int columnCount = Range.Columns.Count;
    int rowCount = Range.Rows.Count;
    // Create new Column in DataTable
    for (cCnt = 1; cCnt <= columnCount; cCnt++)
    {
        textBox3.Text = cCnt.ToString();
        Column = new DataColumn();
        Column.DataType = typeof(string);
        Column.ColumnName = cCnt.ToString();
        DT.Columns.Add(Column);
        // Range.Cells is indexed from 1, so the row loop starts at 1 as the column loop does.
        for (rCnt = 1; rCnt <= rowCount; rCnt++)
        {
            textBox2.Text = rCnt.ToString();
            // Convert whatever the cell holds (string, double, bool, ...) without relying on
            // a failed cast + exception; an empty cell stays null and is stored as DBNull.
            object rawValue = (Range.Cells[rCnt, cCnt] as EXCEL.Range).Value2;
            cellVal = rawValue == null ? null : rawValue.ToString();
            // Add to the DataTable
            if (cCnt == 1)
            {
                // The first column creates the rows ...
                Row = DT.NewRow();
                Row[cCnt.ToString()] = cellVal;
                DT.Rows.Add(Row);
            }
            else
            {
                // ... later columns fill them in. DataTable rows are 0-based, worksheet rows
                // are 1-based, hence the -1.
                Row = DT.Rows[rCnt - 1];
                Row[cCnt.ToString()] = cellVal;
            }
        }
    }
    // Add DT to the list. Then go to the next sheet in the Excel Workbook
    List.Add(DT);
}
List List=新列表();
//计票单
对于(int count=1;count… 对于(cCnt=1;cCnt…(此处代码片段被截断)
请查看以下链接(其中发布了 6 个解决方案)。
调用 .Value2 是一项昂贵的操作,因为这是一次 COM 互操作调用。我会先将整个范围一次性读取到一个数组中,然后在数组中循环:
// Fetch every cell value of the used range with ONE COM call instead of one call per cell.
// Value2 is not statically an object[,] (and is a bare scalar when the range is a single
// cell), so cast explicitly and wrap the scalar case.
object rawValues = Range.Value2;
object[,] data = rawValues as object[,];
if (data == null)
{
    data = new object[,] { { rawValues } };
}
int rowCount = data.GetLength(0);
int columnCount = data.GetLength(1);
// Index through the array's own lower bounds rather than assuming 0- or 1-based storage.
int rowBase = data.GetLowerBound(0);
int columnBase = data.GetLowerBound(1);
// Create new Column in DataTable
for (int cCnt = 1; cCnt <= columnCount; cCnt++)
{
    textBox3.Text = cCnt.ToString();
    var Column = new DataColumn();
    Column.DataType = typeof(string);
    Column.ColumnName = cCnt.ToString();
    DT.Columns.Add(Column);
    // Create row for Data Table
    for (int rCnt = 1; rCnt <= rowCount; rCnt++)
    {
        textBox2.Text = rCnt.ToString();
        // Works for strings, doubles, booleans, ...; an empty cell stays null and is
        // stored as DBNull in the string column.
        object cell = data[rowBase + rCnt - 1, columnBase + cCnt - 1];
        string cellVal = cell == null ? null : cell.ToString();
        DataRow Row;
        // Add to the DataTable
        if (cCnt == 1)
        {
            // The first column creates the rows ...
            Row = DT.NewRow();
            Row[cCnt.ToString()] = cellVal;
            DT.Rows.Add(Row);
        }
        else
        {
            // ... later columns fill them in. rCnt counts from 1 while DT.Rows is 0-based.
            Row = DT.Rows[rCnt - 1];
            Row[cCnt.ToString()] = cellVal;
        }
    }
}
object[,]data=Range.Value2;
//在DataTable中创建新列
对于(int cCnt=1;cCnt…(此处代码片段被截断)
MS Office 互操作速度较慢,甚至 Microsoft 也不建议在服务器端使用互操作,因此它不适合用于导入大型 Excel 文件。有关更多详细信息,请参阅 Microsoft 的相关说明。
相反,您可以使用任何 Excel 库,例如 EasyXLS。下面是一个演示如何读取 Excel 文件的代码示例:
// Load the active sheet of "excel.xls" into a DataSet and take its first table.
var workbook = new ExcelDocument();
var ds = workbook.easy_ReadXLSActiveSheet_AsDataSet("excel.xls");
var dataTable = ds.Tables[0];
如果您的 Excel 文件有多个工作表,或者只需导入某个单元格区域(以获得更好的性能),请查看更多的代码示例。

以下实现供使用 EPPlus 的其他人参考。此实现非常简单,但代码中的注释指出了需要注意的问题。如果您在它之上再封装一个 GetWorkbookAsDataSet() 方法,它就可以满足原提问者(OP)的要求。
/// <summary>
/// Copies a worksheet into a <see cref="DataTable"/> named after the sheet. Row 1 supplies
/// the column names; every following row becomes a data row whose cells are stored as text.
///
/// Assumption: the worksheet is laid out as a plain table, without padding or blank
/// column headers.
///
/// Assertion: duplicate column names are aliased by appending a sequence number
/// (e.g. Column, Column1, Column2).
/// </summary>
/// <param name="worksheet">Worksheet to convert.</param>
/// <returns>A table holding the header-derived columns and one row per data row.</returns>
public static DataTable GetWorksheetAsDataTable(ExcelWorksheet worksheet)
{
    const int headerOffset = 1; // the header row is not data

    var table = new DataTable(worksheet.Name);
    table.Columns.AddRange(GetDataColumns(worksheet).ToArray());

    var columnCount = table.Columns.Count;
    var rowCount = GetTableDepth(worksheet, headerOffset);

    for (var sheetRow = headerOffset + 1; sheetRow <= rowCount + headerOffset; sheetRow++)
    {
        var record = table.NewRow();
        // columnIndex is the 0-based DataTable index; worksheet columns are 1-based.
        for (var columnIndex = 0; columnIndex < columnCount; columnIndex++)
        {
            var cell = worksheet.Cells[sheetRow, columnIndex + 1].Value;
            if (cell == null)
            {
                record[columnIndex] = null;
            }
            else
            {
                record[columnIndex] = cell.ToString();
            }
        }
        table.Rows.Add(record);
    }

    return table;
}
/// <summary>
/// Counts the data rows below the header by walking down the first column until the
/// first cell without a value.
///
/// Assumption: the first column contains no null or empty cells inside the table.
/// </summary>
/// <param name="worksheet">Worksheet to measure.</param>
/// <param name="headerOffset">Number of rows to skip before the first data row.</param>
/// <returns>The number of consecutive non-null cells found in column 1 after the offset.</returns>
private static int GetTableDepth(ExcelWorksheet worksheet, int headerOffset)
{
    const int firstColumn = 1;
    var depth = 0;
    // Probe the row after the ones already counted; stop at the first empty cell.
    while (worksheet.Cells[depth + 1 + headerOffset, firstColumn].Value != null)
    {
        depth++;
    }
    return depth;
}
/// <summary>
/// Produces one <see cref="DataColumn"/> per header name gathered from the worksheet.
/// The names are gathered immediately; the columns are created lazily on enumeration.
/// </summary>
private static IEnumerable<DataColumn> GetDataColumns(ExcelWorksheet worksheet)
{
    var headerNames = GatherColumnNames(worksheet);
    return from headerName in headerNames
           select new DataColumn(headerName);
}
/// <summary>
/// Reads the header names from row 1, left to right, stopping at the first cell without a
/// value. Repeated names are made unique through GetUniqueColumnName.
/// </summary>
private static IEnumerable<string> GatherColumnNames(ExcelWorksheet worksheet)
{
    const int headerRow = 1;
    var names = new List<string>();

    for (var column = 1; ; column++)
    {
        var header = worksheet.Cells[headerRow, column].Value;
        if (header == null)
        {
            break; // first blank header cell ends the table
        }
        names.Add(GetUniqueColumnName(names, header.ToString()));
    }

    return names;
}
/// <summary>
/// Returns <paramref name="columnName"/> unchanged when it is not yet in
/// <paramref name="columnNames"/>; otherwise appends 1, 2, 3, ... until the result is unused.
/// </summary>
/// <param name="columnNames">Names that are already taken.</param>
/// <param name="columnName">Desired name.</param>
/// <returns>A name that does not occur in <paramref name="columnNames"/>.</returns>
private static string GetUniqueColumnName(IEnumerable<string> columnNames, string columnName)
{
    var candidate = columnName;
    for (var suffix = 1; columnNames.Contains(candidate); suffix++)
    {
        candidate = columnName + suffix.ToString();
    }
    return candidate;
}
//
///假设:工作表是表格格式,没有奇怪的填充或空白列标题。
///
///断言:重复的列名将通过附加序列号(例如列、列1、列2)来别名
///
///
///
公共静态数据表GetWorksheetsDataTable(Excel工作表)
{
var dt=新数据表(工作表名称);
AddRange(GetDataColumns(工作表).ToArray());
var headerOffset=1;//必须跳过标题行
变量宽度=dt.Columns.Count;
变量深度=GetTableDepth(工作表,表头偏移);
对于(变量i=1;iDim sSheetName作为字符串
作为字符串的dimsconnection
Dim dtTablesList作为数据表
Dim OLEDEXCEL命令作为OleDbCommand
Dim oleExcelReader作为OLEDB数据读取器
作为OLEDB连接的Dim OLEDEXCEL连接
sConnection=“Provider=Microsoft.ACE.OLEDB.12.0;数据源=C:\Test.xls;Extended Properties=”“Excel 12.0;HDR=No;IMEX=1”“”
oleExcelConnection=新的OLEDB连接(sConnection)
oleExcelConnection.Open()
dtTablesList=oleExcelConnection.GetSchema(“表”)
如果dtTablesList.Rows.Count>0,则
sSheetName=dtTablesList.Rows(0)(“表名”).ToString
如果结束
dtTablesList.Clear()
dttableList.Dispose()
如果sSheetName为“”,则
oleExcelCommand=oleExcelConnection.CreateCommand()
oleExcelCommand.CommandText=“从[”&sSheetName&“]中选择*
oleExcelCommand.CommandType=CommandType.Text
oleExcelReader=oleExcelCommand.ExecuteReader
nOutputRow=0
一边读,一边读
结束时
oleExcelReader.Close()
如果结束
oleExcelConnection.Close()
类数据读取器
{
Excel.applicationxlapp;
Excel.工作簿;
Excel.Range-xlRange;
Excel.工作表;
公共数据表GetSheetDataAsDataTable(字符串文件路径、字符串sheetName)
{
DataTable dt=新的DataTable();
尝试
{
xlApp=new Excel.Application();
xlBook=xlApp.Workbooks.Open(文件路径);
xlSheet=xlBook.Worksheets[sheetName];
xlRange=xlSheet.UsedRange;
DataRow行=null;
对于(int i=1;i…(此处代码片段被截断)
这仍然可以很好地工作。我有 40k 条记录,处理时间从大约 2 分钟减少到大约 2 秒。
我对答案中的变量用法感到非常困惑,它似乎不太友好。1. 我不能在那里使用 Range.Value2,它显示的错误是“无法将 object[] 隐式转换为 object[,]”。2. 我不确定 ConvertVal 变量的用途。
@parkourkarthik 我现在无法验证,但如果您的范围只有一行或一列,则可能会返回一个一维的 object[],尽管我认为它始终是一个二维数组。如果您还没有得到解答,请随意作为单独的问题提问。
@parkourkarthik ConvertVal 来自 OP 的原始问题,因此我无法说明它的用途。不过,它似乎与原始问题无关。
我似乎找不到所有已使用变量的声明。
“有没有更快捷的方法?不幸的是没有。”——这并不正确。此代码会为它读取的每个 Excel 单元格值创建一个 COM 对象(并且错误地没有释放它)。这是最慢的方法!一次性将整个工作表读入一个数组,然后遍历该数组中的项,速度要快得多。
' Opens C:\Test.xls through the ACE OLE DB provider, takes the first table name reported
' by the schema and reads that table with a data reader.
Dim sSheetName As String = String.Empty
Dim sConnection As String
Dim dtTablesList As DataTable
sConnection = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=C:\Test.xls;Extended Properties=""Excel 12.0;HDR=No;IMEX=1"""
' Using blocks close/dispose the connection, command and reader even when an exception
' is thrown part-way through, which the trailing Close() calls could not guarantee.
Using oleExcelConnection As New OleDbConnection(sConnection)
    oleExcelConnection.Open()
    dtTablesList = oleExcelConnection.GetSchema("Tables")
    If dtTablesList.Rows.Count > 0 Then
        sSheetName = dtTablesList.Rows(0)("TABLE_NAME").ToString
    End If
    dtTablesList.Clear()
    dtTablesList.Dispose()
    If sSheetName <> "" Then
        Using oleExcelCommand As OleDbCommand = oleExcelConnection.CreateCommand()
            oleExcelCommand.CommandText = "Select * From [" & sSheetName & "]"
            oleExcelCommand.CommandType = CommandType.Text
            Using oleExcelReader As OleDbDataReader = oleExcelCommand.ExecuteReader
                ' NOTE(review): nOutputRow is not declared in this snippet - presumably
                ' it is declared by the surrounding code; confirm.
                nOutputRow = 0
                While oleExcelReader.Read
                    ' Per-row processing was left empty in the original snippet.
                End While
            End Using
        End Using
    End If
End Using
/// <summary>
/// Loads one worksheet of an Excel workbook into a <see cref="DataTable"/> through Excel
/// interop. The first row of the used range supplies the column names; each following
/// row becomes a data row.
/// </summary>
class DataReader
{
    Excel.Application xlApp;
    Excel.Workbook xlBook;
    Excel.Range xlRange;
    Excel.Worksheet xlSheet;

    /// <summary>
    /// Reads the used range of <paramref name="sheetName"/> in <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the workbook to open.</param>
    /// <param name="sheetName">Name of the worksheet to read.</param>
    /// <returns>
    /// The populated table. On failure the error message is written to the console and
    /// whatever was loaded up to that point (possibly an empty table) is returned.
    /// </returns>
    public DataTable GetSheetDataAsDataTable(String filePath, String sheetName)
    {
        DataTable dt = new DataTable();
        try
        {
            xlApp = new Excel.Application();
            xlBook = xlApp.Workbooks.Open(filePath);
            xlSheet = xlBook.Worksheets[sheetName];
            xlRange = xlSheet.UsedRange;

            // One COM call for the whole range instead of one call per cell.
            object[,] cells = ReadAllValues(xlRange);
            int rowCount = cells.GetLength(0);
            int columnCount = cells.GetLength(1);
            // Index through the array's own lower bounds rather than assuming a base.
            int firstRow = cells.GetLowerBound(0);
            int firstColumn = cells.GetLowerBound(1);

            if (rowCount > 0)
            {
                // Header row: any value type is accepted and turned into the column name.
                for (int j = 0; j < columnCount; j++)
                {
                    dt.Columns.Add(Convert.ToString(cells[firstRow, firstColumn + j]));
                }
            }

            for (int i = 1; i < rowCount; i++)
            {
                DataRow row = dt.NewRow();
                for (int j = 0; j < columnCount; j++)
                {
                    row[j] = cells[firstRow + i, firstColumn + j] ?? DBNull.Value;
                }
                dt.Rows.Add(row);
            }
        }
        catch (Exception ex)
        {
            // Best effort by design: report the problem and return what was read so far.
            Console.WriteLine(ex.Message);
        }
        finally
        {
            // Any of these may still be null when start-up or Open failed; guard each step
            // so cleanup cannot throw a NullReferenceException that hides the real error.
            if (xlBook != null)
            {
                xlBook.Close();
            }
            if (xlApp != null)
            {
                xlApp.Quit();
            }

            // Release the COM wrappers (innermost first) so the Excel process can exit,
            // and clear the fields so a later call never touches stale objects.
            ReleaseComObject(xlRange);
            ReleaseComObject(xlSheet);
            ReleaseComObject(xlBook);
            ReleaseComObject(xlApp);
            xlRange = null;
            xlSheet = null;
            xlBook = null;
            xlApp = null;
        }
        return dt;
    }

    // Returns the range's values as a 2-D array; wraps the single-cell case (where Value2
    // is a bare scalar) and maps a completely empty range to an empty array.
    private static object[,] ReadAllValues(Excel.Range range)
    {
        object raw = range.Value2;
        object[,] values = raw as object[,];
        if (values != null)
        {
            return values;
        }
        if (raw == null)
        {
            return new object[0, 0];
        }
        return new object[,] { { raw } };
    }

    // Releases a COM wrapper if there is one; safe to call with null.
    private static void ReleaseComObject(object comObject)
    {
        if (comObject != null && System.Runtime.InteropServices.Marshal.IsComObject(comObject))
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject);
        }
    }
}