使用OpenXmlReader

我本不愿意为这样(看似)基本的问题求助StackOverflow,但过去几个小时我一直在和微软的这套东西较劲,似乎已经走进了死胡同。 我正在尝试读取(大型)Excel 2007+电子表格,谷歌告诉我,使用OpenXml SDK是一个非常流行的选择。 于是我试了一下,阅读了一些教程,查看了微软自己的类库文档页面,但收获甚微。

我正在使用一个小的测试电子表格,只有一列数字和一列字符串 – 大规模测试稍后再进行。 我尝试了几种与下面将要贴出的代码类似的实现,但没有一种能读取到数据。 下面的代码主要取自另一个StackOverflow帖子,在那里它似乎可以正常工作 – 但对我来说却不行。 我想还是请大家检查/调试这个版本,因为它很可能比我今天自己写的任何代码问题都要少。

/// <summary>
/// Streams through the first worksheet part of the workbook with an
/// <c>OpenXmlPartReader</c> and writes the "r" attribute of every row to the console.
/// </summary>
/// <param name="fileName">Path of the .xlsx file to read.</param>
static void ReadExcelFileSAX(string fileName)
{
    // Nothing is written back, so open the package read-only (isEditable: false).
    using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
    {
        WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;

        // Takes the first part of the WorksheetParts collection.
        WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();

        // The using block closes the reader even when reading throws.
        using (OpenXmlPartReader reader = new OpenXmlPartReader(worksheetPart))
        {
            while (reader.Read())
            {
                if (reader.ElementType == typeof(Row))
                {
                    do
                    {
                        if (reader.HasAttributes)
                        {
                            string rowNum = reader.Attributes.First(a => a.LocalName == "r").Value;
                            Console.Write("rowNum: " + rowNum); // we never even get here, I tested it with a breakpoint
                        }
                    } while (reader.ReadNextSibling()); // Skip to the next row

                    Console.ReadKey();
                    break; // We just looped through all the rows so no need to continue reading the worksheet
                }

                // Don't want to skip the contents of the worksheet itself;
                // skip the contents of any other node before finding the first row.
                if (reader.ElementType != typeof(Worksheet))
                {
                    reader.Skip();
                }
            }
        }

        Console.WriteLine();
        Console.ReadKey();
    }
}

另外,顺便问一下:有没有什么使用OpenXml SDK的好方法是我不知怎么错过了的?

我认为您使用了错误的WorksheetPart来读取行。

这条线

 workbookPart.WorksheetParts.First(); 

获取集合的第一个WorksheetPart ,它不一定是您在Microsoft Excel中看到的第一个工作表。

因此,遍历所有WorksheetParts ,您应该在控制台窗口中看到一些输出。

 /// <summary>
 /// Streams through every worksheet part of the workbook and writes the "r"
 /// attribute of each row to the console.
 /// </summary>
 /// <param name="fileName">Path of the .xlsx file to read.</param>
 static void ReadExcelFileSAX(string fileName)
 {
     // Nothing is written back, so open the package read-only (isEditable: false).
     using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
     {
         WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;

         // Iterate through all WorksheetParts
         foreach (WorksheetPart worksheetPart in workbookPart.WorksheetParts)
         {
             // The using block closes the reader even when reading throws.
             using (OpenXmlPartReader reader = new OpenXmlPartReader(worksheetPart))
             {
                 while (reader.Read())
                 {
                     if (reader.ElementType == typeof(Row))
                     {
                         do
                         {
                             if (reader.HasAttributes)
                             {
                                 // Look the "r" attribute up by hand so that a row carrying
                                 // other attributes but no "r" is skipped instead of throwing.
                                 foreach (var attribute in reader.Attributes)
                                 {
                                     if (attribute.LocalName == "r")
                                     {
                                         Console.Write("rowNum: " + attribute.Value);
                                         break;
                                     }
                                 }
                             }
                         } while (reader.ReadNextSibling()); // Skip to the next row

                         // We just looped through all the rows so no
                         // need to continue reading the worksheet
                         break;
                     }

                     // Do not skip the worksheet element itself, only the contents
                     // of other nodes encountered before the first row.
                     if (reader.ElementType != typeof(Worksheet))
                     {
                         reader.Skip();
                     }
                 }
             }
         }
     }
 }

要读取所有单元格值,请使用以下函数(省略所有error handling详细信息):

 /// <summary>
 /// Writes every cell of every worksheet part to the console as
 /// "reference: value", one console line per row. Cells whose data type is
 /// SharedString are resolved through the workbook's shared string table.
 /// </summary>
 /// <param name="fileName">Path of the .xlsx file to read.</param>
 static void ReadAllCellValues(string fileName)
 {
     using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
     {
         WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;

         // Materialize the shared strings once: ElementAt() on the table is a
         // linear scan, which is paid again for every string cell otherwise.
         SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;
         SharedStringItem[] sharedStrings = stringTablePart == null
             ? new SharedStringItem[0]
             : stringTablePart.SharedStringTable.Elements<SharedStringItem>().ToArray();

         foreach (WorksheetPart worksheetPart in workbookPart.WorksheetParts)
         {
             // The using block closes the reader even when reading throws.
             using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
             {
                 while (reader.Read())
                 {
                     if (reader.ElementType != typeof(Row))
                     {
                         continue;
                     }

                     // Only walk the siblings when the row really has children;
                     // otherwise the sibling loop would step over the following
                     // rows instead of over cells.
                     if (reader.ReadFirstChild())
                     {
                         do
                         {
                             if (reader.ElementType == typeof(Cell))
                             {
                                 Cell c = (Cell)reader.LoadCurrentElement();
                                 string cellValue = string.Empty;

                                 // A cell may have no value element at all; report it as empty.
                                 if (c.CellValue != null)
                                 {
                                     cellValue = c.CellValue.InnerText;

                                     if (c.DataType != null && c.DataType == CellValues.SharedString)
                                     {
                                         // For shared strings the cell value is an index into the table.
                                         SharedStringItem ssi = sharedStrings[int.Parse(cellValue)];

                                         // Text is null when the item has no plain text child;
                                         // fall back to the item's concatenated inner text.
                                         cellValue = ssi.Text != null ? ssi.Text.Text : ssi.InnerText;
                                     }
                                 }

                                 Console.Out.Write("{0}: {1} ", c.CellReference, cellValue);
                             }
                         } while (reader.ReadNextSibling());
                     }

                     Console.Out.WriteLine();
                 }
             }
         }
     }
 }

在上面的代码中,您会看到:对于数据类型为SharedString的单元格,必须借助SharedStringTablePart来取得其实际的值。

为了读取空白单元格,我使用了一个在单元格读取循环之外声明的变量;在while循环中,我检查单元格的列索引是否大于这个变量(它在每读取一个单元格后递增)。 如果两者不匹配,我就用我想要的值填充相应的列。 这就是我用来让空白单元格与各自的列对应起来的技巧。 代码如下:

 /// <summary>
 /// Reads the sheets of an Excel workbook that carry no State attribute into a
 /// DataTable, copying columns A..T (20 columns) of each data row and padding
 /// cells that are missing from the file with empty strings.
 /// </summary>
 /// <param name="newFilePath">Path of the .xlsx file to read.</param>
 /// <returns>The table returned by CreateProviderRvenueSharingTable, filled with the rows read.</returns>
 public static DataTable ReadIntoDatatableFromExcel(string newFilePath)
 {
     /* Number of columns (A..T) copied into every DataRow. */
     const int columnCount = 20;

     /* Table that receives the rows; the loop below fills columns 0..19 of each row. */
     var dt = CreateProviderRvenueSharingTable();

     /* No catch block: the original "catch (Exception ex) { throw ex; }" only reset the
        stack trace, so exceptions now propagate unchanged. */

     /* Open through a stream with FileShare.ReadWrite so the file can be read while
        another process has it open. */
     using (Stream stream = new FileStream(newFilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
     using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(stream, false))
     {
         var workbookPart = spreadsheetDocument.WorkbookPart;
         var workbook = workbookPart.Workbook;

         /* Only sheets without a State attribute (original comment: "get only unhide tabs"). */
         var sheets = workbook.Descendants<Sheet>().Where(e => e.State == null);

         foreach (var sheet in sheets)
         {
             var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id);

             /* Ignore sheets that have at most one row with any text in it. */
             List<Row> rows = worksheetPart.Worksheet.Elements<SheetData>().First().Elements<Row>()
                 .Where(r => r.InnerText != string.Empty).ToList();
             if (rows.Count <= 1)
             {
                 continue;
             }

             /* The using block closes the reader even when reading throws. */
             using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
             {
                 int rowNodesSeen = 0;
                 int emptyRowCount = 0; /* rows whose first cell was empty */

                 while (reader.Read())
                 {
                     if (reader.ElementType == typeof(Row))
                     {
                         /* Ignore the header: the first two Row nodes reported by the reader are
                            skipped. NOTE(review): presumably these are the start and end tags of
                            the header row - confirm before changing this threshold. */
                         if (rowNodesSeen < 2)
                         {
                             rowNodesSeen++;
                             continue;
                         }

                         reader.ReadFirstChild();
                         DataRow row = dt.NewRow();
                         int column = 0; /* zero-based index of the next DataRow column to fill */

                         if (reader.ElementType == typeof(Cell))
                         {
                             do
                             {
                                 Cell c = (Cell)reader.LoadCurrentElement();

                                 /* Blank cells are absent from the file, so realign on the cell
                                    reference. NOTE(review): as in the original, the first cell
                                    of a row is not realigned - it is always stored in column 0. */
                                 if (column != 0)
                                 {
                                     int cellColumnIndex = ExcelHelper.GetColumnIndexFromName(
                                         ExcelHelper.GetColumnName(c.CellReference));

                                     /* Anything right of column T is not part of the table. */
                                     if (cellColumnIndex > columnCount)
                                     {
                                         break;
                                     }

                                     /* Pad the gap left by missing cells (column T included). */
                                     while (column < cellColumnIndex - 1)
                                     {
                                         row[column] = string.Empty;
                                         column++;
                                     }
                                 }

                                 /* A first cell without type and value is treated as an empty row. */
                                 if (column == 0 && c.DataType == null && c.CellValue == null)
                                 {
                                     emptyRowCount++;
                                     break;
                                 }

                                 row[column] = GetCellValue(c, workbookPart);
                                 column++;

                                 /* Column T has been filled: ignore the rest of the row. */
                                 if (column == columnCount)
                                 {
                                     break;
                                 }
                             } while (reader.ReadNextSibling());
                         }

                         /* Pad trailing missing cells up to index 19. */
                         while (column != 0 && column < columnCount)
                         {
                             row[column] = string.Empty;
                             column++;
                         }

                         if (column == columnCount)
                         {
                             dt.Rows.Add(row);
                         }
                     }

                     /* Stop reading the sheet once more than five empty rows were found. */
                     if (emptyRowCount > 5)
                     {
                         break;
                     }
                 }
             }
         }
     }

     return dt;
 }

 /// <summary>
 /// Returns the text of a cell, resolving shared-string cells through the
 /// workbook's shared string table. Returns an empty string when the cell has no value.
 /// </summary>
 /// <param name="c">Cell to read.</param>
 /// <param name="workbookPart">Workbook part that owns the shared string table.</param>
 private static string GetCellValue(Cell c, WorkbookPart workbookPart)
 {
     if (c.CellValue == null)
     {
         return string.Empty;
     }

     string rawValue = c.CellValue.InnerText;

     if (!(c.DataType != null && c.DataType == CellValues.SharedString))
     {
         return rawValue;
     }

     /* For shared strings the cell value is an index into the shared string table. */
     SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable
         .Elements<SharedStringItem>()
         .ElementAt(int.Parse(rawValue));

     /* Text is null when the item has no plain text child; fall back to the
        item's concatenated inner text instead of returning nothing. */
     return ssi.Text != null ? ssi.Text.Text : ssi.InnerText;
 }

 /// <summary>
 /// Converts a column name ("A", "AB") or cell reference ("AB12") to its
 /// one-based column index; characters other than A-Z/a-z are ignored.
 /// </summary>
 /// <param name="columnNameOrCellReference">Column name or cell reference.</param>
 /// <returns>One-based column index (A = 1); 0 when the text contains no letters.</returns>
 public static int GetColumnIndexFromName(string columnNameOrCellReference)
 {
     int columnIndex = 0;
     int factor = 1;

     /* Walk right to left so the last letter is the least significant base-26 digit. */
     for (int pos = columnNameOrCellReference.Length - 1; pos >= 0; pos--)
     {
         /* Normalize case so "ab" and "AB" give the same index. */
         char letter = Char.ToUpperInvariant(columnNameOrCellReference[pos]);
         if (letter >= 'A' && letter <= 'Z')
         {
             columnIndex += factor * ((letter - 'A') + 1);
             factor *= 26;
         }
     }

     return columnIndex;
 }

 /// <summary>
 /// Returns the column-letter prefix of a cell reference, e.g. "AB" for "AB12".
 /// </summary>
 /// <param name="cellReference">Cell reference such as "A1".</param>
 /// <exception cref="ArgumentNullException"><paramref name="cellReference"/> is null.</exception>
 /// <exception cref="ArgumentOutOfRangeException">No number is found within the first four characters.</exception>
 public static string GetColumnName(string cellReference)
 {
     if (cellReference == null)
     {
         throw new ArgumentNullException("cellReference");
     }

     /* Advance from left to right until a number is found. Only the first four
        characters are inspected, and never past the end of the string. */
     for (int lastCharPos = 0; lastCharPos <= 3 && lastCharPos < cellReference.Length; lastCharPos++)
     {
         if (Char.IsNumber(cellReference[lastCharPos]))
         {
             return cellReference.Substring(0, lastCharPos);
         }
     }

     throw new ArgumentOutOfRangeException("cellReference");
 }

这段代码的特点: 1. 能够读取空白单元格。 2. 读取完数据后跳过其后的空行。 3. 从第一个工作表开始按升序读取工作表。 4. 即使另一个进程正在使用该Excel文件,OpenXML仍然可以读取它。