C#OPEN XML:从EXCEL获取数据到DATATABLE时,跳过空单元格

任务

将数据从excel导入DataTable

问题

不包含任何数据的单元格会被跳过,而该行中下一个有数据的单元格的值会被填入这个空列。例如:

A1 为空,A2 的值为 Tom;导入数据后,A1 得到了 A2 的值,而 A2 却变成了空。

为了说清楚,我在下面提供一些屏幕截图

这是excel数据

在此处输入图像描述

这是从excel导入数据后的DataTable 在此处输入图像描述

 /// <summary>
 /// Imports the first worksheet of an .xlsx file into a <see cref="DataTable"/> using the Open XML SDK.
 /// </summary>
 public class ImportExcelOpenXml
 {
     /// <summary>
     /// Reads the first sheet of <paramref name="fileName"/>; the first row supplies the column names
     /// and every following row becomes a <see cref="DataRow"/>.
     /// </summary>
     /// <param name="fileName">Path of the .xlsx file to open (opened read-only).</param>
     /// <returns>The populated table, without the header row.</returns>
     public static DataTable Fill_dataTable(string fileName)
     {
         DataTable dt = new DataTable();

         using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(fileName, false))
         {
             WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
             IEnumerable<Sheet> sheets = workbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
             string relationshipId = sheets.First().Id.Value;
             WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);
             Worksheet workSheet = worksheetPart.Worksheet;
             SheetData sheetData = workSheet.GetFirstChild<SheetData>();
             IEnumerable<Row> rows = sheetData.Descendants<Row>();

             // The first row is used as the header: one DataTable column per cell.
             foreach (Cell cell in rows.ElementAt(0))
             {
                 dt.Columns.Add(GetCellValue(spreadSheetDocument, cell));
             }

             foreach (Row row in rows) // this will also include your header row...
             {
                 DataRow tempRow = dt.NewRow();

                 // NOTE: i is the position among the cells that are present in the row,
                 // not the worksheet column index, so a row with an empty cell has its
                 // later values shifted to the left. This is the behaviour being asked about.
                 for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
                 {
                     tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
                 }

                 dt.Rows.Add(tempRow);
             }
         }

         dt.Rows.RemoveAt(0); // ...so i'm taking it out here.
         return dt;
     }

     /// <summary>
     /// Returns the text of <paramref name="cell"/>, resolving shared-string cells through the
     /// workbook's shared string table.
     /// </summary>
     /// <param name="document">The open spreadsheet document that owns the cell.</param>
     /// <param name="cell">The cell to read.</param>
     /// <returns>The shared string text, or the raw cell value for any other data type.</returns>
     public static string GetCellValue(SpreadsheetDocument document, Cell cell)
     {
         SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
         string value = cell.CellValue.InnerXml;

         if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
         {
             // For shared strings the cell value is an index into the shared string table.
             return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
         }
         else
         {
             return value;
         }
     }
 }

我的想法

我觉得有一些问题

// Signature of OpenXmlElement.Descendants<T> as declared by the Open XML SDK.
public IEnumerable<T> Descendants<T>() where T : OpenXmlElement;

如果我想要使用Descendants的列数

 // Column count taken from the first row: counts only the child elements present in that row.
 IEnumerable<Row> rows = sheetData.Descendants<Row>();
 int colCnt = rows.ElementAt(0).Count();

或者

如果我使用Descendants获取行数

 // Row count: counts only the Row elements present in the sheet data.
 IEnumerable<Row> rows = sheetData.Descendants<Row>();
 int rowCnt = rows.Count();

在这两种情况下, Descendants都在跳过空单元格

Descendants有没有其他选择。

非常感谢您的建议

PS:我还想过通过使用像A1,A2这样的列名来获取单元格值但为了做到这一点,我必须得到使用Descendants函数无法实现的列和行的精确计数。

如果一行中的所有单元格都有数据,那么一切都运行良好。一旦某一行中出现空单元格,问题就出现了。

为什么它首先发生

这是因为在下面的代码中:

 row.Descendants<Cell>().Count()

Count() 返回的是非空(已填充)单元格的数量,而不是全部列的数量。因此,当您把 row.Descendants<Cell>().ElementAt(i) 作为参数传给 GetCellValue 方法时:

 GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));

它取到的就是下一个非空单元格的内容(不一定是列索引 i 处的内容)。例如,如果第一列为空而我们调用 ElementAt(0),返回的却是第二列中的值,于是整个逻辑就乱了。

解决方案 – 我们需要处理空单元格的情况:基本上,当某个单元格之前存在空单元格时,我们需要找出该单元格原本的列索引。因此,您需要把下面的第一段循环代码替换为紧随其后的第二段代码:

 // Original loop: i is the position among the cells present in the row, not the column index.
 for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
 {
     tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
 }

 // Replacement loop: each value is written to the column named by the cell's own reference,
 // so cells missing from the row no longer shift later values to the left.
 // A single enumeration replaces the repeated Count()/ElementAt(i) scans.
 foreach (Cell cell in row.Descendants<Cell>())
 {
     int actualCellIndex = CellReferenceToIndex(cell);
     tempRow[actualCellIndex] = GetCellValue(spreadSheetDocument, cell);
 }

并在您的代码中添加以下方法,该方法在上面修改过的代码段中使用,以获取任何单元格的原始/正确列索引:

 /// <summary>
 /// Converts the column letters of a cell reference (for example "B7" or "AA12") into a
 /// zero-based column index: A = 0, Z = 25, AA = 26, AB = 27, ...
 /// </summary>
 /// <param name="cell">The cell whose <c>CellReference</c> is parsed.</param>
 /// <returns>The zero-based column index of the cell.</returns>
 /// <exception cref="ArgumentException">
 /// The cell has no reference, or the reference does not start with a column letter.
 /// </exception>
 private static int CellReferenceToIndex(Cell cell)
 {
     string reference = (cell.CellReference == null) ? null : cell.CellReference.Value;
     if (string.IsNullOrEmpty(reference))
     {
         throw new ArgumentException("The cell has no cell reference.", "cell");
     }

     // Accumulate a one-based, base-26 column number and convert to zero-based at the end.
     // Tracking the zero-based value directly cannot tell "A" from "AA", because both
     // start from an intermediate value of 0.
     int columnNumber = 0;
     foreach (char ch in reference.ToUpperInvariant())
     {
         if (ch < 'A' || ch > 'Z')
         {
             break; // the row-number part of the reference starts here
         }

         columnNumber = (columnNumber * 26) + (ch - 'A' + 1);
     }

     if (columnNumber == 0)
     {
         throw new ArgumentException("The cell reference '" + reference + "' has no column letters.", "cell");
     }

     return columnNumber - 1;
 }
 /// <summary>
 /// Reads the first worksheet of D:\File.xlsx into a local <see cref="DataTable"/>, writing an
 /// empty string into every column whose cell is missing from a row.
 /// </summary>
 /// <remarks>
 /// The table is local to this method and is not returned; the signature is kept as published.
 /// </remarks>
 public void Read2007Xlsx()
 {
     try
     {
         DataTable dt = new DataTable();

         using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(@"D:\File.xlsx", false))
         {
             WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
             IEnumerable<Sheet> sheets = workbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
             string relationshipId = sheets.First().Id.Value;
             WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);
             Worksheet workSheet = worksheetPart.Worksheet;
             SheetData sheetData = workSheet.GetFirstChild<SheetData>();
             IEnumerable<Row> rows = sheetData.Descendants<Row>();

             // The first row is used as the header: one DataTable column per cell.
             foreach (Cell cell in rows.ElementAt(0))
             {
                 dt.Columns.Add(GetCellValue(spreadSheetDocument, cell));
             }

             foreach (Row row in rows) // this will also include your header row...
             {
                 DataRow tempRow = dt.NewRow();
                 int columnIndex = 0;

                 foreach (Cell cell in row.Descendants<Cell>())
                 {
                     // Gets the column index of the cell with data
                     int cellColumnIndex = (int)GetColumnIndexFromName(GetColumnName(cell.CellReference));
                     cellColumnIndex--; // zero based index

                     // Insert blank data for every column skipped before this cell.
                     while (columnIndex < cellColumnIndex)
                     {
                         tempRow[columnIndex] = "";
                         columnIndex++;
                     }

                     tempRow[columnIndex] = GetCellValue(spreadSheetDocument, cell);
                     columnIndex++;
                 }

                 dt.Rows.Add(tempRow);
             }
         }

         dt.Rows.RemoveAt(0); // ...so i'm taking it out here.
     }
     catch (Exception ex)
     {
         // Still best-effort (nothing is rethrown), but the failure is no longer silently discarded.
         System.Diagnostics.Trace.TraceError("Read2007Xlsx failed: {0}", ex);
     }
 }

 // Matches the leading run of column letters in a cell reference; built once and reused.
 private static readonly Regex ColumnNameRegex = new Regex("[A-Za-z]+");

 /// <summary>
 /// Given a cell name, parses the specified cell to get the column name.
 /// </summary>
 /// <param name="cellReference">Address of the cell (ie. B2)</param>
 /// <returns>Column Name (ie. B)</returns>
 public static string GetColumnName(string cellReference)
 {
     Match match = ColumnNameRegex.Match(cellReference);
     return match.Value;
 }

 /// <summary>
 /// Given just the column name (no row index), returns the one-based column number
 /// (A = 1, Z = 26, AA = 27, ...). Callers that need a zero-based index subtract one.
 /// Column names of any length are handled, and letter case is ignored.
 /// </summary>
 /// <param name="columnName">Column Name (ie. A or AB)</param>
 /// <returns>
 /// The one-based column number; 0 for an empty name; null if the name is null or contains
 /// a character outside A-Z.
 /// </returns>
 public static int? GetColumnIndexFromName(string columnName)
 {
     if (columnName == null)
     {
         return null;
     }

     int number = 0;
     foreach (char raw in columnName)
     {
         char ch = char.ToUpperInvariant(raw);
         if (ch < 'A' || ch > 'Z')
         {
             return null;
         }

         // Base-26 accumulation, most significant letter first.
         number = (number * 26) + (ch - 'A' + 1);
     }

     return number;
 }

 /// <summary>
 /// Returns the text of <paramref name="cell"/>, resolving shared-string cells through the
 /// workbook's shared string table.
 /// </summary>
 /// <param name="document">The open spreadsheet document that owns the cell.</param>
 /// <param name="cell">The cell to read.</param>
 /// <returns>
 /// An empty string when the cell has no value element; the shared string text for
 /// shared-string cells; otherwise the raw cell value.
 /// </returns>
 public static string GetCellValue(SpreadsheetDocument document, Cell cell)
 {
     SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
     if (cell.CellValue == null)
     {
         return "";
     }

     string value = cell.CellValue.InnerXml;
     if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
     {
         // For shared strings the cell value is an index into the shared string table.
         return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
     }
     else
     {
         return value;
     }
 }

尝试这个代码,我做了一些修改,它对我有用。

  /// <summary>
  /// Reads the first worksheet of an .xlsx file into a <see cref="DataTable"/>. Row 1 supplies the
  /// column names; in later rows each cell is placed in the column named by its own cell
  /// reference, and columns skipped before a populated cell are filled with an empty string.
  /// </summary>
  /// <param name="filePath">Path of the .xlsx file to open (opened read-only).</param>
  /// <returns>The populated table.</returns>
  public static DataTable Fill_dataTable(string filePath)
  {
      DataTable dt = new DataTable();
      char[] digits = "0123456789".ToCharArray();

      using (SpreadsheetDocument doc = SpreadsheetDocument.Open(filePath, false))
      {
          Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>();
          Worksheet worksheet = ((WorksheetPart)doc.WorkbookPart.GetPartById(sheet.Id.Value)).Worksheet;
          IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>();

          // Column letters ("A", "B", ...) of the header cells, in DataTable column order.
          List<string> columnRef = new List<string>();

          foreach (Row row in rows)
          {
              if (row.RowIndex == null)
              {
                  continue;
              }

              if (row.RowIndex.Value == 1)
              {
                  // NOTE(review): GetValue is not shown in this snippet; presumably it is the
                  // author's cell-text helper (compare GetCellValue) - confirm.
                  foreach (Cell cell in row.Descendants<Cell>())
                  {
                      dt.Columns.Add(GetValue(doc, cell));
                      columnRef.Add(cell.CellReference.Value.TrimEnd(digits));
                  }
              }
              else
              {
                  DataRow dataRow = dt.NewRow();
                  int i = 0;

                  foreach (Cell cell in row.Descendants<Cell>())
                  {
                      // Locate the cell's column by its letters rather than by rebuilding
                      // "<letters><row number>" from the table's row count, which goes wrong
                      // as soon as the sheet omits an empty row.
                      int target = columnRef.IndexOf(cell.CellReference.Value.TrimEnd(digits));
                      if (target < i)
                      {
                          continue; // column has no header cell (or cells are out of order)
                      }

                      while (i < target)
                      {
                          dataRow[i] = "";
                          i += 1;
                      }

                      dataRow[i] = GetValue(doc, cell);
                      i += 1;
                  }

                  dt.Rows.Add(dataRow);
              }
          }
      }

      return dt;
  }