多列数据转换

我正在接收数据源中的数据,我需要先将数据透视,然后才能将信息发送到UI进行显示。 I am new to concept of pivoting & I am not sure how to go about it.

该问题分为两部分:

  1. 形成标题
  2. 透视数据以匹配标题

要记住的事情:

  1. 我有一些我不想转动的专栏。 我把它们称为static columns.

  2. 我需要转动某些列以形成多级头信息。 我把它们称为dynamic columns

  3. 某些列需要旋转,其中包含实际值。 我称它们为value columns

  4. 可以拥有的dynamic, static and value columns没有限制

  5. 假设,当数据到来时,我们将首先获得静态列的数据,然后是动态列,然后是值列。

    有关更多信息,请参见附图。

在此处输入图像描述

虚拟数据:

 class Program { static void Main(string[] args) { var _staticColumnCount = 2; //Columns that should not be pivoted var _dynamicColumnCount = 2; // Columns which needs to be pivoted to form header var _valueColumnCount = 1; //Columns that represent Actual value var valueColumnIndex = 4; //Assuming index starts with 0; List<List> headerInfo = new List<List>(); headerInfo.Add(new List {"Product Three", "Item Ten"}); headerInfo.Add(new List {"Product Two", "Item Five"}); headerInfo.Add(new List {"Product Two", "Item Seven"}); headerInfo.Add(new List {"Product Two", "Item Nine"}); headerInfo.Add(new List {"Product One", "Item One"}); headerInfo.Add(new List {"Product One", "Item Two"}); headerInfo.Add(new List {"Product One", "Item Four"}); headerInfo.Add(new List {"Product One", "Item Six"}); headerInfo.Add(new List {"Product One", "Item Eight"}); headerInfo.Add(new List {"Product One", "Item Eleven"}); List<List> data = new List<List>(); data.Add(new List {"Global", "Europe", "Product One", "Item One", "579984.59"}); data.Add(new List {"Global", "North America", "Product One", "Item Two", "314586.73"}); data.Add(new List {"Global", "Asia", "Product One", "Item One", "62735.13"}); data.Add(new List {"Global", "Asia", "Product Two", "Item Five", "12619234.69"}); data.Add(new List {"Global", "North America", "Product Two", "Item Five", "8953713.39"}); data.Add(new List {"Global", "Europe", "Product One", "Item Two", "124267.4"}); data.Add(new List {"Global", "Asia", "Product One", "Item Four", "482338.49"}); data.Add(new List {"Global", "North America", "Product One", "Item Four", "809185.13"}); data.Add(new List {"Global", "Europe", "Product One", "Item Four", "233101"}); data.Add(new List {"Global", "Asia", "Product One", "Item Two", "120561.65"}); data.Add(new List {"Global", "North America", "Product One", "Item Six", "1517359.37"}); data.Add(new List {"Global", "Europe", "Product One", "Item Six", "382590.45"}); data.Add(new List {"Global", "North America", "Product One", "Item Eight", "661835.64"}); data.Add(new List {"Global", "Europe", "Product Three", "Item Three", "0"}); data.Add(new List {"Global", "Europe", "Product One", "Item Eight", "0"}); data.Add(new List {"Global", "Europe", "Product Two", "Item Five", "3478145.38"}); data.Add(new List {"Global", "Asia", "Product One", "Item Six", "0"}); data.Add(new List {"Global", "North America", "Product Two", "Item Seven", "4247059.97"}); data.Add(new List {"Global", "Asia", "Product Two", "Item Seven", "2163718.01"}); data.Add(new List {"Global", "Europe", "Product Two", "Item Seven", "2158782.48"}); data.Add(new List {"Global", "North America", "Product Two", "Item Nine", "72634.46"}); data.Add(new List {"Global", "Europe", "Product Two", "Item Nine", "127500"}); data.Add(new List {"Global", "North America", "Product One", "Item One", "110964.44"}); data.Add(new List {"Global", "Asia", "Product Three", "Item Ten", "2064.99"}); data.Add(new List {"Global", "Europe", "Product One", "Item Eleven", "0"}); data.Add(new List {"Global", "Asia", "Product Two", "Item Nine", "1250"}); } } 

您所谓的static columns通常称为行组dynamic columns列组value columns值聚合或简单

为了实现这个目标,我建议使用以下简单的数据结构:

 public class PivotData { public IReadOnlyList Columns { get; set; } public IReadOnlyList Rows { get; set; } } public class PivotDataRow { public PivotValues Data { get; set; } public IReadOnlyList Values { get; set; } } 

PivotDataColumns成员将表示您所谓的标题 ,而Row成员 – 具有包含行组值和Values Data成员的PivotDataRow对象列表 – 相应Columns索引值( PivotDataRow.Values将始终具有相同的Count作为PivotData.Columns.Count )。

上述数据结构可序列化/可反序列化为JSON(使用Newtosoft.Json测试),可用于使用所需格式填充UI。

用于表示行组值,列组值和聚合值的核心数据结构如下:

 public class PivotValues : IReadOnlyList, IEquatable, IComparable { readonly IReadOnlyList source; readonly int offset, count; public PivotValues(IReadOnlyList source) : this(source, 0, source.Count) { } public PivotValues(IReadOnlyList source, int offset, int count) { this.source = source; this.offset = offset; this.count = count; } public string this[int index] => source[offset + index]; public int Count => count; public IEnumerator GetEnumerator() { for (int i = 0; i < count; i++) yield return this[i]; } IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); public override int GetHashCode() { unchecked { var comparer = EqualityComparer.Default; int hash = 17; for (int i = 0; i < count; i++) hash = hash * 31 + comparer.GetHashCode(this[i]); return hash; } } public override bool Equals(object obj) => Equals(obj as PivotValues); public bool Equals(PivotValues other) { if (this == other) return true; if (other == null) return false; var comparer = EqualityComparer.Default; for (int i = 0; i < count; i++) if (!comparer.Equals(this[i], other[i])) return false; return true; } public int CompareTo(PivotValues other) { if (this == other) return 0; if (other == null) return 1; var comparer = Comparer.Default; for (int i = 0; i < count; i++) { var compare = comparer.Compare(this[i], other[i]); if (compare != 0) return compare; } return 0; } public override string ToString() => string.Join(", ", this); // For debugging } 

基本上它表示具有相等和顺序比较语义的string列表的范围(切片)。 前者允许在数据透视转换期间使用基于哈希的高效LINQ运算符,而后者允许可选的排序。 此外,此数据结构允许有效转换,无需分配新列表,同时在从JSON反序列化时保留实际列表。

(通过实现IEquatable接口 – GetHashCodeEquals方法提供相等比较。通过这样做,它允许根据输入List>List元素内指定范围内的值将两个PivotValues类实例视为相等List> 。类似,通过实现IComparable接口提供排序 – CompareTo方法))

转型本身很简单:

 public static PivotData ToPivot(this List> data, int rowDataCount, int columnDataCount, int valueDataCount) { int rowDataStart = 0; int columnDataStart = rowDataStart + rowDataCount; int valueDataStart = columnDataStart + columnDataCount; var columns = data .Select(r => new PivotValues(r, columnDataStart, columnDataCount)) .Distinct() .OrderBy(c => c) // Optional .ToList(); var emptyValues = new PivotValues(new string[valueDataCount]); // For missing (row, column) intersection var rows = data .GroupBy(r => new PivotValues(r, rowDataStart, rowDataCount)) .Select(rg => new PivotDataRow { Data = rg.Key, Values = columns.GroupJoin(rg, c => c, r => new PivotValues(r, columnDataStart, columnDataCount), (c, vg) => vg.Any() ? new PivotValues(vg.First(), valueDataStart, valueDataCount) : emptyValues ).ToList() }) .OrderBy(r => r.Data) // Optional .ToList(); return new PivotData { Columns = columns, Rows = rows }; } 

首先使用简单的LINQ Distinct运算符确定列(标题)。 然后通过按行列对源集进行分组来确定行。 每个行分组中的值由外部连接Columns与分组内容确定。

由于我们的数据结构实现,LINQ转换非常有效(空间和时间)。 列和行排序是可选的,如果您不需要,可以将其删除。

使用虚拟数据进行样本测试:

 var pivotData = data.ToPivot(2, 2, 1); var json = JsonConvert.SerializeObject(pivotData); var pivotData2 = JsonConvert.DeserializeObject(json); 

这是执行此操作的LINQ方法:

 var working = data .Select(d => new { Region_L1 = d[0], Region_L2 = d[1], Product_L1 = d[2], Product_L2 = d[3], Value = double.Parse(d[4]), }); var output = working .GroupBy(x => new { x.Region_L1, x.Region_L2 }, x => new { x.Product_L1, x.Product_L2, x.Value }) .Select(x => new { x.Key, Lookup = x.ToLookup(y => new { y.Product_L1, y.Product_L2 }, y => y.Value) }) .Select(x => new { x.Key.Region_L1, x.Key.Region_L2, P_One_One = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item One" }].Sum(), P_One_Two = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Two" }].Sum(), P_One_Four = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Four" }].Sum(), P_One_Six = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Six" }].Sum(), P_One_Eight = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Eight" }].Sum(), P_One_Eleven = x.Lookup[new { Product_L1 = "Product One", Product_L2 = "Item Eleven" }].Sum(), P_Two_Five = x.Lookup[new { Product_L1 = "Product Two", Product_L2 = "Item Five" }].Sum(), P_Two_Seven = x.Lookup[new { Product_L1 = "Product Two", Product_L2 = "Item Seven" }].Sum(), P_Two_Nine = x.Lookup[new { Product_L1 = "Product Two", Product_L2 = "Item Nine" }].Sum(), P_Three_Three = x.Lookup[new { Product_L1 = "Product Three", Product_L2 = "Item Three" }].Sum(), P_Three_Ten = x.Lookup[new { Product_L1 = "Product Three", Product_L2 = "Item Ten" }].Sum(), }); 

这给了:

输出1

请注意,LINQ需要输出列的特定字段名称。

如果列数不知道,但你有一个方便的headerInfo List>那么你可以这样做:

 var output = working .GroupBy(x => new { x.Region_L1, x.Region_L2 }, x => new { x.Product_L1, x.Product_L2, x.Value }) .Select(x => new { x.Key, Lookup = x.ToLookup(y => new { y.Product_L1, y.Product_L2 }, y => y.Value) }) .Select(x => new { x.Key.Region_L1, x.Key.Region_L2, Headers = headerInfo .Select(y => new { Product_L1 = y[0], Product_L2 = y[1] }) .Select(y => new { y.Product_L1, y.Product_L2, Value = x.Lookup[y].Sum() }) .ToArray(), }); 

这给了:

输出2

您可以使用NReco PivotData库以下列方式按任意数量的列创建数据透视表(不要忘记安装“NReco.PivotData”nuget包):

 // rows in dataset are represented as 'arrays' // lets define 'field name' -> 'field index' mapping var fieldToIndex = new Dictionary() { {"Region L1", 0}, {"Region L2", 1}, {"Product L1", 2}, {"Product L2", 3}, {"Val", 4} }; // create multidimensional dataset var pvtData = new PivotData( // group by 4 dimensions new[]{"Region L1", "Region L2", "Product L1", "Product L2"}, // value (use CompositeAggregatorFactory for multiple values) new SumAggregatorFactory("Val") ); pvtData.ProcessData(data, (row, field) => ((IList)row)[fieldToIndex[field]] ); // create pivot table data model by the grouped data var pvtTbl = new PivotTable( // dimensions for rows new[] {"Region L1", "Region L2"}, // dimensions for columns new[] {"Product L1", "Product L2"}, pvtData); // now you can iterate through 'pvtTbl.RowKeys' and 'pvtTbl.ColumnKeys' // to get row\column header labels and use 'pvtTbl.GetValue()' // or 'pvtTbl[]' to pivot table get values // you can easily render pivot table to HTML (or Excel, CSV) with // components from PivotData Toolkit (NReco.PivotData.Extensions assembly): var htmlResult = new StringWriter(); var pvtHtmlWr = new PivotTableHtmlWriter(htmlResult); pvtHtmlWr.Write(pvtTbl); var pvtTblHtml = htmlResult.ToString(); 

默认情况下,数据透视表行/列按标题(AZ)排序。 您可以根据需要更改订单 。

PivotData OLAP库(PivotData,PivotTable类)可以在单部署项目中免费使用。 高级组件(如PivotTableHtmlWriter)需要商业许可证密钥。

有点“简化”版本:

 string[][] data = { new [] { "Global", "Europe" , "Product One" , "Item One" , "579984.59" }, new [] { "Global", "North America", "Product One" , "Item Two" , "314586.73" }, new [] { "Global", "Asia" , "Product One" , "Item One" , "62735.13" }, new [] { "Global", "Asia" , "Product Two" , "Item Five" , "12619234.69" }, new [] { "Global", "North America", "Product Two" , "Item Five" , "8953713.39" }, new [] { "Global", "Europe" , "Product One" , "Item Two" , "124267.4" }, new [] { "Global", "Asia" , "Product One" , "Item Four" , "482338.49" }, new [] { "Global", "North America", "Product One" , "Item Four" , "809185.13" }, new [] { "Global", "Europe" , "Product One" , "Item Four" , "233101" }, new [] { "Global", "Asia" , "Product One" , "Item Two" , "120561.65" }, new [] { "Global", "North America", "Product One" , "Item Six" , "1517359.37" }, new [] { "Global", "Europe" , "Product One" , "Item Six" , "382590.45" }, new [] { "Global", "North America", "Product One" , "Item Eight" , "661835.64" }, new [] { "Global", "Europe" , "Product Three", "Item Three" , "0" }, new [] { "Global", "Europe" , "Product One" , "Item Eight" , "0" }, new [] { "Global", "Europe" , "Product Two" , "Item Five" , "3478145.38" }, new [] { "Global", "Asia" , "Product One" , "Item Six" , "0" }, new [] { "Global", "North America", "Product Two" , "Item Seven" , "4247059.97" }, new [] { "Global", "Asia" , "Product Two" , "Item Seven" , "2163718.01" }, new [] { "Global", "Europe" , "Product Two" , "Item Seven" , "2158782.48" }, new [] { "Global", "North America", "Product Two" , "Item Nine" , "72634.46" }, new [] { "Global", "Europe" , "Product Two" , "Item Nine" , "127500" }, new [] { "Global", "North America", "Product One" , "Item One" , "110964.44" }, new [] { "Global", "Asia" , "Product Three", "Item Ten" , "2064.99" }, new [] { "Global", "Europe" , "Product One" , "Item Eleven", "0" }, new [] { "Global", "Asia" , "Product Two" , "Item Nine" , "1250" } }; string[][] headerInfo = { new [] { "Product One" , "Item One" }, new [] { "Product One" , "Item Two" }, new [] { "Product One" , "Item Four" }, new [] { "Product One" , "Item Six" }, new [] { "Product One" , "Item Eight" }, new [] { "Product One" , "Item Eleven" }, new [] { "Product Two" , "Item Five" }, new [] { "Product Two" , "Item Seven" }, new [] { "Product Two" , "Item Nine" }, new [] { "Product Three", "Item Three" }, new [] { "Product Three", "Item Ten" } }; 

由于我们可以预先确定结果的大小,因此我们可以将其定义为多维数组。

让我们选择一个函数式方法并将结果视为累加器 ,因此我们只需编写一个reducer (用于Linq Aggregate方法)。

它将基于字典来映射水平到垂直线的变换,而另一个状态字典来构建它的行(当然字典需要标准的比较器)。

假设你有一个经过validation的 header_info ,所以 – 首先 – 我必须修复第一个与最后一个重复的条目。

与其他解决方案相比,以下解决方案非常有效 (我的笔记本电脑只需要1毫秒,比接受的答案快4倍)。

 using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; namespace pivot { class Program { static void Main(string[] args) { var _staticColumnCount = 2; //Columns that should not be pivoted var _dynamicColumnCount = 2; // Columns which needs to be pivoted to form header var _valueColumnCount = 1; //Columns that represent Actual value var valueColumnIndex = 4; //Assuming index starts with 0; List> headerInfo = new List>(); headerInfo.Add(new List { "Product Three", "Item Three" }); headerInfo.Add(new List { "Product Two", "Item Five" }); headerInfo.Add(new List { "Product Two", "Item Seven" }); headerInfo.Add(new List { "Product Two", "Item Nine" }); headerInfo.Add(new List { "Product One", "Item One" }); headerInfo.Add(new List { "Product One", "Item Two" }); headerInfo.Add(new List { "Product One", "Item Four" }); headerInfo.Add(new List { "Product One", "Item Six" }); headerInfo.Add(new List { "Product One", "Item Eight" }); headerInfo.Add(new List { "Product One", "Item Eleven" }); headerInfo.Add(new List { "Product Three", "Item Ten" }); List> data = new List>(); data.Add(new List { "Global", "Europe", "Product One", "Item One", "579984.59" }); data.Add(new List { "Global", "North America", "Product One", "Item Two", "314586.73" }); data.Add(new List { "Global", "Asia", "Product One", "Item One", "62735.13" }); data.Add(new List { "Global", "Asia", "Product Two", "Item Five", "12619234.69" }); data.Add(new List { "Global", "North America", "Product Two", "Item Five", "8953713.39" }); data.Add(new List { "Global", "Europe", "Product One", "Item Two", "124267.4" }); data.Add(new List { "Global", "Asia", "Product One", "Item Four", "482338.49" }); data.Add(new List { "Global", "North America", "Product One", "Item Four", "809185.13" }); data.Add(new List { "Global", "Europe", "Product One", "Item Four", "233101" }); data.Add(new List { "Global", "Asia", "Product One", "Item Two", "120561.65" }); data.Add(new List { "Global", "North America", "Product One", "Item Six", "1517359.37" }); data.Add(new List { "Global", "Europe", "Product One", "Item Six", "382590.45" }); data.Add(new List { "Global", "North America", "Product One", "Item Eight", "661835.64" }); data.Add(new List { "Global", "Europe", "Product Three", "Item Three", "0" }); data.Add(new List { "Global", "Europe", "Product One", "Item Eight", "0" }); data.Add(new List { "Global", "Europe", "Product Two", "Item Five", "3478145.38" }); data.Add(new List { "Global", "Asia", "Product One", "Item Six", "0" }); data.Add(new List { "Global", "North America", "Product Two", "Item Seven", "4247059.97" }); data.Add(new List { "Global", "Asia", "Product Two", "Item Seven", "2163718.01" }); data.Add(new List { "Global", "Europe", "Product Two", "Item Seven", "2158782.48" }); data.Add(new List { "Global", "North America", "Product Two", "Item Nine", "72634.46" }); data.Add(new List { "Global", "Europe", "Product Two", "Item Nine", "127500" }); data.Add(new List { "Global", "North America", "Product One", "Item One", "110964.44" }); data.Add(new List { "Global", "Asia", "Product Three", "Item Ten", "2064.99" }); data.Add(new List { "Global", "Europe", "Product One", "Item Eleven", "0" }); data.Add(new List { "Global", "Asia", "Product Two", "Item Nine", "1250" }); Stopwatch stopwatch = new Stopwatch(); stopwatch.Start(); Reducer reducer = new Reducer(); reducer.headerCount = headerInfo.Count; reducer.headerCount = headerInfo.Count; var resultCount = (int)Math.Ceiling((double)data.Count / (double)reducer.headerCount); ValueArray[,] results = new ValueArray[resultCount, _staticColumnCount + reducer.headerCount]; reducer.headerDict = new Dictionary, int>(new MyComparer()); reducer.skipCols = _staticColumnCount; reducer.headerKeys = _dynamicColumnCount; reducer.rowDict = new Dictionary, int>(new MyComparer()); reducer.currentLine = 0; reducer.valueCount = _valueColumnCount; for (int i = 0; i < reducer.headerCount; i++) { reducer.headerDict.Add(headerInfo[i], i); } results = data.Aggregate(results, reducer.reduce); stopwatch.Stop(); Console.WriteLine("millisecs: " + stopwatch.ElapsedMilliseconds); for (int i = 0; i < resultCount; i++) { var curr_header = new string[reducer.headerCount]; IEnumerable curr_key = null; for (int j = 0; j < reducer.headerCount; j++) { curr_header[j] = "[" + String.Join(",", (results[i, reducer.skipCols + j]?.values) ?? new string[0]) + "]"; curr_key = curr_key ?? (results[i, reducer.skipCols + j]?.row_keys); } Console.WriteLine(String.Join(",", curr_key) + ": " + String.Join(",", curr_header) ); } Console.ReadKey(); // if you want to compare it to the accepted answer stopwatch.Reset(); stopwatch.Start(); var pivotData = data.ToPivot(2, 2, 1); // with all needed classes/methods stopwatch.Stop(); Console.WriteLine("millisecs: " + stopwatch.ElapsedMilliseconds); Console.ReadKey(); } internal class ValueArray { internal IEnumerable row_keys; internal string[] values; } internal class Reducer { internal int headerCount; internal int skipCols; internal int headerKeys; internal int valueCount; internal Dictionary, int> headerDict; internal Dictionary, int> rowDict; internal int currentLine; internal ValueArray[,] reduce(ValueArray[,] results, List line) { var header_col = headerDict[line.Skip(skipCols).Take(headerKeys)]; var row_keys = line.Take(skipCols); var curr_values = new string[valueCount]; for (int i = 0; i < valueCount; i++) { curr_values[i] = line[skipCols + headerKeys + i]; } if (rowDict.ContainsKey(row_keys)) { results[rowDict[row_keys], skipCols + header_col] = new ValueArray(); results[rowDict[row_keys], skipCols + header_col].row_keys = row_keys; results[rowDict[row_keys], skipCols + header_col].values = curr_values; } else { rowDict.Add(row_keys, currentLine); results[currentLine, skipCols + header_col] = new ValueArray(); results[currentLine, skipCols + header_col].row_keys = row_keys; results[currentLine, skipCols + header_col].values = curr_values; currentLine++; } return results; } } public class MyComparer : IEqualityComparer> { public bool Equals(IEnumerable x, IEnumerable y) { return x.SequenceEqual(y); } public int GetHashCode(IEnumerable obj) { return obj.First().GetHashCode(); } } } }