如何在合并PDF时去除空白区域

我有一些代码，它接收3个不同的PDF字节数组并将它们合并。这段代码运行良好。问题（至少有些人认为是问题）在于：每个PDF都被当作一个完整的页面（打印时），即使它上面只有4英寸的内容，因此会在垂直方向上留下7英寸的空白。接着放入中间的文档，其末尾可能有也可能没有垂直空白。然后页脚同样被单独放在一页上。

这是代码：

// Render the primary letter to PDF bytes.
byte[] Bytes = rv.LocalReport.Render("PDF", null, out MimeType, out Encoding, out Extension, out StreamIDs, out Warnings);
List<byte[]> MergeSets = new List<byte[]>(); // This is filled prior to this code

// Append any other pages to this primary letter
if (MergeSets.Count > 0)
{
    MemoryStream ms = new MemoryStream();
    Document document = new Document();
    PdfCopy copy = new PdfCopy(document, ms);
    document.Open();

    PdfImportedPage page;
    PdfReader reader = new PdfReader(Bytes); // read the generated primary Letter
    int pages = reader.NumberOfPages;

    // Page numbers passed to GetImportedPage start at 1, hence the pre-increment.
    for (int i = 0; i < pages; )
    {
        page = copy.GetImportedPage(reader, ++i);
        copy.AddPage(page);
    } // foreach of the pages in the Cover Letter

    // Now append the merge sets
    foreach (byte[] ba in MergeSets)
    {
        reader = new PdfReader(ba);
        pages = reader.NumberOfPages;

        for (int i = 0; i < pages; )
        {
            page = copy.GetImportedPage(reader, ++i);
            copy.AddPage(page);
        } // foreach of the pages in the current merge set
    } // foreach of the sets of data

    document.Close();

    // ToArray() returns exactly the bytes written. GetBuffer() would return the
    // whole internal buffer, including unused capacity after the end of the PDF.
    ServerSaved = SaveGeneratedLetter(ms.ToArray(), DateTime.Now.Year, hl.LetterName, SaveName);
} // if there is anything to merge

有没有办法在合并各页面时裁剪/去除每个PDF末尾的垂直空白区域，使其显示为一个无缝的文档？

更新：这是我试图合并的一些示例.pdf文件。

标题，正文，页脚

更新2：使用答案：

我已将@mkl的代码转换为C#，如下所示。

工具类：

 /// <summary>
 /// Merges the pages of several PDFs onto target pages as densely as possible.
 /// Content is stacked from the top margin downwards, and a source page that
 /// does not fit into the remaining space is split between two vertical use
 /// sections reported by <see cref="PageVerticalAnalyzer"/>.
 /// </summary>
 public class PdfVeryDenseMergeTool
 {
     private Rectangle PageSize;
     private float TopMargin;
     private float BottomMargin;
     private float Gap;
     private Document Document = null;
     private PdfWriter Writer = null;
     private float YPosition = 0;

     /// <summary>Creates a merge tool for the given target page geometry.</summary>
     /// <param name="size">Size of the target pages.</param>
     /// <param name="top">Top margin of the target pages.</param>
     /// <param name="bottom">Bottom margin of the target pages.</param>
     /// <param name="gap">Vertical gap left below each placed content slice.</param>
     public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap)
     {
         this.PageSize = size;
         this.TopMargin = top;
         this.BottomMargin = bottom;
         this.Gap = gap;
     } // PdfVeryDenseMergeTool

     /// <summary>Merges all pages of all <paramref name="inputs"/> into <paramref name="outputStream"/>.</summary>
     public void Merge(MemoryStream outputStream, List<PdfReader> inputs)
     {
         try
         {
             this.OpenDocument(outputStream);

             foreach (PdfReader reader in inputs)
             {
                 this.Merge(reader);
             } // foreach of the PDF files to merge
         }
         finally
         {
             this.CloseDocument();
         } // try-catch-finally
     } // Merge

     /// <summary>Creates the target document and writer and starts the first page.</summary>
     public void OpenDocument(MemoryStream outputStream)
     {
         this.Document = new Document(this.PageSize, 36, 36, this.TopMargin, this.BottomMargin);
         this.Writer = PdfWriter.GetInstance(this.Document, outputStream);
         this.Document.Open();
         this.NewPage();
     } // OpenDocument

     /// <summary>Closes the target document and resets the tool's state.</summary>
     public void CloseDocument()
     {
         try
         {
             // Document is still null when OpenDocument failed before assigning it;
             // skipping Close() keeps the original exception from being masked.
             if (this.Document != null)
             {
                 this.Document.Close();
             }
         }
         finally
         {
             this.Document = null;
             this.Writer = null;
             this.YPosition = 0;
         } // try-finally
     } // CloseDocument

     /// <summary>Starts a new target page and moves the write position to its top margin.</summary>
     public void NewPage()
     {
         this.Document.NewPage();
         this.YPosition = this.PageSize.GetTop(this.TopMargin);
     } // NewPage

     /// <summary>Merges every page of <paramref name="reader"/> into the target document.</summary>
     public void Merge(PdfReader reader)
     {
         PdfReaderContentParser parser = new PdfReaderContentParser(reader);

         for (int pageIndex = 1; pageIndex <= reader.NumberOfPages; pageIndex++)
         {
             this.Merge(reader, parser, pageIndex);
         } // foreach of the pages of the current PDF
     } // Merge

     /// <summary>
     /// Places the used vertical sections of one source page onto the target,
     /// starting a new target page whenever the next section does not fit.
     /// </summary>
     /// <exception cref="ArgumentException">
     /// Thrown when nothing can be placed even on a freshly started target page.
     /// </exception>
     public void Merge(PdfReader reader, PdfReaderContentParser parser, int pageIndex)
     {
         PdfImportedPage importedPage = this.Writer.GetImportedPage(reader, pageIndex);
         PdfContentByte directContent = this.Writer.DirectContent;

         PageVerticalAnalyzer finder = parser.ProcessContent(pageIndex, new PageVerticalAnalyzer());

         // Fewer than two flips means no complete used section: nothing to place.
         if (finder.VerticalFlips.Count < 2)
         {
             return;
         }

         Rectangle pageSizeToImport = reader.GetPageSize(pageIndex);

         // Walk the flips from the last entry (highest y, i.e. the top of the page) downwards.
         int startFlip = finder.VerticalFlips.Count - 1;
         bool first = true;

         while (startFlip > 0)
         {
             if (!first)
             {
                 this.NewPage();
             }

             float freeSpace = this.YPosition - this.PageSize.GetBottom(this.BottomMargin);

             // Extend the slice downwards, one used section (two flips) at a time,
             // for as long as it still fits into the free space.
             int endFlip = startFlip + 1;
             while ((endFlip > 1) && (finder.VerticalFlips[startFlip] - finder.VerticalFlips[endFlip - 2] < freeSpace))
             {
                 endFlip -= 2;
             }

             if (endFlip < startFlip)
             {
                 float height = finder.VerticalFlips[startFlip] - finder.VerticalFlips[endFlip];

                 // Clip to the slice so the rest of the imported page is not drawn.
                 directContent.SaveState();
                 directContent.Rectangle(0, this.YPosition - height, pageSizeToImport.Width, height);
                 directContent.Clip();
                 directContent.NewPath();

                 this.Writer.DirectContent.AddTemplate(importedPage, 0, this.YPosition - (finder.VerticalFlips[startFlip] - pageSizeToImport.Bottom));

                 directContent.RestoreState();
                 this.YPosition -= height + this.Gap;
                 startFlip = endFlip - 1;
             }
             else if (!first)
             {
                 throw new ArgumentException(string.Format("Page {0} content too large", pageIndex));
             } // if

             first = false;
         } // while
     } // Merge
 } // PdfVeryDenseMergeTool

RenderListener类：
更新3：修复了1行代码，现在可以正常工作了：请参阅代码中的注释

 /// <summary>
 /// Render listener that records which vertical ranges of a page are covered
 /// by text or bitmap images.
 /// </summary>
 public class PageVerticalAnalyzer : IRenderListener
 {
     public PageVerticalAnalyzer() { }

     /// <summary>
     /// Ascending y coordinates that alternate between the start and the end of
     /// a used vertical section (as maintained by <see cref="AddVerticalUseSection"/>).
     /// </summary>
     public List<float> VerticalFlips = new List<float>();

     /// <summary>Marks the interval between <paramref name="from"/> and <paramref name="to"/> as used.</summary>
     public void AddVerticalUseSection(float from, float to)
     {
         if (to < from)
         {
             float temp = to;
             to = from;
             from = temp;
         }

         int i = 0;
         int j = 0;

         // i: index of the first flip not below 'from'; j: index of the first flip not below 'to'.
         for (i = 0; i < VerticalFlips.Count; i++)
         {
             float flip = VerticalFlips[i];
             if (flip < from) continue;

             for (j = i; j < VerticalFlips.Count; j++)
             {
                 flip = VerticalFlips[j];
                 if (flip < to) continue;
                 break;
             }
             break;
         }

         // An even index lies outside every recorded section, an odd one inside a section.
         bool fromOutsideInterval = i % 2 == 0;
         bool toOutsideInterval = j % 2 == 0;

         // Drop every flip that the new interval swallows.
         while (j-- > i) VerticalFlips.RemoveAt(j); // This was the problem line with just .Remove(j)

         if (toOutsideInterval) VerticalFlips.Insert(i, to);
         if (fromOutsideInterval) VerticalFlips.Insert(i, from);
     } // AddVerticalUseSection

     public void BeginTextBlock() { /* Do nothing */ }

     public void EndTextBlock() { /* Do nothing */ }

     /// <summary>Marks the vertical extent of an image's transformed unit square as used.</summary>
     public void RenderImage(ImageRenderInfo renderInfo)
     {
         Matrix ctm = renderInfo.GetImageCTM();
         List<float> YCoords = new List<float>(4) { 0, 0, 0, 0 };

         // Transform the four corners (0,0), (0,1), (1,0), (1,1) by the image matrix.
         for (int x = 0; x < 2; x++)
         {
             for (int y = 0; y < 2; y++)
             {
                 Vector corner = new Vector(x, y, 1).Cross(ctm);
                 YCoords[2 * x + y] = corner[Vector.I2];
             }
         }

         YCoords.Sort();
         AddVerticalUseSection(YCoords[0], YCoords[3]);
     } // RenderImage

     /// <summary>Marks the vertical extent between a text chunk's ascent and descent lines as used.</summary>
     public void RenderText(TextRenderInfo renderInfo)
     {
         LineSegment ascentLine = renderInfo.GetAscentLine();
         LineSegment descentLine = renderInfo.GetDescentLine();

         List<float> YCoords = new List<float>(4)
         {
             ascentLine.GetStartPoint()[Vector.I2],
             ascentLine.GetEndPoint()[Vector.I2],
             descentLine.GetStartPoint()[Vector.I2],
             descentLine.GetEndPoint()[Vector.I2],
         };

         YCoords.Sort();
         AddVerticalUseSection(YCoords[0], YCoords[3]);
     } // RenderText
 } // PageVerticalAnalyzer

用于收集文件和运行该工具的代码：

 /// <summary>
 /// Loads the PDFs to merge, runs <c>PdfVeryDenseMergeTool</c> over them and
 /// leaves the merged PDF in a memory stream.
 /// </summary>
 public void TestMergeDocuments()
 {
     PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(iTextSharp.text.PageSize.A4, 18, 18, 10);
     List<byte[]> Files = new List<byte[]>();

     // Code to load each of the 3 files I need into this byte array list

     using (MemoryStream ms = new MemoryStream())
     {
         List<PdfReader> files = new List<PdfReader>();

         try
         {
             foreach (byte[] ba in Files)
             {
                 files.Add(new PdfReader(ba));
             } // foreach of the sets of data

             tool.Merge(ms, files);

             // Save the file using: ms.ToArray()
             // (ms.GetBuffer() would also include the unused tail of the internal buffer.)
         }
         finally
         {
             // The readers are no longer needed once the merge has finished.
             foreach (PdfReader reader in files)
             {
                 reader.Close();
             }
         }
     } // using the memory stream
 } // TestMergeDocuments

以下示例工具是仿照这个答案中的PdfDenseMergeTool工具实现的，OP曾评论说PdfDenseMergeTool已经接近[他]所需要的东西。与PdfDenseMergeTool一样，这个工具是用Java/iText实现的，因为我对它比对C#/iTextSharp更熟悉。由于OP已经将PdfDenseMergeTool转换为C#/iTextSharp，把这个工具也翻译过去应该不是太大的问题。

PdfVeryDenseMergeTool

此工具与PdfDenseMergeTool类似，从多个PdfReader实例获取页面的页面内容，并尝试密集地合并它们，即如果有足够的可用空间，则将多个源页面的内容放到单个目标页面上。与之前的工具相比，此工具甚至可以拆分源页面内容，以实现更密集的合并。

就像其他工具一样，PdfVeryDenseMergeTool不考虑矢量图形，因为iText(Sharp)解析API只转发文本和位图图像。

PdfVeryDenseMergeTool会把无法完整放入目标页面的源页面沿一条水平线拆分，该水平线不与任何文本字形或位图图形的边界框相交。

工具类：

 /**
  * Merges the pages of several PDFs onto target pages as densely as possible.
  * Content is stacked from the top margin downwards; a source page that does not
  * fit into the remaining space is split between two vertical use sections
  * reported by {@link PageVerticalAnalyzer}.
  */
 public class PdfVeryDenseMergeTool
 {
     /**
      * @param size   size of the target pages
      * @param top    top margin of the target pages
      * @param bottom bottom margin of the target pages
      * @param gap    vertical gap left below each placed content slice
      */
     public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap)
     {
         this.pageSize = size;
         this.topMargin = top;
         this.bottomMargin = bottom;
         this.gap = gap;
     }

     /** Merges all pages of all inputs into the given output stream. */
     public void merge(OutputStream outputStream, Iterable<PdfReader> inputs) throws DocumentException, IOException
     {
         try
         {
             openDocument(outputStream);

             for (PdfReader reader : inputs)
             {
                 merge(reader);
             }
         }
         finally
         {
             closeDocument();
         }
     }

     /** Creates the target document and writer and starts the first page. */
     void openDocument(OutputStream outputStream) throws DocumentException
     {
         final Document document = new Document(pageSize, 36, 36, topMargin, bottomMargin);
         final PdfWriter writer = PdfWriter.getInstance(document, outputStream);
         document.open();
         this.document = document;
         this.writer = writer;
         newPage();
     }

     /** Closes the target document and resets the tool's state. */
     void closeDocument()
     {
         try
         {
             document.close();
         }
         finally
         {
             this.document = null;
             this.writer = null;
             this.yPosition = 0;
         }
     }

     /** Starts a new target page and moves the write position to its top margin. */
     void newPage()
     {
         document.newPage();
         yPosition = pageSize.getTop(topMargin);
     }

     /** Merges every page of the given reader into the target document. */
     void merge(PdfReader reader) throws IOException
     {
         PdfReaderContentParser parser = new PdfReaderContentParser(reader);

         for (int page = 1; page <= reader.getNumberOfPages(); page++)
         {
             merge(reader, parser, page);
         }
     }

     /**
      * Places the used vertical sections of one source page onto the target,
      * starting a new target page whenever the next section does not fit.
      *
      * @throws IllegalArgumentException when nothing can be placed even on a
      *                                  freshly started target page
      */
     void merge(PdfReader reader, PdfReaderContentParser parser, int page) throws IOException
     {
         PdfImportedPage importedPage = writer.getImportedPage(reader, page);
         PdfContentByte directContent = writer.getDirectContent();

         PageVerticalAnalyzer finder = parser.processContent(page, new PageVerticalAnalyzer());

         // Fewer than two flips means no complete used section: nothing to place.
         if (finder.verticalFlips.size() < 2)
         {
             return;
         }

         Rectangle pageSizeToImport = reader.getPageSize(page);

         // Walk the flips from the last entry (highest y, i.e. the top of the page) downwards.
         int startFlip = finder.verticalFlips.size() - 1;
         boolean first = true;

         while (startFlip > 0)
         {
             if (!first)
             {
                 newPage();
             }

             float freeSpace = yPosition - pageSize.getBottom(bottomMargin);

             // Extend the slice downwards, one used section (two flips) at a time,
             // for as long as it still fits into the free space.
             int endFlip = startFlip + 1;
             while ((endFlip > 1) && (finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip - 2) < freeSpace))
             {
                 endFlip -= 2;
             }

             if (endFlip < startFlip)
             {
                 float height = finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip);

                 // Clip to the slice so the rest of the imported page is not drawn.
                 directContent.saveState();
                 directContent.rectangle(0, yPosition - height, pageSizeToImport.getWidth(), height);
                 directContent.clip();
                 directContent.newPath();

                 writer.getDirectContent().addTemplate(importedPage, 0, yPosition - (finder.verticalFlips.get(startFlip) - pageSizeToImport.getBottom()));

                 directContent.restoreState();
                 yPosition -= height + gap;
                 startFlip = endFlip - 1;
             }
             else if (!first)
             {
                 throw new IllegalArgumentException(String.format("Page %s content sections too large.", page));
             }

             first = false;
         }
     }

     Document document = null;
     PdfWriter writer = null;
     float yPosition = 0;

     final Rectangle pageSize;
     final float topMargin;
     final float bottomMargin;
     final float gap;
 }

（ PdfVeryDenseMergeTool.java ）

此工具使用自定义RenderListener与iText解析器API一起使用：

 public class PageVerticalAnalyzer implements RenderListener { @Override public void beginTextBlock() { } @Override public void endTextBlock() { } /* * @see RenderListener#renderText(TextRenderInfo) */ @Override public void renderText(TextRenderInfo renderInfo) { LineSegment ascentLine = renderInfo.getAscentLine(); LineSegment descentLine = renderInfo.getDescentLine(); float[] yCoords = new float[]{ ascentLine.getStartPoint().get(Vector.I2), ascentLine.getEndPoint().get(Vector.I2), descentLine.getStartPoint().get(Vector.I2), descentLine.getEndPoint().get(Vector.I2) }; Arrays.sort(yCoords); addVerticalUseSection(yCoords[0], yCoords[3]); } /* * @see RenderListener#renderImage(ImageRenderInfo) */ @Override public void renderImage(ImageRenderInfo renderInfo) { Matrix ctm = renderInfo.getImageCTM(); float[] yCoords = new float[4]; for (int x=0; x < 2; x++) for (int y=0; y < 2; y++) { Vector corner = new Vector(x, y, 1).cross(ctm); yCoords[2*x+y] = corner.get(Vector.I2); } Arrays.sort(yCoords); addVerticalUseSection(yCoords[0], yCoords[3]); } /** * This method marks the given interval as used. */ void addVerticalUseSection(float from, float to) { if (to < from) { float temp = to; to = from; from = temp; } int i=0, j=0; for (; i i) verticalFlips.remove(j); if (toOutsideInterval) verticalFlips.add(i, to); if (fromOutsideInterval) verticalFlips.add(i, from); } final List verticalFlips = new ArrayList(); }

（ PageVerticalAnalyzer.java ）

它使用如下：

 // Constructor arguments: target page size (A4), top margin 18, bottom margin 18, gap 5.
 PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 5);
 // Merge all input readers into the output stream.
 tool.merge(output, inputs);

（ VeryDenseMerging.java ）

将其应用于OP的示例文件

Header.pdf

Header.pdf页面

Body.pdf

Body.pdf页面

Footer.pdf

Footer.pdf页面

它会产生

A4非常密集的合并结果

如果将目标文档页面大小定义为A5横向：

 // Same call as before, but with an explicit 595 x 421 target page
 // (the A5 landscape size the surrounding text refers to); margins 18/18, gap 5.
 PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(new RectangleReadOnly(595,421), 18, 18, 5);
 // Merge all input readers into the output stream.
 tool.merge(output, inputs);

（ VeryDenseMerging.java ）

它产生了这个：

A5非常密集的合并结果

注意！这只是概念验证，并没有考虑所有可能性。例如，没有正确处理源页面或目标页面具有非平凡旋转（Rotate）值的情况。因此，它尚未准备好用于生产。

当前（5.5.6 SNAPSHOT）iText版本的改进

目前面向5.5.6的iText开发版增强了解析器功能，使其也能报告矢量图形。因此，我扩展了PageVerticalAnalyzer来利用这一点：

 public class PageVerticalAnalyzer implements ExtRenderListener { @Override public void beginTextBlock() { } @Override public void endTextBlock() { } @Override public void clipPath(int rule) { } ... static class SubPathSection { public SubPathSection(float x, float y, Matrix m) { float effectiveY = getTransformedY(x, y, m); pathFromY = effectiveY; pathToY = effectiveY; } void extendTo(float x, float y, Matrix m) { float effectiveY = getTransformedY(x, y, m); if (effectiveY < pathFromY) pathFromY = effectiveY; else if (effectiveY > pathToY) pathToY = effectiveY; } float getTransformedY(float x, float y, Matrix m) { return new Vector(x, y, 1).cross(m).get(Vector.I2); } float getFromY() { return pathFromY; } float getToY() { return pathToY; } private float pathFromY; private float pathToY; } /* * Beware: The implementation is not correct as it includes the control points of curves * which may be far outside the actual curve. * * @see ExtRenderListener#modifyPath(PathConstructionRenderInfo) */ @Override public void modifyPath(PathConstructionRenderInfo renderInfo) { Matrix ctm = renderInfo.getCtm(); List segmentData = renderInfo.getSegmentData(); switch (renderInfo.getOperation()) { case PathConstructionRenderInfo.MOVETO: subPath = null; case PathConstructionRenderInfo.LINETO: case PathConstructionRenderInfo.CURVE_123: case PathConstructionRenderInfo.CURVE_13: case PathConstructionRenderInfo.CURVE_23: for (int i = 0; i < segmentData.size()-1; i+=2) { if (subPath == null) { subPath = new SubPathSection(segmentData.get(i), segmentData.get(i+1), ctm); path.add(subPath); } else subPath.extendTo(segmentData.get(i), segmentData.get(i+1), ctm); } break; case PathConstructionRenderInfo.RECT: float x = segmentData.get(0); float y = segmentData.get(1); float w = segmentData.get(2); float h = segmentData.get(3); SubPathSection section = new SubPathSection(x, y, ctm); section.extendTo(x+w, y, ctm); section.extendTo(x, y+h, ctm); section.extendTo(x+w, y+h, ctm); path.add(section); 
case PathConstructionRenderInfo.CLOSE: subPath = null; break; default: } } /* * @see ExtRenderListener#renderPath(PathPaintingRenderInfo) */ @Override public Path renderPath(PathPaintingRenderInfo renderInfo) { if (renderInfo.getOperation() != PathPaintingRenderInfo.NO_OP) { for (SubPathSection section : path) addVerticalUseSection(section.getFromY(), section.getToY()); } path.clear(); subPath = null; return null; } List path = new ArrayList(); SubPathSection subPath = null; ... }

（ PageVerticalAnalyzer.java ）

一个简单的测试（ VeryDenseMerging.java方法testMergeOnlyGraphics ）合并这些文件

circlesOnlyA.pdf

circlesOnlyB.pdf

circlesOnlyC.pdf

circlesOnlyD.pdf

进入这个：

circlesOnlyMerge-veryDense.pdf

但再次提醒：这仅仅是概念验证。特别是modifyPath()需要改进，目前的实现并不正确，因为它包含了曲线的控制点，而这些控制点可能远在实际曲线之外。

如何在合并时删除空格

PdfVeryDenseMergeTool

当前（5.5.6 SNAPSHOT）iText版本的改进

如何通过点击asp.net中的按钮播放声音？

在listview控件中进行分页

Listview ItemSelectionChanged发射两次？

测试Microsoft Surface应用程序的最佳方法

如何从Catch块返回错误消息。现在回来是空的

获取arrays中的所有组合

AutoMockContainer，支持具有非接口依赖性的自动锁定类

使Camera LookDirection看起来正面

构造函数注入和默认重载

generics与inheritance（当不涉及集合类时）

如何在合并时删除空格

PdfVeryDenseMergeTool

当前（5.5.6 SNAPSHOT）iText版本的改进

如何通过点击asp.net中的按钮播放声音？

在listview控件中进行分页

Listview ItemSelectionChanged发射两次？

测试Microsoft Surface应用程序的最佳方法

如何从Catch块返回错误消息。 现在回来是空的

获取arrays中的所有组合

AutoMockContainer，支持具有非接口依赖性的自动锁定类

使Camera LookDirection看起来正面

构造函数注入和默认重载

generics与inheritance（当不涉及集合类时）

如何从Catch块返回错误消息。现在回来是空的