如何在合并时删除空格

我有一些代码需要3个不同的PDF字节数组并合并它们。 这段代码效果很好。 问题(有些人)认为每个PDF都被认为是一个完整的页面(如果打印),即使它上面只有4英寸的内容,因此垂直留下7英寸的空白区域。 然后将中间文档放入其中,并且可以在其末尾处具有或不具有垂直白色空间。 然后页脚也会放在自己的页面上。

这是代码:

byte[] Bytes = rv.LocalReport.Render("PDF", null, out MimeType, out Encoding, out Extension, out StreamIDs, out Warnings); List MergeSets = // This is filled prior to this code // Append any other pages to this primary letter if (MergeSets.Count > 0) { MemoryStream ms = new MemoryStream(); Document document = new Document(); PdfCopy copy = new PdfCopy(document, ms); document.Open(); PdfImportedPage page; PdfReader reader = new PdfReader(Bytes); // read the generated primary Letter int pages = reader.NumberOfPages; for (int i = 0; i < pages; ) { page = copy.GetImportedPage(reader, ++i); copy.AddPage(page); } // foreach of the pages in the Cover Letter // Now append the merge sets foreach (byte[] ba in MergeSets) { reader = new PdfReader(ba); pages = reader.NumberOfPages; for (int i = 0; i < pages; ) { page = copy.GetImportedPage(reader, ++i); copy.AddPage(page); } // foreach of the pages in the current merge set } // foreach of the sets of data document.Close(); ServerSaved = SaveGeneratedLetter(ms.GetBuffer(), DateTime.Now.Year, hl.LetterName, SaveName); } // if there is anything to merge 

有没有办法合并每个页面来剪辑/删除/删除每个pdf末尾的垂直空白区域,以便它显示为一个无缝文档?

更新:这是我试图合并的一些示例.pdf文件。

标题 , 正文 , 页脚

更新2:使用答案:

我已将@ mkl的代码转换为C#,而且在这里。

工具类:

 public class PdfVeryDenseMergeTool { private Rectangle PageSize; private float TopMargin; private float BottomMargin; private float Gap; private Document Document = null; private PdfWriter Writer = null; private float YPosition = 0; public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap) { this.PageSize = size; this.TopMargin = top; this.BottomMargin = bottom; this.Gap = gap; } // PdfVeryDenseMergeTool public void Merge(MemoryStream outputStream, List inputs) { try { this.OpenDocument(outputStream); foreach (PdfReader reader in inputs) { this.Merge(reader); } // foreach of the PDF files to merge } finally { this.CloseDocument(); } // try-catch-finally } // Merge public void OpenDocument(MemoryStream outputStream) { this.Document = new Document(PageSize, 36, 36, this.TopMargin, this.BottomMargin); this.Writer = PdfWriter.GetInstance(Document, outputStream); this.Document.Open(); this.NewPage(); } // OpenDocument public void CloseDocument() { try { this.Document.Close(); } finally { this.Document = null; this.Writer = null; this.YPosition = 0; } // try-finally } // CloseDocument public void NewPage() { this.Document.NewPage(); this.YPosition = PageSize.GetTop(this.TopMargin); } // Merge public void Merge(PdfReader reader) { PdfReaderContentParser parser = new PdfReaderContentParser(reader); for (int pageIndex = 1; pageIndex <= reader.NumberOfPages; pageIndex++) { this.Merge(reader, parser, pageIndex); } // foreach of the pages of the current PDF } // Merge public void Merge(PdfReader reader, PdfReaderContentParser parser, int pageIndex) { PdfImportedPage importedPage = Writer.GetImportedPage(reader, pageIndex); PdfContentByte directContent = Writer.DirectContent; PageVerticalAnalyzer finder = parser.ProcessContent(pageIndex, new PageVerticalAnalyzer()); if (finder.VerticalFlips.Count  0) { if (!first) this.NewPage(); float freeSpace = this.YPosition - PageSize.GetBottom(BottomMargin); int endFlip = startFlip + 1; while ((endFlip > 1) && (finder.VerticalFlips[startFlip] - finder.VerticalFlips[endFlip - 2] < freeSpace)) endFlip -= 2; if (endFlip < startFlip) { float height = finder.VerticalFlips[startFlip] - finder.VerticalFlips[endFlip]; directContent.SaveState(); directContent.Rectangle(0, this.YPosition - height, pageSizeToImport.Width, height); directContent.Clip(); directContent.NewPath(); this.Writer.DirectContent.AddTemplate(importedPage, 0, this.YPosition - (finder.VerticalFlips[startFlip] - pageSizeToImport.Bottom)); directContent.RestoreState(); this.YPosition -= height + this.Gap; startFlip = endFlip - 1; } else if (!first) { throw new ArgumentException(string.Format("Page {0} content too large", pageIndex)); } // if first = false; } // while } // Merge } // PdfVeryDenseMergeTool 

RenderListener类:
更新3:修复了1行代码及其工作原理:请参阅代码中的注释

 public class PageVerticalAnalyzer : IRenderListener { public PageVerticalAnalyzer() { } public List VerticalFlips = new List(); public void AddVerticalUseSection(float from, float to) { if (to < from) { float temp = to; to = from; from = temp; } int i = 0; int j = 0; for (i = 0; i < VerticalFlips.Count; i++) { float flip = VerticalFlips[i]; if (flip < from) continue; for (j = i; j < VerticalFlips.Count; j++) { flip = VerticalFlips[j]; if (flip  i) VerticalFlips.RemoveAt(j); // This was the problem line with just .Remove(j) if (toOutsideInterval) VerticalFlips.Insert(i, to); if (fromOutsideInterval) VerticalFlips.Insert(i, from); } // AddVerticalUseSection public void BeginTextBlock() { /* Do nothing */ } public void EndTextBlock() { /* Do nothing */ } public void RenderImage(ImageRenderInfo renderInfo) { Matrix ctm = renderInfo.GetImageCTM(); List YCoords = new List(4) { 0, 0, 0, 0 }; for (int x = 0; x < 2; x++) { for (int y = 0; y < 2; y++) { Vector corner = new Vector(x, y, 1).Cross(ctm); YCoords[2 * x + y] = corner[Vector.I2]; } } YCoords.Sort(); AddVerticalUseSection(YCoords[0], YCoords[3]); } // RenderImage public void RenderText(TextRenderInfo renderInfo) { LineSegment ascentLine = renderInfo.GetAscentLine(); LineSegment descentLine = renderInfo.GetDescentLine(); List YCoords = new List(4) { ascentLine.GetStartPoint()[Vector.I2], ascentLine.GetEndPoint()[Vector.I2], descentLine.GetStartPoint()[Vector.I2], descentLine.GetEndPoint()[Vector.I2], }; YCoords.Sort(); AddVerticalUseSection(YCoords[0], YCoords[3]); } // RenderText } // PageVericalAnalyzer 

用于收集文件和运行该工具的代码:

 public void TestMergeDocuments() { PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(iTextSharp.text.PageSize.A4, 18, 18, 10); List Files = new List(); // Code to load each of the 3 files I need into this byte array list using (MemoryStream ms = new MemoryStream()) { List files = new List(); foreach (byte[] ba in Files) { files.Add(new PdfReader(ba)); } // foreach of the sets of data tool.Merge(ms, files); // Save the file using: ms.GetBuffer() } // using the memory stream } // TestMergeDocuments 

以下示例工具已经根据PdfDenseMergeTool工具的PdfDenseMergeTool从这个答案中实现,OP已经评论PdfDenseMergeTool接近[他]需要的东西。 就像PdfDenseMergeTool一样,这个工具在Java / iText中实现,我比C#/ iTextSharp更喜欢它。 由于OP 已将 PdfDenseMergeTool 转换为C#/ iTextSharp,因此在此处翻译此工具也不应该是一个太大的问题。

PdfVeryDenseMergeTool

此工具与PdfDenseMergeTool类似,从多个PdfReader实例获取页面的页面内容,并尝试密集地合并它们,即如果有足够的可用空间,则将多个源页面的内容放到单个目标页面上。 与之前的工具相比,此工具甚至可以拆分源页面内容,以实现更密集的合并。

就像其他工具一样, PdfVeryDenseMergeTool不考虑矢量图形因为iText(Sharp)解析API只转发文本和位图图像

PdfVeryDenseMergeTool将不完全适合目标页面的源页面拆分为水平线,该水平线未与文本字形或位图图形的边界框相交。

工具类:

 public class PdfVeryDenseMergeTool { public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap) { this.pageSize = size; this.topMargin = top; this.bottomMargin = bottom; this.gap = gap; } public void merge(OutputStream outputStream, Iterable inputs) throws DocumentException, IOException { try { openDocument(outputStream); for (PdfReader reader: inputs) { merge(reader); } } finally { closeDocument(); } } void openDocument(OutputStream outputStream) throws DocumentException { final Document document = new Document(pageSize, 36, 36, topMargin, bottomMargin); final PdfWriter writer = PdfWriter.getInstance(document, outputStream); document.open(); this.document = document; this.writer = writer; newPage(); } void closeDocument() { try { document.close(); } finally { this.document = null; this.writer = null; this.yPosition = 0; } } void newPage() { document.newPage(); yPosition = pageSize.getTop(topMargin); } void merge(PdfReader reader) throws IOException { PdfReaderContentParser parser = new PdfReaderContentParser(reader); for (int page = 1; page <= reader.getNumberOfPages(); page++) { merge(reader, parser, page); } } void merge(PdfReader reader, PdfReaderContentParser parser, int page) throws IOException { PdfImportedPage importedPage = writer.getImportedPage(reader, page); PdfContentByte directContent = writer.getDirectContent(); PageVerticalAnalyzer finder = parser.processContent(page, new PageVerticalAnalyzer()); if (finder.verticalFlips.size() < 2) return; Rectangle pageSizeToImport = reader.getPageSize(page); int startFlip = finder.verticalFlips.size() - 1; boolean first = true; while (startFlip > 0) { if (!first) newPage(); float freeSpace = yPosition - pageSize.getBottom(bottomMargin); int endFlip = startFlip + 1; while ((endFlip > 1) && (finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip - 2) < freeSpace)) endFlip -=2; if (endFlip < startFlip) { float height = finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip); directContent.saveState(); directContent.rectangle(0, yPosition - height, pageSizeToImport.getWidth(), height); directContent.clip(); directContent.newPath(); writer.getDirectContent().addTemplate(importedPage, 0, yPosition - (finder.verticalFlips.get(startFlip) - pageSizeToImport.getBottom())); directContent.restoreState(); yPosition -= height + gap; startFlip = endFlip - 1; } else if (!first) throw new IllegalArgumentException(String.format("Page %s content sections too large.", page)); first = false; } } Document document = null; PdfWriter writer = null; float yPosition = 0; final Rectangle pageSize; final float topMargin; final float bottomMargin; final float gap; } 

( PdfVeryDenseMergeTool.java )

此工具使用自定义RenderListener与iText解析器API一起使用:

 public class PageVerticalAnalyzer implements RenderListener { @Override public void beginTextBlock() { } @Override public void endTextBlock() { } /* * @see RenderListener#renderText(TextRenderInfo) */ @Override public void renderText(TextRenderInfo renderInfo) { LineSegment ascentLine = renderInfo.getAscentLine(); LineSegment descentLine = renderInfo.getDescentLine(); float[] yCoords = new float[]{ ascentLine.getStartPoint().get(Vector.I2), ascentLine.getEndPoint().get(Vector.I2), descentLine.getStartPoint().get(Vector.I2), descentLine.getEndPoint().get(Vector.I2) }; Arrays.sort(yCoords); addVerticalUseSection(yCoords[0], yCoords[3]); } /* * @see RenderListener#renderImage(ImageRenderInfo) */ @Override public void renderImage(ImageRenderInfo renderInfo) { Matrix ctm = renderInfo.getImageCTM(); float[] yCoords = new float[4]; for (int x=0; x < 2; x++) for (int y=0; y < 2; y++) { Vector corner = new Vector(x, y, 1).cross(ctm); yCoords[2*x+y] = corner.get(Vector.I2); } Arrays.sort(yCoords); addVerticalUseSection(yCoords[0], yCoords[3]); } /** * This method marks the given interval as used. */ void addVerticalUseSection(float from, float to) { if (to < from) { float temp = to; to = from; from = temp; } int i=0, j=0; for (; i i) verticalFlips.remove(j); if (toOutsideInterval) verticalFlips.add(i, to); if (fromOutsideInterval) verticalFlips.add(i, from); } final List verticalFlips = new ArrayList(); } 

( PageVerticalAnalyzer.java )

它使用如下:

 PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 5); tool.merge(output, inputs); 

( VeryDenseMerging.java )

适用于OP的样本文件

Header.pdf

Header.pdf页面

Body.pdf

Body.pdf页面

Footer.pdf

Footer.pdf页面

它会产生

A4非常密集的合并结果

如果将目标文档页面大小定义为A5格局:

 PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(new RectangleReadOnly(595,421), 18, 18, 5); tool.merge(output, inputs); 

( VeryDenseMerging.java )

它产生了这个:

A5非常密集的合并结果

谨防! 这只是概念的certificate,并没有考虑所有可能性。 例如,没有正确处理具有非平凡的旋转值的源页面或目标页面的情况。 因此,它尚未准备好用于生产。


当前(5.5.6 SNAPSHOT)iText版本的改进

目前针对5.5.6的iText开发版增强了解析器function,也可以发送矢量图形信号。 因此,我扩展了PageVerticalAnalyzer来使用它:

 public class PageVerticalAnalyzer implements ExtRenderListener { @Override public void beginTextBlock() { } @Override public void endTextBlock() { } @Override public void clipPath(int rule) { } ... static class SubPathSection { public SubPathSection(float x, float y, Matrix m) { float effectiveY = getTransformedY(x, y, m); pathFromY = effectiveY; pathToY = effectiveY; } void extendTo(float x, float y, Matrix m) { float effectiveY = getTransformedY(x, y, m); if (effectiveY < pathFromY) pathFromY = effectiveY; else if (effectiveY > pathToY) pathToY = effectiveY; } float getTransformedY(float x, float y, Matrix m) { return new Vector(x, y, 1).cross(m).get(Vector.I2); } float getFromY() { return pathFromY; } float getToY() { return pathToY; } private float pathFromY; private float pathToY; } /* * Beware: The implementation is not correct as it includes the control points of curves * which may be far outside the actual curve. * * @see ExtRenderListener#modifyPath(PathConstructionRenderInfo) */ @Override public void modifyPath(PathConstructionRenderInfo renderInfo) { Matrix ctm = renderInfo.getCtm(); List segmentData = renderInfo.getSegmentData(); switch (renderInfo.getOperation()) { case PathConstructionRenderInfo.MOVETO: subPath = null; case PathConstructionRenderInfo.LINETO: case PathConstructionRenderInfo.CURVE_123: case PathConstructionRenderInfo.CURVE_13: case PathConstructionRenderInfo.CURVE_23: for (int i = 0; i < segmentData.size()-1; i+=2) { if (subPath == null) { subPath = new SubPathSection(segmentData.get(i), segmentData.get(i+1), ctm); path.add(subPath); } else subPath.extendTo(segmentData.get(i), segmentData.get(i+1), ctm); } break; case PathConstructionRenderInfo.RECT: float x = segmentData.get(0); float y = segmentData.get(1); float w = segmentData.get(2); float h = segmentData.get(3); SubPathSection section = new SubPathSection(x, y, ctm); section.extendTo(x+w, y, ctm); section.extendTo(x, y+h, ctm); section.extendTo(x+w, y+h, ctm); path.add(section); case PathConstructionRenderInfo.CLOSE: subPath = null; break; default: } } /* * @see ExtRenderListener#renderPath(PathPaintingRenderInfo) */ @Override public Path renderPath(PathPaintingRenderInfo renderInfo) { if (renderInfo.getOperation() != PathPaintingRenderInfo.NO_OP) { for (SubPathSection section : path) addVerticalUseSection(section.getFromY(), section.getToY()); } path.clear(); subPath = null; return null; } List path = new ArrayList(); SubPathSection subPath = null; ... } 

( PageVerticalAnalyzer.java )

一个简单的测试( VeryDenseMerging.java方法testMergeOnlyGraphics )合并这些文件

circlesOnlyA.pdf

circlesOnlyB.pdf

circlesOnlyC.pdf

circlesOnlyD.pdf

进入这个:

circlesOnlyMerge-veryDense.pdf

但再次提防:这仅仅是概念的certificate。 特别是需要改进modifyPath() ,实现不正确,因为它包含可能远远超出实际曲线的曲线控制点。