在C#中将PDF拆分为多个文件

我们有一个C# Windows服务,它使用第三方组件读取PDF上的二维条形码来处理所有PDF,然后更新数据库并将文档存储到文档存储库中。

有没有办法在读取条形码后将文件拆分,并把拆分出的部分存储为单独的文件?

例如,如果有一个10页的文档,它应该分成10个不同的文件。

谢谢。

您可以使用像PDFSharp这样的PDF库,读取文件,遍历每个页面,将每个页面添加到一个新的PDF文档中,并把这些新文档保存到文件系统中。 您可以删除原文件,也可以保留它。

代码量不少,但非常简单,下面这些示例应该可以帮助你入门。

http://www.pdfsharp.net/wiki/Default.aspx?Page=ConcatenateDocuments-sample&NS=&AspxAutoDetectCookieSupport=1

我遇到过同样的问题,你可以使用iTextSharp组件来拆分文档

/// <summary>
/// Splits a PDF into two files at a given page: pages 1..(pagenumber - 1) go to the
/// first destination, pages pagenumber..last go to the second.
/// </summary>
/// <param name="args">
/// Exactly four values: source file, first destination file, second destination file,
/// and the 1-based page number at which the second file starts.
/// </param>
/// <remarks>
/// Usage errors are reported on standard error. Any exception raised while splitting is
/// caught and its message and stack trace are written to standard error.
/// </remarks>
public Split(String[] args)
{
    // A null array is treated like a wrong argument count instead of crashing.
    if (args == null || args.Length != 4)
    {
        Console.Error.WriteLine("This tools needs 4 parameters:\njava Split srcfile destfile1 destfile2 pagenumber");
        return;
    }

    PdfReader reader = null;
    FileStream output1 = null;
    FileStream output2 = null;
    try
    {
        int pagenumber = int.Parse(args[3]);

        // we create a reader for a certain document
        reader = new PdfReader(args[0]);

        // we retrieve the total number of pages
        int n = reader.NumberOfPages;
        Console.WriteLine("There are " + n + " pages in the original file.");
        if (pagenumber < 2 || pagenumber > n)
        {
            throw new DocumentException("You can't split this document at page " + pagenumber + "; there is no such page.");
        }

        // step 1: creation of the document objects, sized like their first page
        Document document1 = new Document(reader.GetPageSizeWithRotation(1));
        Document document2 = new Document(reader.GetPageSizeWithRotation(pagenumber));

        // step 2: we create writers that listen to the documents.
        // The streams are kept in locals so they can be released even when a later step fails.
        output1 = new FileStream(args[1], FileMode.Create);
        output2 = new FileStream(args[2], FileMode.Create);
        PdfWriter writer1 = PdfWriter.GetInstance(document1, output1);
        PdfWriter writer2 = PdfWriter.GetInstance(document2, output2);

        // step 3: we open the documents
        document1.Open();
        PdfContentByte cb1 = writer1.DirectContent;
        document2.Open();
        PdfContentByte cb2 = writer2.DirectContent;

        // step 4: we add content
        int i = 0;
        while (i < pagenumber - 1)
        {
            i++;
            ImportPage(reader, document1, writer1, cb1, i);
        }
        while (i < n)
        {
            i++;
            ImportPage(reader, document2, writer2, cb2, i);
            Console.WriteLine("Processed page " + i);
        }

        // step 5: we close the documents
        document1.Close();
        document2.Close();
    }
    catch (Exception e)
    {
        Console.Error.WriteLine(e.Message);
        Console.Error.WriteLine(e.StackTrace);
    }
    finally
    {
        // Release file handles and the reader on every path, including failures.
        if (output2 != null)
        {
            output2.Dispose();
        }
        if (output1 != null)
        {
            output1.Dispose();
        }
        if (reader != null)
        {
            reader.Close();
        }
    }
}

/// <summary>
/// Starts a new page in <paramref name="document"/> sized like source page
/// <paramref name="pageIndex"/> and draws that source page onto it.
/// </summary>
private static void ImportPage(PdfReader reader, Document document, PdfWriter writer, PdfContentByte canvas, int pageIndex)
{
    document.SetPageSize(reader.GetPageSizeWithRotation(pageIndex));
    document.NewPage();
    PdfImportedPage page = writer.GetImportedPage(reader, pageIndex);
    int rotation = reader.GetPageRotation(pageIndex);
    // NOTE(review): 270-degree pages reuse the 90-degree matrix, exactly as the original
    // code did; verify the output orientation for such pages.
    if (rotation == 90 || rotation == 270)
    {
        canvas.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(pageIndex).Height);
    }
    else
    {
        canvas.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
    }
}

之前的问题部分回答了您的问题 – 如何分割pdf文档,如果您知道条形码的位置,那么您可以轻松地拆分文档:

如何将PDF文件拆分成页面(最好是C#)

该建议是使用一个名为PDFSharp的库,并附有一个演示PDF分割的示例。

 /// <summary>
 /// Writes every page of a PDF as its own single-page file named "1.pdf", "2.pdf", ...
 /// inside <paramref name="DestinationFolder"/>.
 /// </summary>
 /// <param name="sourcePdfPath">Path of the PDF to split.</param>
 /// <param name="DestinationFolder">Folder receiving the page files; created when missing.</param>
 /// <returns>
 /// The number of pages written. Failures do not throw: the error is traced and the count
 /// reached so far is returned, which is -1 when the failure happens before the first page.
 /// </returns>
 /// <remarks>
 /// Earlier revisions created (or recursively deleted) a folder derived from
 /// <paramref name="sourcePdfPath"/> that was never written to; the destination folder is
 /// now the only directory touched.
 /// </remarks>
 public int ExtractPages(string sourcePdfPath, string DestinationFolder)
 {
     int p = 0;
     iTextSharp.text.pdf.PdfReader reader = null;
     try
     {
         reader = new iTextSharp.text.pdf.PdfReader(new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdfPath), new ASCIIEncoding().GetBytes(""));

         // Make sure the folder we actually write to exists (no-op when it already does).
         Directory.CreateDirectory(DestinationFolder);

         for (p = 1; p <= reader.NumberOfPages; p++)
         {
             using (MemoryStream memoryStream = new MemoryStream())
             {
                 iTextSharp.text.Document document = new iTextSharp.text.Document();
                 iTextSharp.text.pdf.PdfWriter writer = iTextSharp.text.pdf.PdfWriter.GetInstance(document, memoryStream);
                 writer.SetPdfVersion(iTextSharp.text.pdf.PdfWriter.PDF_VERSION_1_2);
                 writer.CompressionLevel = iTextSharp.text.pdf.PdfStream.BEST_COMPRESSION;
                 writer.SetFullCompression();

                 // Use the rotated size: the rotation branch below positions the page using
                 // the rotated height, so the canvas must have the rotated dimensions too.
                 document.SetPageSize(reader.GetPageSizeWithRotation(p));
                 document.Open();
                 // Kept from the original; presumably redundant with GetInstance - TODO confirm.
                 document.AddDocListener(writer);

                 iTextSharp.text.pdf.PdfContentByte cb = writer.DirectContent;
                 iTextSharp.text.pdf.PdfImportedPage pageImport = writer.GetImportedPage(reader, p);
                 int rot = reader.GetPageRotation(p);
                 // NOTE(review): 270-degree pages reuse the 90-degree matrix, as before;
                 // verify the output orientation for such pages.
                 if (rot == 90 || rot == 270)
                 {
                     cb.AddTemplate(pageImport, 0, -1.0F, 1.0F, 0, 0, reader.GetPageSizeWithRotation(p).Height);
                 }
                 else
                 {
                     cb.AddTemplate(pageImport, 1.0F, 0, 0, 1.0F, 0, 0);
                 }

                 // Closing the document finishes the PDF held in the memory stream.
                 document.Close();
                 File.WriteAllBytes(Path.Combine(DestinationFolder, p + ".pdf"), memoryStream.ToArray());
             }
         }
     }
     catch (Exception ex)
     {
         // Best-effort contract preserved (no exception escapes), but the failure is no
         // longer silent.
         System.Diagnostics.Trace.TraceError("ExtractPages failed for '" + sourcePdfPath + "' at page " + p + ": " + ex);
     }
     finally
     {
         if (reader != null)
         {
             reader.Close();
         }
     }

     // p is one past the last page that was completely written.
     return p - 1;
 }

在任何需要的地方调用此函数并传递源和目标文件夹路径

此代码基于PDFsharp库

http://www.pdfsharp.com/PDFsharp/

如果您想按书签分割,那么这里是代码。

  /// <summary>
  /// Splits a PDF into one file per top-level bookmark. Each output file is named after
  /// the bookmark title and is written next to the source file.
  /// </summary>
  /// <param name="fileName">Path of the PDF to split.</param>
  /// <remarks>
  /// A document without bookmarks produces no output. Only the top-level entries returned
  /// by SimpleBookmark.GetBookmark are used; nested entries are ignored.
  /// </remarks>
  public static void SplitPDFByBookMark(string fileName)
  {
      PdfReader pdfReader = new PdfReader(fileName);
      try
      {
          IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
          if (bookmarks == null)
          {
              // GetBookmark yields null when the document has no outline.
              return;
          }

          for (int i = 0; i < bookmarks.Count; ++i)
          {
              // Use the i-th bookmark (the previous code always read the first one).
              Dictionary<string, object> bookmark = bookmarks[i];
              Dictionary<string, object> nextBookmark = i == bookmarks.Count - 1 ? null : bookmarks[i + 1];

              int startPage = ParseBookmarkPage(bookmark);
              // The last bookmark runs to the end of the document.
              int startPageNext = nextBookmark == null
                  ? pdfReader.NumberOfPages + 1
                  : ParseBookmarkPage(nextBookmark);

              string title = bookmark["Title"].ToString();
              SplitByBookmark(pdfReader, startPage, startPageNext, title + ".pdf", fileName);
          }
      }
      finally
      {
          // Always release the source file; exceptions propagate with their stack intact.
          pdfReader.Close();
      }
  }

  /// <summary>
  /// Reads the page number from a bookmark's "Page" entry; the number is the first
  /// space-separated token of that entry.
  /// </summary>
  private static int ParseBookmarkPage(Dictionary<string, object> bookmark)
  {
      return int.Parse(bookmark["Page"].ToString().Split(' ')[0]);
  }

  /// <summary>
  /// Copies pages <paramref name="pageFrom"/> (inclusive) to <paramref name="PageTo"/>
  /// (exclusive) of <paramref name="reader"/> into a new PDF named
  /// <paramref name="outPutName"/>, placed in the directory of <paramref name="inPutFileName"/>.
  /// </summary>
  /// <remarks>
  /// An empty range is written as the single page <paramref name="pageFrom"/>; this
  /// generalizes the former special case for page 1. Characters that are invalid in file
  /// names are replaced with '_'.
  /// NOTE(review): page rotation flags are not applied to the copied pages, matching the
  /// previous behavior.
  /// </remarks>
  private static void SplitByBookmark(PdfReader reader, int pageFrom, int PageTo, string outPutName, string inPutFileName)
  {
      int lastPageExclusive = PageTo > pageFrom ? PageTo : pageFrom + 1;

      // Bookmark titles may contain characters that are not legal in a file name.
      string safeName = outPutName;
      foreach (char invalid in System.IO.Path.GetInvalidFileNameChars())
      {
          safeName = safeName.Replace(invalid, '_');
      }

      // Path.Combine avoids resolving to the drive root when the input has no directory part.
      string directory = System.IO.Path.GetDirectoryName(inPutFileName) ?? string.Empty;
      string outputPath = System.IO.Path.Combine(directory, safeName);

      // Size the first page like the source page instead of the default page size.
      Document document = new Document(reader.GetPageSize(pageFrom));
      System.IO.FileStream fs = new System.IO.FileStream(outputPath, System.IO.FileMode.Create);
      try
      {
          PdfWriter writer = PdfWriter.GetInstance(document, fs);
          document.Open();
          PdfContentByte cb = writer.DirectContent; // holds pdf data

          for (int pageNumber = pageFrom; pageNumber < lastPageExclusive; pageNumber++)
          {
              document.SetPageSize(reader.GetPageSize(pageNumber));
              document.NewPage();
              PdfImportedPage page = writer.GetImportedPage(reader, pageNumber);
              cb.AddTemplate(page, 0, 0);
          }

          // Close once, after all pages are added (closing inside the loop cut the
          // output off after its first page).
          document.Close();
      }
      finally
      {
          fs.Close();
      }
  }
  /// <summary>
  /// Splits a PDF into one file per top-level bookmark. Each output file is named after
  /// the bookmark title and is written next to the source file.
  /// </summary>
  /// <param name="fileName">Path of the PDF to split.</param>
  /// <remarks>
  /// A document without bookmarks produces no output. Only the top-level entries returned
  /// by SimpleBookmark.GetBookmark are used; nested entries are ignored.
  /// </remarks>
  public void SplitPDFByBookMark(string fileName)
  {
      var pdfReader = new PdfReader(fileName);
      try
      {
          IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
          if (bookmarks == null)
          {
              // GetBookmark yields null when the document has no outline.
              return;
          }

          for (int i = 0; i < bookmarks.Count; ++i)
          {
              Dictionary<string, object> current = bookmarks[i];
              bool isLast = i == bookmarks.Count - 1;

              int firstPage = ReadBookmarkPage(current);
              // The last bookmark runs to the end of the document.
              int nextFirstPage = isLast
                  ? pdfReader.NumberOfPages + 1
                  : ReadBookmarkPage(bookmarks[i + 1]);

              string title = current["Title"].ToString();
              SplitByBookmark(pdfReader, firstPage, nextFirstPage, title + ".pdf", fileName);
          }
      }
      finally
      {
          // Always release the source file; exceptions propagate with their stack intact.
          pdfReader.Close();
      }
  }

  /// <summary>
  /// Reads the page number from a bookmark's "Page" entry; the number is the first
  /// space-separated token of that entry.
  /// </summary>
  private static int ReadBookmarkPage(Dictionary<string, object> bookmark)
  {
      return int.Parse(bookmark["Page"].ToString().Split(' ')[0]);
  }

  /// <summary>
  /// Copies pages <paramref name="pageFrom"/> (inclusive) to <paramref name="PageTo"/>
  /// (exclusive) of <paramref name="reader"/> into a new PDF named
  /// <paramref name="outPutName"/>, placed in the directory of <paramref name="inPutFileName"/>.
  /// </summary>
  /// <remarks>
  /// An empty range is written as the single page <paramref name="pageFrom"/>; this
  /// generalizes the former special case for page 1. Characters that are invalid in file
  /// names are replaced with '_'.
  /// NOTE(review): page rotation flags are not applied to the copied pages, matching the
  /// previous behavior.
  /// </remarks>
  private void SplitByBookmark(PdfReader reader, int pageFrom, int PageTo, string outPutName, string inPutFileName)
  {
      int endExclusive = PageTo > pageFrom ? PageTo : pageFrom + 1;

      // Bookmark titles may contain characters that are not legal in a file name.
      string safeName = outPutName;
      foreach (char invalid in Path.GetInvalidFileNameChars())
      {
          safeName = safeName.Replace(invalid, '_');
      }

      // Path.Combine avoids resolving to the drive root when the input has no directory part.
      string targetPath = Path.Combine(Path.GetDirectoryName(inPutFileName) ?? string.Empty, safeName);

      // Size the first page like the source page instead of the default page size.
      var document = new Document(reader.GetPageSize(pageFrom));
      using (var fs = new FileStream(targetPath, System.IO.FileMode.Create))
      {
          PdfWriter writer = PdfWriter.GetInstance(document, fs);
          document.Open();
          PdfContentByte cb = writer.DirectContent; // holds pdf data

          for (int current = pageFrom; current < endExclusive; current++)
          {
              document.SetPageSize(reader.GetPageSize(current));
              document.NewPage();
              PdfImportedPage page = writer.GetImportedPage(reader, current);
              cb.AddTemplate(page, 0, 0);
          }

          // Close once, after all pages are added (closing inside the loop cut the
          // output off after its first page). The using block releases the file
          // handle on every path.
          document.Close();
      }
  }

您可以从NuGet安装iTextSharp,将此代码复制并粘贴到一个C#应用程序中,然后调用SplitPDFByBookMark()方法并传入PDF文件名。 这段代码会查找文档中的书签,并按书签完成拆分!