在 C# 中将网页捕获为图像,并确保 JavaScript 渲染的元素可见

我试图用标准的 C#/.NET 代码捕获下面这个页面。我查阅了大家用过的各种方法,其中大多数都是实例化一个浏览器对象,再使用“绘制到位图”(DrawToBitmap)之类的方法。但是,这些方法都无法捕获到该页面上图表的内容:

http://www.highcharts.com/demo/combo-dual-axes

也许是 JavaScript 还没来得及运行,但添加 Thread.Sleep(x) 也没有帮助。

这个商业组件可以正确捕获该页面,但既然其他方案已经如此接近成功,我不想在项目中引入额外的依赖,更不想为此支付 150 美元!

有人的解决方案能正确渲染这个页面吗?

您可能已经试过 IECapt。我认为这是正确的方向。我创建了它的一个修改版本,用 Timer 代替 Thread.Sleep,它可以按预期捕获您的站点。

– – – 编辑 – – –

下面是(比较丑陋的)源代码。只需添加对 Microsoft HTML Object Library 的引用即可。

这是用法:

 // Save a screenshot of the demo page to a PNG file. HtmlCapture raises
 // HtmlImageCapture when its capture attempt has finished.
 HtmlCapture pageCapture = new HtmlCapture(@"c:\temp\myimg.png");
 pageCapture.HtmlImageCapture += capture_HtmlImageCapture;
 pageCapture.Create("http://www.highcharts.com/demo/combo-dual-axes");

 // Handler for HtmlImageCapture: closes the object this snippet lives in
 // (presumably the hosting Form -- confirm against the surrounding class).
 void capture_HtmlImageCapture(object sender, Uri url)
 {
     this.Close();
 }

文件1

 using System;
 using System.Collections.Generic;
 using System.ComponentModel;
 using System.Data;
 using System.Drawing;
 using System.Linq;
 using System.Text;
 using System.Windows.Forms;
 using System.IO;

 namespace MyIECapt
 {
     /// <summary>
     /// Renders a web page in an off-screen <see cref="WebBrowser"/> and saves it
     /// to an image file. The capture is taken a fixed delay after the document
     /// has finished loading, to give page scripts time to draw their content.
     /// </summary>
     /// <remarks>
     /// Requires a reference to the Microsoft HTML Object Library (mshtml) and a
     /// running Windows Forms message loop on the thread that creates the instance.
     /// </remarks>
     public class HtmlCapture : IDisposable
     {
         private readonly WebBrowser web;
         private readonly Timer tready;
         private readonly string fileName;
         private Size? imgsize = null;
         private bool disposed;

         /// <summary>Signature of the handler called when a capture attempt has finished.</summary>
         public delegate void HtmlCaptureEvent(object sender, Uri url);

         /// <summary>
         /// Raised after every capture attempt, whether it succeeded or not.
         /// Inspect <see cref="LastError"/> to find out whether the image was written.
         /// </summary>
         public event HtmlCaptureEvent HtmlImageCapture;

         /// <summary>
         /// The exception thrown by the most recent capture attempt, or <c>null</c>
         /// if that attempt succeeded (or none has run yet).
         /// </summary>
         public Exception LastError { get; private set; }

         /// <summary>Creates a capturer that writes its image to <paramref name="fileName"/>.</summary>
         /// <param name="fileName">Path of the image file to write.</param>
         public HtmlCapture(string fileName)
         {
             this.fileName = fileName;

             // Initialise the browser and the timer that delays the capture.
             web = new WebBrowser();
             tready = new Timer();
             tready.Interval = 2000;

             // Initial browser size; it is resized to the document before drawing.
             web.Width = 1024;
             web.Height = 768;

             // Suppress script error dialogs and hide scroll bars.
             web.ScriptErrorsSuppressed = true;
             web.ScrollBarsEnabled = false;

             web.Navigating += new WebBrowserNavigatingEventHandler(web_Navigating);
             web.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
             tready.Tick += new EventHandler(tready_Tick);
         }

         /// <summary>Navigates to <paramref name="url"/>; the image gets the document's own size.</summary>
         /// <param name="url">Address of the page to capture.</param>
         public void Create(string url)
         {
             imgsize = null;
             LastError = null;
             web.Navigate(url);
         }

         /// <summary>Navigates to <paramref name="url"/>; the image is drawn at <paramref name="imgsz"/>.</summary>
         /// <param name="url">Address of the page to capture.</param>
         /// <param name="imgsz">Size of the resulting image.</param>
         public void Create(string url, Size imgsz)
         {
             this.imgsize = imgsz;
             LastError = null;
             web.Navigate(url);
         }

         /// <summary>Stops the timer and releases the browser and timer owned by this instance.</summary>
         public void Dispose()
         {
             if (disposed)
             {
                 return;
             }
             disposed = true;

             tready.Stop();
             tready.Tick -= new EventHandler(tready_Tick);
             web.Navigating -= new WebBrowserNavigatingEventHandler(web_Navigating);
             web.DocumentCompleted -= new WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);

             tready.Dispose();
             web.Dispose();
         }

         void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
         {
             // Start the delay; the capture happens when the timer ticks.
             tready.Start();
         }

         void web_Navigating(object sender, WebBrowserNavigatingEventArgs e)
         {
             // A new navigation started, so cancel any pending capture.
             tready.Stop();
         }

         void tready_Tick(object sender, EventArgs e)
         {
             // One capture per completed document.
             tready.Stop();

             try
             {
                 CaptureToFile();
                 LastError = null;
             }
             catch (Exception ex)
             {
                 // Capture is best-effort: remember the failure instead of
                 // discarding it, and still notify subscribers below.
                 LastError = ex;
                 System.Diagnostics.Trace.WriteLine("HtmlCapture failed: " + ex);
             }

             // Invoke the HtmlImageCapture event (local copy guards against a
             // subscriber detaching between the null check and the call).
             HtmlCaptureEvent handler = HtmlImageCapture;
             if (handler != null)
             {
                 handler(this, web.Url);
             }
         }

         // Resizes the browser to the full document, draws it into a bitmap and saves it to fileName.
         private void CaptureToFile()
         {
             HtmlDocument document = web.Document;
             if (document == null)
             {
                 throw new InvalidOperationException("The WebBrowser has no loaded document to capture.");
             }

             object dom = document.DomDocument;
             mshtml.IHTMLDocument2 docs2 = (mshtml.IHTMLDocument2)dom;
             mshtml.IHTMLDocument3 docs3 = (mshtml.IHTMLDocument3)dom;
             mshtml.IHTMLElement2 body2 = (mshtml.IHTMLElement2)docs2.body;
             mshtml.IHTMLElement2 root2 = (mshtml.IHTMLElement2)docs3.documentElement;

             // Determine dimensions for the image; we could add minWidth here
             // to ensure that we get closer to the minimal width (the width
             // computed might be a few pixels less than what we want).
             int width = Math.Max(body2.scrollWidth, root2.scrollWidth);
             int height = Math.Max(root2.scrollHeight, body2.scrollHeight);

             // Grow the browser to the size of the whole document.
             Rectangle docRectangle = new Rectangle(0, 0, width, height);
             web.Width = docRectangle.Width;
             web.Height = docRectangle.Height;

             // If imgsize is null the image has the same size as the document,
             // otherwise the document is drawn into an image of size imgsize.
             Rectangle imgRectangle;
             if (imgsize == null)
             {
                 imgRectangle = docRectangle;
             }
             else
             {
                 imgRectangle = new Rectangle() { Location = new Point(0, 0), Size = imgsize.Value };
             }

             // Get the view object of the document.
             IViewObject ivo = dom as IViewObject;
             if (ivo == null)
             {
                 throw new InvalidOperationException("The loaded document does not support IViewObject drawing.");
             }

             using (Bitmap bitmap = new Bitmap(imgRectangle.Width, imgRectangle.Height))
             {
                 using (Graphics g = Graphics.FromImage(bitmap))
                 {
                     // Get the handle to the device context and draw; the handle
                     // must be released even if Draw throws.
                     IntPtr hdc = g.GetHdc();
                     try
                     {
                         // 1 = DVASPECT_CONTENT, -1 = whole object.
                         ivo.Draw(1, -1, IntPtr.Zero, IntPtr.Zero, IntPtr.Zero, hdc, ref imgRectangle, ref docRectangle, IntPtr.Zero, 0);
                     }
                     finally
                     {
                         g.ReleaseHdc(hdc);
                     }
                 }

                 bitmap.Save(fileName);
             }
         }
     }
 }

以及文件2

 using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using System.Drawing;
 using System.Runtime.InteropServices;

 namespace MyIECapt
 {
     /// <summary>
     /// Managed declaration of the COM <c>IViewObject</c> interface, used to draw
     /// an object's visual representation onto a device context.
     /// </summary>
     /// <remarks>
     /// Member order defines the COM vtable layout and must not be changed.
     /// NOTE(review): the bounds are passed as <see cref="Rectangle"/>
     /// (X, Y, Width, Height) whereas the native RECTL is (left, top, right,
     /// bottom); the two agree only for rectangles whose origin is (0, 0).
     /// </remarks>
     [ComImport]
     [ComVisible(true)]
     [Guid("0000010d-0000-0000-C000-000000000046")]
     [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
     public interface IViewObject
     {
         /// <summary>Draws the object into <paramref name="hdcDraw"/>; returns an HRESULT.</summary>
         [PreserveSig]
         [return: MarshalAs(UnmanagedType.I4)]
         int Draw(
             [MarshalAs(UnmanagedType.U4)] uint dwDrawAspect,
             int lindex,
             IntPtr pvAspect,
             [In] IntPtr ptd,
             IntPtr hdcTargetDev,
             IntPtr hdcDraw,
             [MarshalAs(UnmanagedType.Struct)] ref Rectangle lprcBounds,
             [MarshalAs(UnmanagedType.Struct)] ref Rectangle lprcWBounds,
             IntPtr pfnContinue,
             [MarshalAs(UnmanagedType.U4)] uint dwContinue);

         /// <summary>Declared to keep the vtable layout; returns an HRESULT.</summary>
         [PreserveSig]
         int GetColorSet(
             [In, MarshalAs(UnmanagedType.U4)] int dwDrawAspect,
             int lindex,
             IntPtr pvAspect,
             [In] IntPtr ptd,
             IntPtr hicTargetDev,
             [Out] IntPtr ppColorSet);

         /// <summary>Declared to keep the vtable layout; returns an HRESULT.</summary>
         [PreserveSig]
         int Freeze(
             [In, MarshalAs(UnmanagedType.U4)] int dwDrawAspect,
             int lindex,
             IntPtr pvAspect,
             [Out] IntPtr pdwFreeze);

         /// <summary>Declared to keep the vtable layout; returns an HRESULT.</summary>
         [PreserveSig]
         int Unfreeze([In, MarshalAs(UnmanagedType.U4)] int dwFreeze);
     }
 }

Thread.Sleep 只会挂起 WebBrowser 所在的线程——线程被挂起的时候,它又怎么可能渲染任何内容呢? 🙂

相反,您需要让该线程能够继续处理工作。您可以组合使用 Thread.Sleep(0) 和 Application.DoEvents() 来实现这一点,如下所示:

 // Keep the UI thread's message loop pumping for three seconds so the
 // WebBrowser can run page scripts and render. A Stopwatch is used for the
 // deadline because it is monotonic; DateTime.Now can jump when the system
 // clock or daylight-saving offset changes.
 TimeSpan waitTime = TimeSpan.FromSeconds(3);
 System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();
 while (elapsed.Elapsed < waitTime)
 {
     // Process pending window messages, then yield the rest of the time slice.
     Application.DoEvents();
     Thread.Sleep(0);
 }

@LB,谢谢你的帮助!

给想在类库中运行这段代码的人提个醒:WebBrowser 需要运行在单线程单元(STA)中,所以要这样做:

  // InitAndDo would have your code creating the webbrowser object etc...
  // The apartment state has to be set to STA before the thread is started.
  Thread staThread = new Thread(InitAndDo);
  staThread.SetApartmentState(ApartmentState.STA);
  staThread.Start();

另外还有一个容易踩的坑:在调用 Navigate 之后,要加上下面这行 Application.Run(),这样才能收到导航完成事件:

  // Request the page first; Application.Run() comes after the Navigate call.
  web.Navigate(Url);
  // Start a message loop on this thread so the WebBrowser's navigation
  // events (e.g. the completed event) can be delivered.
  Application.Run();

我为此创建了一个 NuGet 包:https://github.com/dcumin39/RenderHighCharts/wiki