C#:使用 Chrome 无头驱动程序将网页另存为 HTML 及其资源

C# Chrome无头驱动程序为我们节省了html和资源,c#,selenium-webdriver,selenium-chromedriver,headless,google-chrome-headless,C#,Selenium Webdriver,Selenium Chromedriver,Headless,Google Chrome Headless,我正在寻找以编程方式下载网页输出,以保存我们的html和css和js资源以及网页上使用的图像,是否可以使用chrome驱动程序 如果有人使用ChromeDriver headless或任何其他指针或示例代码引用实现了类似功能,我们将不胜感激。要获取页面的HTML,您可以使用: WebDriver driver = new ChromeDriver(); driver.getPageSource(); 要使用ChromeDriver无头模式,请在WebDriver声明之前添加以下行: Chrom

我想以编程方式下载网页的输出,把页面的 HTML 以及页面所用的 CSS、JS 资源和图片一并保存下来。使用 Chrome 驱动程序(ChromeDriver)能否做到?


如果有人用 ChromeDriver 无头模式实现过类似功能,或者能提供其他思路、示例代码或参考资料,我将不胜感激。

要获取页面的HTML,您可以使用:

WebDriver driver = new ChromeDriver();
driver.getPageSource();
要使用 ChromeDriver 无头模式,请在声明 WebDriver 之前添加以下几行,并把 options 传给 ChromeDriver 的构造函数:

// Build the Chrome launch options and switch on headless mode (no visible browser window).
// NOTE(review): this snippet never hands `options` to a driver; presumably it is meant to be
// passed as `new ChromeDriver(options)`. Confirm against the Selenium version in use.
ChromeOptions options = new ChromeOptions();
options.setHeadless(true);
或:


我做了一些调查:出于安全原因,Chrome 驱动程序本身并不提供现成的功能,无法通过“另存为”一次性下载页面的全部资源和 HTML。

于是我自己建了一个单独的项目,用 Chrome 驱动程序下载全部资源和 HTML。这个项目还会继续微调以获得更好的效果,不过下面这段代码也许可以作为一个起点。

 /// <summary>
 /// Runs a one-page download job end to end and checks that the job archive was produced.
 /// </summary>
 /// <remarks>
 /// Integration test: needs Chrome, a matching chromedriver, network access to the page,
 /// and write access to C:\temp.
 /// </remarks>
 [TestMethod]
 public void DownloadSingpleWebPageAndResources()
 {
     const string jobId = "JOB-12-TEST1";
     var outputFolder = new DirectoryInfo(@"C:\temp");
     var pageInfos = new List<PageInfo>
     {
         new PageInfo() { PageTcmId = "12-12335", DownloadUri = new Uri("https://www.test.com/en/index.html") }
     };

     // The request type is IDisposable, so release it deterministically.
     using (var client = new ChromeDriverClientRequest(jobId, pageInfos))
     {
         client.PackageGenerationFolderPath = outputFolder;
         client.DownloadWebsitePageResources();
     }

     // The job is packed into "<output folder>\<job id>.zip"; assert on that real outcome
     // instead of a flag that is unconditionally set to true.
     string zipPath = Path.Combine(outputFolder.FullName, jobId + ".zip");
     Assert.IsTrue(File.Exists(zipPath), "The job archive was not created: " + zipPath);
 }

/// <summary>
/// Renders each page of a job in headless Chrome, copies the page's same-host images,
/// stylesheets and scripts into an "assets" folder, rewrites the markup to point at those
/// local copies, and finally packs the job folder into "&lt;JobId&gt;.zip".
/// </summary>
/// <remarks>
/// Resources are stored flat by file name, so two resources that share a file name
/// (different remote folders or different query strings) share one local file: the first
/// one downloaded wins. Resources hosted on other hosts are left untouched in the markup.
/// </remarks>
public sealed class ChromeDriverClientRequest : IDisposable
{
    private const string AssetsFolderName = "assets";
    private const string UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.69 Safari/537.36";

    /// <summary>Optional proxy used for resource downloads; null means the default proxy settings.</summary>
    public IWebProxy proxy { set; get; }

    /// <summary>Job identifier; names both the working folder and the resulting zip file.</summary>
    public string JobId { set; get; }

    /// <summary>Pages to render and package.</summary>
    public List<PageInfo> PageInfos { set; get; }

    private DirectoryInfo _packageGenerationFolderPath;

    /// <summary>
    /// Gets or sets the destination folder path. Reading the property creates the folder
    /// on disk when it does not exist yet.
    /// </summary>
    /// <value>The destination folder path.</value>
    public DirectoryInfo PackageGenerationFolderPath
    {
        get
        {
            if (_packageGenerationFolderPath != null && !_packageGenerationFolderPath.Exists)
            {
                _packageGenerationFolderPath.Create();
            }

            return _packageGenerationFolderPath;
        }
        set => _packageGenerationFolderPath = value;
    }

    public ChromeDriverClientRequest()
    {
    }

    public ChromeDriverClientRequest(string jobId, List<PageInfo> pageInfos)
    {
        JobId = jobId;
        PageInfos = pageInfos;
    }

    /// <summary>
    /// Downloads every page in <see cref="PageInfos"/> together with its same-host resources
    /// and zips the result into <see cref="PackageGenerationFolderPath"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The output folder, the job id or the page list has not been set.
    /// </exception>
    public void DownloadWebsitePageResources()
    {
        if (PackageGenerationFolderPath == null)
        {
            throw new InvalidOperationException("PackageGenerationFolderPath must be set before downloading.");
        }

        // An empty JobId would make the job folder equal to the output folder itself,
        // which is deleted recursively once the zip has been written.
        if (string.IsNullOrWhiteSpace(JobId))
        {
            throw new InvalidOperationException("JobId must be set before downloading.");
        }

        if (PageInfos == null)
        {
            throw new InvalidOperationException("PageInfos must be set before downloading.");
        }

        ChromeOptions options = new ChromeOptions();
        options.AddArgument("--window-size=1920,1080");
        options.AddArgument("--disable-gpu");
        options.AddArgument("--disable-extensions");
        options.AddArgument("--proxy-server='direct://'");
        options.AddArgument("--proxy-bypass-list=*");
        options.AddArgument("--start-maximized");
        options.AddArgument("--headless");
        options.AddArgument("no-sandbox");
        options.AcceptInsecureCertificates = true;
        options.PageLoadStrategy = PageLoadStrategy.Normal;

        // CreateDirectory is a no-op when the folder already exists.
        Directory.CreateDirectory(Path.Combine(PackageGenerationFolderPath.FullName, JobId, AssetsFolderName));

        ChromeDriver driver = new ChromeDriver(options);

        // No catch block: failures propagate with their original type and stack trace.
        try
        {
            foreach (PageInfo page in PageInfos)
            {
                driver.Navigate().GoToUrl(page.DownloadUri);

                // Ask the browser for the rendered DOM rather than the raw response body.
                string html = driver.ExecuteScript("return document.body.parentElement.outerHTML") as string ?? string.Empty;

                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(html);

                LocalizeResources(htmlDoc, page, "//img", "src");
                LocalizeResources(htmlDoc, page, "//source", "srcset");
                LocalizeResources(htmlDoc, page, "//link[@rel=\"stylesheet\"]", "href");
                LocalizeResources(htmlDoc, page, "//script", "src");
                SaveProcessedHtmlFile(htmlDoc, page);
            }

            CreateZipFile();
        }
        finally
        {
            driver.Quit();
        }
    }

    /// <summary>
    /// Zips the job folder into "&lt;JobId&gt;.zip" next to it, replacing any earlier archive,
    /// and then removes the job folder.
    /// </summary>
    private void CreateZipFile()
    {
        string jobFolder = Path.Combine(PackageGenerationFolderPath.FullName, JobId);
        string zipPath = Path.Combine(PackageGenerationFolderPath.FullName, JobId + ".zip");

        // CreateFromDirectory refuses to overwrite; File.Delete does nothing when the file is absent.
        File.Delete(zipPath);
        ZipFile.CreateFromDirectory(jobFolder, zipPath, CompressionLevel.Fastest, false);

        Directory.Delete(jobFolder, true);
    }

    /// <summary>
    /// Writes the processed document to "&lt;PageTcmId&gt;_&lt;name&gt;.html" inside the job folder.
    /// </summary>
    /// <param name="htmlDoc">Document whose resource references have already been rewritten.</param>
    /// <param name="page">Page the document was rendered from.</param>
    private void SaveProcessedHtmlFile(HtmlDocument htmlDoc, PageInfo page)
    {
        // Use only the URL path: the query string and fragment are not part of the file name
        // and may contain characters that are illegal in file names (e.g. '?').
        string sourceName = Path.GetFileName(page.DownloadUri.AbsolutePath);
        string htmlFileName = string.IsNullOrEmpty(sourceName)
            ? "index.html"
            : Path.ChangeExtension(sourceName, ".html");

        string targetPath = Path.Combine(PackageGenerationFolderPath.FullName, JobId, page.PageTcmId + "_" + htmlFileName);
        using (FileStream stream = new FileStream(targetPath, FileMode.Create))
        {
            htmlDoc.Save(stream);
        }
    }

    /// <summary>
    /// Downloads the resources referenced by <paramref name="attributeName"/> on every node
    /// matched by <paramref name="xpath"/> and points the attribute at the local copy.
    /// </summary>
    /// <param name="htmlDoc">Document to rewrite in place.</param>
    /// <param name="page">Page the document was rendered from; supplies the base URL and host.</param>
    /// <param name="xpath">XPath selecting the elements to process.</param>
    /// <param name="attributeName">Attribute holding the URL ("src", "href") or a "srcset" list.</param>
    private void LocalizeResources(HtmlDocument htmlDoc, PageInfo page, string xpath, string attributeName)
    {
        // SelectNodes yields null rather than an empty collection when nothing matches.
        HtmlNodeCollection nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
        if (nodes == null)
        {
            return;
        }

        bool isSrcset = string.Equals(attributeName, "srcset", StringComparison.OrdinalIgnoreCase);

        foreach (HtmlNode node in nodes)
        {
            HtmlAttribute attribute = node.Attributes[attributeName];
            if (attribute == null)
            {
                continue;
            }

            // Attribute text is still entity-encoded ("&amp;" inside query strings).
            string value = HtmlEntity.DeEntitize(attribute.Value ?? string.Empty);

            if (isSrcset)
            {
                attribute.Value = LocalizeSrcset(value, page);
                continue;
            }

            string localReference = LocalizeUrl(value, page);
            if (localReference != null)
            {
                attribute.Value = localReference;
            }
        }
    }

    /// <summary>
    /// Rewrites a srcset value ("url [descriptor], url [descriptor], ...") candidate by
    /// candidate, keeping each descriptor and leaving non-localizable URLs as they were.
    /// </summary>
    private string LocalizeSrcset(string srcset, PageInfo page)
    {
        List<string> candidates = new List<string>();
        int position = 0;

        while (position < srcset.Length)
        {
            // Skip the whitespace/commas separating candidates.
            while (position < srcset.Length && (char.IsWhiteSpace(srcset[position]) || srcset[position] == ','))
            {
                position++;
            }

            if (position >= srcset.Length)
            {
                break;
            }

            // The URL is the next run of non-whitespace characters; it may contain commas.
            int urlStart = position;
            while (position < srcset.Length && !char.IsWhiteSpace(srcset[position]))
            {
                position++;
            }

            string url = srcset.Substring(urlStart, position - urlStart);
            string descriptor = string.Empty;

            if (url.EndsWith(",", StringComparison.Ordinal))
            {
                // A trailing comma ends the candidate: there is no descriptor.
                url = url.TrimEnd(',');
            }
            else
            {
                int descriptorStart = position;
                while (position < srcset.Length && srcset[position] != ',')
                {
                    position++;
                }

                descriptor = srcset.Substring(descriptorStart, position - descriptorStart).Trim();
            }

            string reference = LocalizeUrl(url, page) ?? url;
            candidates.Add(descriptor.Length == 0 ? reference : reference + " " + descriptor);
        }

        return string.Join(", ", candidates);
    }

    /// <summary>
    /// Downloads one resource if it lives on the page's host and returns the relative
    /// reference to the local copy.
    /// </summary>
    /// <returns>"./assets/&lt;file&gt;" on success; null when the reference must stay unchanged.</returns>
    private string LocalizeUrl(string rawUrl, PageInfo page)
    {
        if (string.IsNullOrWhiteSpace(rawUrl))
        {
            return null;
        }

        // Resolving against the page URL covers absolute, protocol-relative ("//host/x"),
        // root-relative ("/x") and document-relative ("img/x") references alike.
        Uri resourceUri;
        if (!Uri.TryCreate(page.DownloadUri, rawUrl.Trim(), out resourceUri))
        {
            return null;
        }

        // data:, blob:, javascript: and similar references have nothing to download.
        bool isHttp = resourceUri.Scheme == Uri.UriSchemeHttp || resourceUri.Scheme == Uri.UriSchemeHttps;
        bool isSameHost = string.Equals(page.DownloadUri.Host, resourceUri.Host, StringComparison.OrdinalIgnoreCase);
        if (!isHttp || !isSameHost)
        {
            return null;
        }

        // Name the local file after the URL path only; the query is still sent when downloading.
        string fileName = Uri.UnescapeDataString(Path.GetFileName(resourceUri.AbsolutePath));
        if (fileName.Length == 0
            || fileName == "."
            || fileName == ".."
            || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
        {
            return null;
        }

        if (!DownloadResource(resourceUri, fileName))
        {
            return null;
        }

        // Escape again so the reference decodes back to the exact file name on disk.
        return "./" + AssetsFolderName + "/" + Uri.EscapeDataString(fileName);
    }

    /// <summary>
    /// Downloads <paramref name="uri"/> into the assets folder unless a file with that name
    /// is already there. The download is synchronous so that every file is complete before
    /// the job folder is zipped and deleted.
    /// </summary>
    /// <returns>true when the local file is available; false when the download failed.</returns>
    private bool DownloadResource(Uri uri, string fileName)
    {
        string targetPath = Path.Combine(PackageGenerationFolderPath.FullName, JobId, AssetsFolderName, fileName);
        if (File.Exists(targetPath))
        {
            return true;
        }

        try
        {
            using (WebClient webClient = new WebClient())
            {
                webClient.Headers.Add(HttpRequestHeader.UserAgent, UserAgent);
                if (proxy != null)
                {
                    webClient.Proxy = proxy;
                }

                webClient.DownloadFile(uri, targetPath);
            }

            return true;
        }
        catch (WebException ex)
        {
            // One broken resource must not abort the whole job: report it and let the page
            // keep its original reference. Drop any partial file so it is not mistaken for
            // a finished download later.
            if (File.Exists(targetPath))
            {
                File.Delete(targetPath);
            }

            System.Diagnostics.Trace.TraceWarning("Skipping resource {0}: {1}", uri, ex.Message);
            return false;
        }
    }

    /// <summary>
    /// Nothing to release: the browser and the web clients are created and cleaned up
    /// inside the methods that use them.
    /// </summary>
    public void Dispose()
    {
    }
}
[TestMethod]
public void DownloadSingpleWebPageAndResources()
{
    List<PageInfo> pageInfos = new List<PageInfo>();
    pageInfos.Add(new PageInfo() { PageTcmId = "12-12335", DownloadUri = new Uri("https://www.test.com/en/index.html") });
    bool found = false;
    var client = new ChromeDriverClientRequest("JOB-12-TEST1", pageInfos);
    client.PackageGenerationFolderPath = new DirectoryInfo(@"C:\temp");
    client.DownloadWebsitePageResources();
    found = true;
    Assert.IsTrue(found, "The item was not resolved");
}
公共密封类ChromeDriverClientRequest:IDisposable
{
公共IWebProxy代理{set;get;}
公共字符串JobId{set;get;}
公共列表页面信息{set;get;}
私有目录信息包生成文件夹路径;
/// 
///获取或设置目标文件夹路径。
/// 
///目标文件夹路径。
公共目录信息包GenerationFolderPath
{
得到
{
if(_packageGenerationFolderPath!=null&!_packageGenerationFolderPath.Exists)
{
_packageGenerationFolderPath.Create();
}
返回_packageGenerationFolderPath;
}
set=>\u packageGenerationFolderPath=值;
}
公共ChromeDriverClientRequest()
{
}
公共ChromeDriverClientRequest(字符串jobId,列表页面信息)
{
JobId=JobId;
PageInfos=PageInfos;
}
/// 
///下载网页
/// 
/// 
public void DownloadWebsitePageResources()
{
ChromeOptions选项=新的ChromeOptions();
options.AddArgument(“--windowsize=19201080”);
options.AddArgument(“--disable gpu”);
options.AddArgument(“--disable extensions”);
options.AddArgument(“--proxy server='direct://'”);
options.AddArgument(“--proxy pass list=*”);
options.AddArgument(“--start maximized”);
选项。添加参数(“--headless”);
options.AddArgument(“无沙盒”);
options.AcceptSecureCertificates=true;
options.PageLoadStrategy=PageLoadStrategy.Normal;
ChromeDriver驱动程序=空;
尝试
{
//创建资产文件夹
字符串path=path.Combine(PackageGenerationFolderPath.FullName,作业ID,“资产”);
如果(!Directory.Exists(path))
{
CreateDirectory(路径);
}
驱动程序=新的色度驱动程序(可选);
foreach(页面信息中的页面信息页面)
{
driver.Navigate().gotour(page.DownloadUri);
object html=driver.ExecuteScript(“return document.body.parentElement.outerHTML”);
HtmlDocument htmlDoc=新HtmlDocument();
htmlDoc.LoadHtml(html.ToString());
ProcessImagesToDownload(参考htmlDoc,第页);
ProcessHtml5ImagesTagsToDownload(参考htmlDoc,第页);
ProcessStylesheetsToDownload(参考htmlDoc,第页);
ProcessScriptsToDownload(参考htmlDoc,第页);
SaveProcessedHtmlFile(参考htmlDoc,第页);
}
createZipFile();
}
捕获(例外情况除外)
{
抛出新异常(例如消息);
}
最后
{
if(驱动程序!=null)
driver.Quit();
}
}
/// 
/// 
/// 
/// 
/// 
私有void createZipFile()
{
字符串DirectoryToBeArchive=Path.Combine(PackageGenerationFolderPath.FullName,JobId);
字符串DirectoryToBeArchiveZipFileName=Path.Combine(PackageGenerationFolderPath.FullName,JobId+“.zip”);
if(File.Exists(DirectoryToBeArchiveZipFileName))
{
Delete(DirectoryToBeArchiveZipFileName);
CreateFromDirectory(DirectoryToBeArchive,DirectoryToBeArchiveZipFileName,CompressionLevel.faster,false);
}
其他的
{
CreateFromDirectory(DirectoryToBeArchive,DirectoryToBeArchiveZipFileName,CompressionLevel.faster,false);
}
Delete(DirectoryToBeArchive,true);
}
/// 
///保存已处理的Html文件
/// 
/// 
/// 
私有void SaveProcessedHtmlFile(参考HtmlDocument htmlDoc,页面信息页面)
{
字符串htmlSourceFiname=Path.GetFileName(page.DownloadUri.ToString());
如果(!string.IsNullOrEmpty(Path.GetExtension(page.DownloadUri.ToString()))
{
htmlSourceFiname=htmlSourceFiname.Replace(Path.GetExtension(page.DownloadUri.ToString()),“.html”);
}
如果(!string.IsNullOrEmpty(htmlSourceFiname))
{
htmlSourceFiname=htmlSourceFiname+“.html”;
}
其他的
{
htmlSourceFiname=“index.html”;
}
使用(FileStream sw=new FileStream(Path.Combine(PackageGenerationFolderPath.FullName,JobId,page.PageTcmId+“”+htmlSourceFiname)、FileMode.Create))
{
 /// <summary>
 /// Runs a one-page download job end to end and checks that the job archive was produced.
 /// </summary>
 /// <remarks>
 /// Integration test: needs Chrome, a matching chromedriver, network access to the page,
 /// and write access to C:\temp.
 /// </remarks>
 [TestMethod]
 public void DownloadSingpleWebPageAndResources()
 {
     const string jobId = "JOB-12-TEST1";
     var outputFolder = new DirectoryInfo(@"C:\temp");
     var pageInfos = new List<PageInfo>
     {
         new PageInfo() { PageTcmId = "12-12335", DownloadUri = new Uri("https://www.test.com/en/index.html") }
     };

     // The request type is IDisposable, so release it deterministically.
     using (var client = new ChromeDriverClientRequest(jobId, pageInfos))
     {
         client.PackageGenerationFolderPath = outputFolder;
         client.DownloadWebsitePageResources();
     }

     // The job is packed into "<output folder>\<job id>.zip"; assert on that real outcome
     // instead of a flag that is unconditionally set to true.
     string zipPath = Path.Combine(outputFolder.FullName, jobId + ".zip");
     Assert.IsTrue(File.Exists(zipPath), "The job archive was not created: " + zipPath);
 }

/// <summary>
/// Renders each page of a job in headless Chrome, copies the page's same-host images,
/// stylesheets and scripts into an "assets" folder, rewrites the markup to point at those
/// local copies, and finally packs the job folder into "&lt;JobId&gt;.zip".
/// </summary>
/// <remarks>
/// Resources are stored flat by file name, so two resources that share a file name
/// (different remote folders or different query strings) share one local file: the first
/// one downloaded wins. Resources hosted on other hosts are left untouched in the markup.
/// </remarks>
public sealed class ChromeDriverClientRequest : IDisposable
{
    private const string AssetsFolderName = "assets";
    private const string UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.69 Safari/537.36";

    /// <summary>Optional proxy used for resource downloads; null means the default proxy settings.</summary>
    public IWebProxy proxy { set; get; }

    /// <summary>Job identifier; names both the working folder and the resulting zip file.</summary>
    public string JobId { set; get; }

    /// <summary>Pages to render and package.</summary>
    public List<PageInfo> PageInfos { set; get; }

    private DirectoryInfo _packageGenerationFolderPath;

    /// <summary>
    /// Gets or sets the destination folder path. Reading the property creates the folder
    /// on disk when it does not exist yet.
    /// </summary>
    /// <value>The destination folder path.</value>
    public DirectoryInfo PackageGenerationFolderPath
    {
        get
        {
            if (_packageGenerationFolderPath != null && !_packageGenerationFolderPath.Exists)
            {
                _packageGenerationFolderPath.Create();
            }

            return _packageGenerationFolderPath;
        }
        set => _packageGenerationFolderPath = value;
    }

    public ChromeDriverClientRequest()
    {
    }

    public ChromeDriverClientRequest(string jobId, List<PageInfo> pageInfos)
    {
        JobId = jobId;
        PageInfos = pageInfos;
    }

    /// <summary>
    /// Downloads every page in <see cref="PageInfos"/> together with its same-host resources
    /// and zips the result into <see cref="PackageGenerationFolderPath"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The output folder, the job id or the page list has not been set.
    /// </exception>
    public void DownloadWebsitePageResources()
    {
        if (PackageGenerationFolderPath == null)
        {
            throw new InvalidOperationException("PackageGenerationFolderPath must be set before downloading.");
        }

        // An empty JobId would make the job folder equal to the output folder itself,
        // which is deleted recursively once the zip has been written.
        if (string.IsNullOrWhiteSpace(JobId))
        {
            throw new InvalidOperationException("JobId must be set before downloading.");
        }

        if (PageInfos == null)
        {
            throw new InvalidOperationException("PageInfos must be set before downloading.");
        }

        ChromeOptions options = new ChromeOptions();
        options.AddArgument("--window-size=1920,1080");
        options.AddArgument("--disable-gpu");
        options.AddArgument("--disable-extensions");
        options.AddArgument("--proxy-server='direct://'");
        options.AddArgument("--proxy-bypass-list=*");
        options.AddArgument("--start-maximized");
        options.AddArgument("--headless");
        options.AddArgument("no-sandbox");
        options.AcceptInsecureCertificates = true;
        options.PageLoadStrategy = PageLoadStrategy.Normal;

        // CreateDirectory is a no-op when the folder already exists.
        Directory.CreateDirectory(Path.Combine(PackageGenerationFolderPath.FullName, JobId, AssetsFolderName));

        ChromeDriver driver = new ChromeDriver(options);

        // No catch block: failures propagate with their original type and stack trace.
        try
        {
            foreach (PageInfo page in PageInfos)
            {
                driver.Navigate().GoToUrl(page.DownloadUri);

                // Ask the browser for the rendered DOM rather than the raw response body.
                string html = driver.ExecuteScript("return document.body.parentElement.outerHTML") as string ?? string.Empty;

                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(html);

                LocalizeResources(htmlDoc, page, "//img", "src");
                LocalizeResources(htmlDoc, page, "//source", "srcset");
                LocalizeResources(htmlDoc, page, "//link[@rel=\"stylesheet\"]", "href");
                LocalizeResources(htmlDoc, page, "//script", "src");
                SaveProcessedHtmlFile(htmlDoc, page);
            }

            CreateZipFile();
        }
        finally
        {
            driver.Quit();
        }
    }

    /// <summary>
    /// Zips the job folder into "&lt;JobId&gt;.zip" next to it, replacing any earlier archive,
    /// and then removes the job folder.
    /// </summary>
    private void CreateZipFile()
    {
        string jobFolder = Path.Combine(PackageGenerationFolderPath.FullName, JobId);
        string zipPath = Path.Combine(PackageGenerationFolderPath.FullName, JobId + ".zip");

        // CreateFromDirectory refuses to overwrite; File.Delete does nothing when the file is absent.
        File.Delete(zipPath);
        ZipFile.CreateFromDirectory(jobFolder, zipPath, CompressionLevel.Fastest, false);

        Directory.Delete(jobFolder, true);
    }

    /// <summary>
    /// Writes the processed document to "&lt;PageTcmId&gt;_&lt;name&gt;.html" inside the job folder.
    /// </summary>
    /// <param name="htmlDoc">Document whose resource references have already been rewritten.</param>
    /// <param name="page">Page the document was rendered from.</param>
    private void SaveProcessedHtmlFile(HtmlDocument htmlDoc, PageInfo page)
    {
        // Use only the URL path: the query string and fragment are not part of the file name
        // and may contain characters that are illegal in file names (e.g. '?').
        string sourceName = Path.GetFileName(page.DownloadUri.AbsolutePath);
        string htmlFileName = string.IsNullOrEmpty(sourceName)
            ? "index.html"
            : Path.ChangeExtension(sourceName, ".html");

        string targetPath = Path.Combine(PackageGenerationFolderPath.FullName, JobId, page.PageTcmId + "_" + htmlFileName);
        using (FileStream stream = new FileStream(targetPath, FileMode.Create))
        {
            htmlDoc.Save(stream);
        }
    }

    /// <summary>
    /// Downloads the resources referenced by <paramref name="attributeName"/> on every node
    /// matched by <paramref name="xpath"/> and points the attribute at the local copy.
    /// </summary>
    /// <param name="htmlDoc">Document to rewrite in place.</param>
    /// <param name="page">Page the document was rendered from; supplies the base URL and host.</param>
    /// <param name="xpath">XPath selecting the elements to process.</param>
    /// <param name="attributeName">Attribute holding the URL ("src", "href") or a "srcset" list.</param>
    private void LocalizeResources(HtmlDocument htmlDoc, PageInfo page, string xpath, string attributeName)
    {
        // SelectNodes yields null rather than an empty collection when nothing matches.
        HtmlNodeCollection nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
        if (nodes == null)
        {
            return;
        }

        bool isSrcset = string.Equals(attributeName, "srcset", StringComparison.OrdinalIgnoreCase);

        foreach (HtmlNode node in nodes)
        {
            HtmlAttribute attribute = node.Attributes[attributeName];
            if (attribute == null)
            {
                continue;
            }

            // Attribute text is still entity-encoded ("&amp;" inside query strings).
            string value = HtmlEntity.DeEntitize(attribute.Value ?? string.Empty);

            if (isSrcset)
            {
                attribute.Value = LocalizeSrcset(value, page);
                continue;
            }

            string localReference = LocalizeUrl(value, page);
            if (localReference != null)
            {
                attribute.Value = localReference;
            }
        }
    }

    /// <summary>
    /// Rewrites a srcset value ("url [descriptor], url [descriptor], ...") candidate by
    /// candidate, keeping each descriptor and leaving non-localizable URLs as they were.
    /// </summary>
    private string LocalizeSrcset(string srcset, PageInfo page)
    {
        List<string> candidates = new List<string>();
        int position = 0;

        while (position < srcset.Length)
        {
            // Skip the whitespace/commas separating candidates.
            while (position < srcset.Length && (char.IsWhiteSpace(srcset[position]) || srcset[position] == ','))
            {
                position++;
            }

            if (position >= srcset.Length)
            {
                break;
            }

            // The URL is the next run of non-whitespace characters; it may contain commas.
            int urlStart = position;
            while (position < srcset.Length && !char.IsWhiteSpace(srcset[position]))
            {
                position++;
            }

            string url = srcset.Substring(urlStart, position - urlStart);
            string descriptor = string.Empty;

            if (url.EndsWith(",", StringComparison.Ordinal))
            {
                // A trailing comma ends the candidate: there is no descriptor.
                url = url.TrimEnd(',');
            }
            else
            {
                int descriptorStart = position;
                while (position < srcset.Length && srcset[position] != ',')
                {
                    position++;
                }

                descriptor = srcset.Substring(descriptorStart, position - descriptorStart).Trim();
            }

            string reference = LocalizeUrl(url, page) ?? url;
            candidates.Add(descriptor.Length == 0 ? reference : reference + " " + descriptor);
        }

        return string.Join(", ", candidates);
    }

    /// <summary>
    /// Downloads one resource if it lives on the page's host and returns the relative
    /// reference to the local copy.
    /// </summary>
    /// <returns>"./assets/&lt;file&gt;" on success; null when the reference must stay unchanged.</returns>
    private string LocalizeUrl(string rawUrl, PageInfo page)
    {
        if (string.IsNullOrWhiteSpace(rawUrl))
        {
            return null;
        }

        // Resolving against the page URL covers absolute, protocol-relative ("//host/x"),
        // root-relative ("/x") and document-relative ("img/x") references alike.
        Uri resourceUri;
        if (!Uri.TryCreate(page.DownloadUri, rawUrl.Trim(), out resourceUri))
        {
            return null;
        }

        // data:, blob:, javascript: and similar references have nothing to download.
        bool isHttp = resourceUri.Scheme == Uri.UriSchemeHttp || resourceUri.Scheme == Uri.UriSchemeHttps;
        bool isSameHost = string.Equals(page.DownloadUri.Host, resourceUri.Host, StringComparison.OrdinalIgnoreCase);
        if (!isHttp || !isSameHost)
        {
            return null;
        }

        // Name the local file after the URL path only; the query is still sent when downloading.
        string fileName = Uri.UnescapeDataString(Path.GetFileName(resourceUri.AbsolutePath));
        if (fileName.Length == 0
            || fileName == "."
            || fileName == ".."
            || fileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
        {
            return null;
        }

        if (!DownloadResource(resourceUri, fileName))
        {
            return null;
        }

        // Escape again so the reference decodes back to the exact file name on disk.
        return "./" + AssetsFolderName + "/" + Uri.EscapeDataString(fileName);
    }

    /// <summary>
    /// Downloads <paramref name="uri"/> into the assets folder unless a file with that name
    /// is already there. The download is synchronous so that every file is complete before
    /// the job folder is zipped and deleted.
    /// </summary>
    /// <returns>true when the local file is available; false when the download failed.</returns>
    private bool DownloadResource(Uri uri, string fileName)
    {
        string targetPath = Path.Combine(PackageGenerationFolderPath.FullName, JobId, AssetsFolderName, fileName);
        if (File.Exists(targetPath))
        {
            return true;
        }

        try
        {
            using (WebClient webClient = new WebClient())
            {
                webClient.Headers.Add(HttpRequestHeader.UserAgent, UserAgent);
                if (proxy != null)
                {
                    webClient.Proxy = proxy;
                }

                webClient.DownloadFile(uri, targetPath);
            }

            return true;
        }
        catch (WebException ex)
        {
            // One broken resource must not abort the whole job: report it and let the page
            // keep its original reference. Drop any partial file so it is not mistaken for
            // a finished download later.
            if (File.Exists(targetPath))
            {
                File.Delete(targetPath);
            }

            System.Diagnostics.Trace.TraceWarning("Skipping resource {0}: {1}", uri, ex.Message);
            return false;
        }
    }

    /// <summary>
    /// Nothing to release: the browser and the web clients are created and cleaned up
    /// inside the methods that use them.
    /// </summary>
    public void Dispose()
    {
    }
}