Convert PDF to separate HTML pages in C# and .NET


Complete code

using System;
using System.IO;

namespace Sample
{
    /// <summary>
    /// Sample showing how to split a PDF into one HTML document per page
    /// using SautinSoft PDF Focus.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts every page of "simple text.pdf" into its own HTML file
        /// (Page1.html, Page2.html, ...) inside the "htmls" directory, then
        /// opens the first and the last generated file in the default viewer.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Convert PDF to separate HTMLs.
            // Each PDF page will be converted to a single HTML document.
            string pdfFile = Path.GetFullPath(@"..\..\..\simple text.pdf");

            // Output directory for the HTML files and the extracted images.
            DirectoryInfo htmlDir = new DirectoryInfo("htmls");
            if (!htmlDir.Exists)
            {
                htmlDir.Create();
            }

            // Get your free 30-day key here:
            // https://sautinsoft.com/start-for-free/

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            f.HtmlOptions.IncludeImageInHtml = false;

            // Path (must exist) to a directory to store images after converting.
            f.HtmlOptions.ImageFolder = htmlDir.FullName;

            f.OpenPdf(pdfFile);
            try
            {
                // The page count does not change while converting, so read it once.
                int pageCount = f.PageCount;

                // Convert each PDF page to a separate HTML document:
                // Page1.html, Page2.html ... PageN.html.
                for (int page = 1; page <= pageCount; page++)
                {
                    f.HtmlOptions.Title = $"Page {page}";

                    // Use a distinct image sub-folder name for every page.
                    f.HtmlOptions.ImageSubFolder = $"page{page}_images";

                    // Convert only the current page (from == to).
                    string htmlString = f.ToHtml(page, page);

                    // Save htmlString to file.
                    string htmlFile = Path.Combine(htmlDir.FullName, $"Page{page}.html");
                    File.WriteAllText(htmlFile, htmlString);

                    // Let's open only the 1st and the last pages.
                    if (page == 1 || page == pageCount)
                    {
                        // Open the result for demonstration purposes.
                        // UseShellExecute = true lets the OS pick the associated application.
                        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(htmlFile) { UseShellExecute = true });
                    }
                }
            }
            finally
            {
                // Always close the PDF opened above, even if a conversion or file write fails.
                f.ClosePdf();
            }
        }
    }
}

Download

Imports System
Imports System.IO

Namespace Sample
    ''' <summary>
    ''' Sample showing how to split a PDF into one HTML document per page
    ''' using SautinSoft PDF Focus.
    ''' </summary>
    Friend Class Sample
        ''' <summary>
        ''' Converts every page of "simple text.pdf" into its own HTML file
        ''' (Page1.html, Page2.html, ...) inside the "htmls" directory, then
        ''' opens the first and the last generated file in the default viewer.
        ''' </summary>
        ''' <param name="args">Command-line arguments; not used.</param>
        Shared Sub Main(ByVal args() As String)
            ' Convert PDF to separate HTMLs.
            ' Each PDF page will be converted to a single HTML document.
            Dim pdfFile As String = Path.GetFullPath("..\..\..\simple text.pdf")

            ' Output directory for the HTML files and the extracted images.
            Dim htmlDir As New DirectoryInfo("htmls")
            If Not htmlDir.Exists Then
                htmlDir.Create()
            End If

            ' Get your free 30-day key here:
            ' https://sautinsoft.com/start-for-free/
            ' SautinSoft.PdfFocus.SetLicense("1234567890")
            Dim f As New SautinSoft.PdfFocus()

            f.HtmlOptions.IncludeImageInHtml = False

            ' Path (must exist) to a directory to store images after converting.
            f.HtmlOptions.ImageFolder = htmlDir.FullName

            f.OpenPdf(pdfFile)
            Try
                ' The page count does not change while converting, so read it once.
                Dim pageCount As Integer = f.PageCount

                ' Convert each PDF page to a separate HTML document:
                ' Page1.html, Page2.html ... PageN.html.
                For page As Integer = 1 To pageCount
                    f.HtmlOptions.Title = $"Page {page}"

                    ' Use a distinct image sub-folder name for every page.
                    f.HtmlOptions.ImageSubFolder = $"page{page}_images"

                    ' Convert only the current page (from = to).
                    Dim htmlString As String = f.ToHtml(page, page)

                    ' Save htmlString to file.
                    Dim htmlFile As String = Path.Combine(htmlDir.FullName, $"Page{page}.html")
                    File.WriteAllText(htmlFile, htmlString)

                    ' Let's open only the 1st and the last pages.
                    If page = 1 OrElse page = pageCount Then
                        ' Open the result for demonstration purposes.
                        ' UseShellExecute = True lets the OS pick the associated application.
                        System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(htmlFile) With {.UseShellExecute = True})
                    End If
                Next page
            Finally
                ' Always close the PDF opened above, even if a conversion or file write fails.
                f.ClosePdf()
            End Try
        End Sub
    End Class
End Namespace

Download


If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats well. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.