How to load an HTML document in C# and .NET


  1. Load from a file:
    
    DocumentCore dc = DocumentCore.Load(@"d:\Book.html");
    
    The dc object represents a document loaded into memory. The file format is detected automatically from the file extension: ".html".

    After loading, the document is presented as a tree of objects whose root node is the DocumentCore class.

    To guarantee that the content being loaded is really HTML, and to set loading options, pass HtmlLoadOptions as the second parameter.

    
    DocumentCore dc = DocumentCore.Load(@"d:\Book.html", new HtmlLoadOptions());
    
  2. Load from a Stream:
    
                // Let us say we already have a HTML document as array of bytes.
                DocumentCore dc = null;
                using (MemoryStream htmlStream = new MemoryStream(htmlBytes))
                {
                    dc = DocumentCore.Load(htmlStream, new HtmlLoadOptions());
                }
                // Here we can do with our document 'dc' anything we need.
    
 

Complete code

using System;
using System.IO;
using SautinSoft.Document;

namespace Example
{
    /// <summary>
    /// Console sample demonstrating two ways of loading an HTML document into a
    /// DocumentCore instance: directly from a file path, and from a MemoryStream.
    /// </summary>
    class Program
    {
        // Relative path of the sample HTML document used by both loaders.
        const string SamplePath = @"..\..\..\example.html";

        /// <summary>
        /// Entry point: runs the file-based loading sample.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // A free 30-day key is available at:
            // https://sautinsoft.com/start-for-free/

            // Only the file-based sample runs by default; swap the comment
            // markers below to try the stream-based sample instead.
            LoadHtmlFromFile();
            //LoadHtmlFromStream();
        }

        /// <summary>
        /// Reads the sample HTML file from disk into a DocumentCore object.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-html-document-net-csharp-vb.php
        /// </remarks>
        static void LoadHtmlFromFile()
        {
            // No load options are supplied, so the format is detected automatically
            // from the ".html" file extension. Passing HtmlLoadOptions as a second
            // argument (see LoadHtmlFromStream) states the HTML format explicitly.
            DocumentCore document = DocumentCore.Load(SamplePath);

            ReportAndWait(document);
        }

        /// <summary>
        /// Reads the sample HTML file into a byte array, wraps the bytes in a
        /// MemoryStream and loads a DocumentCore object from that stream.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-html-document-net-csharp-vb.php
        /// </remarks>
        static void LoadHtmlFromStream()
        {
            DocumentCore document = null;

            // Pull the whole document into memory first.
            byte[] htmlBytes = File.ReadAllBytes(SamplePath);

            using (MemoryStream buffer = new MemoryStream(htmlBytes))
            {
                // A stream carries no file extension, so HtmlLoadOptions is passed
                // to state explicitly that the content is HTML.
                document = DocumentCore.Load(buffer, new HtmlLoadOptions());
            }

            ReportAndWait(document);
        }

        /// <summary>
        /// Prints a success message when a document object was obtained, then
        /// blocks until a key is pressed so the console output stays visible.
        /// </summary>
        /// <param name="document">The result of a load call; may be null.</param>
        static void ReportAndWait(DocumentCore document)
        {
            if (document != null)
            {
                Console.WriteLine("Loaded successfully!");
            }

            Console.ReadKey();
        }
    }
}

Download

Imports System
Imports System.IO
Imports SautinSoft.Document

''' <summary>
''' Console sample demonstrating two ways of loading an HTML document into a
''' DocumentCore instance: directly from a file path, and from a MemoryStream.
''' </summary>
Module Sample
    ' Relative path of the sample HTML document used by both loaders.
    Private Const SamplePath As String = "..\..\..\example.html"

    ''' <summary>
    ''' Entry point: runs the file-based loading sample.
    ''' </summary>
    Sub Main()
        ' A free 30-day key is available at:
        ' https://sautinsoft.com/start-for-free/

        ' Only the file-based sample runs by default; swap the comment
        ' markers below to try the stream-based sample instead.
        LoadHtmlFromFile()
        'LoadHtmlFromStream()
    End Sub

    ''' <summary>
    ''' Reads the sample HTML file from disk into a DocumentCore object.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-html-document-net-csharp-vb.php
    ''' </remarks>
    Sub LoadHtmlFromFile()
        ' No load options are supplied, so the format is detected automatically
        ' from the ".html" file extension. Passing HtmlLoadOptions as a second
        ' argument (see LoadHtmlFromStream) states the HTML format explicitly.
        Dim document As DocumentCore = DocumentCore.Load(SamplePath)

        ReportAndWait(document)
    End Sub

    ''' <summary>
    ''' Reads the sample HTML file into a byte array, wraps the bytes in a
    ''' MemoryStream and loads a DocumentCore object from that stream.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-html-document-net-csharp-vb.php
    ''' </remarks>
    Sub LoadHtmlFromStream()
        Dim document As DocumentCore = Nothing

        ' Pull the whole document into memory first.
        Dim htmlBytes() As Byte = File.ReadAllBytes(SamplePath)

        Using buffer As New MemoryStream(htmlBytes)
            ' A stream carries no file extension, so HtmlLoadOptions is passed
            ' to state explicitly that the content is HTML.
            document = DocumentCore.Load(buffer, New HtmlLoadOptions())
        End Using

        ReportAndWait(document)
    End Sub

    ''' <summary>
    ''' Prints a success message when a document object was obtained, then
    ''' blocks until a key is pressed so the console output stays visible.
    ''' </summary>
    ''' <param name="document">The result of a load call; may be Nothing.</param>
    Private Sub ReportAndWait(document As DocumentCore)
        If document IsNot Nothing Then
            Console.WriteLine("Loaded successfully!")
        End If

        Console.ReadKey()
    End Sub
End Module

Download


If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.