Convert HTML file to Text file in C# and .NET


Complete code

using System;
using System.IO;
using System.Collections;

namespace Sample
{
    /// <summary>
    /// Console sample that converts an HTML file to a plain-text file
    /// using SautinSoft UseOffice .Net.
    /// </summary>
    class Test
    {
        /// <summary>
        /// Converts <c>example.html</c> to <c>Result.txt</c> and opens the result
        /// with the shell's default application when the conversion succeeds.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Convert HTML file to Text file.
            // If you need more information about UseOffice .Net email us at:
            // support@sautinsoft.com.

            SautinSoft.UseOffice u = new SautinSoft.UseOffice();

            // ConvertFile is given full local paths (see return code 1 below),
            // so both paths are expanded up front.
            string inpFile = Path.GetFullPath(@"..\..\example.html");
            string outFile = Path.GetFullPath("Result.txt");

            // Prepare UseOffice .Net, loads MS Word in memory.
            // Return values:
            // 0 - Loaded successfully
            // 1 - Can't load MS Word library in memory
            int ret = u.InitWord();

            if (ret == 1)
            {
                Console.WriteLine("Error! Can't load MS Word library in memory");
                return;
            }

            try
            {
                // Perform the conversion.
                ret = u.ConvertFile(inpFile, outFile, SautinSoft.UseOffice.eDirection.HTML_to_TEXT);
            }
            finally
            {
                // Release MS Word from memory. Done in a finally block so that
                // Word is released even if ConvertFile throws.
                u.CloseWord();
            }

            // ConvertFile return values:
            // 0 - Converted successfully
            // 1 - Can't open input file. Check that you are using a full local path to the input file; URLs and relative paths are not supported
            // 2 - Can't create output file. Check that you have write permission for this path and that it is not in use by another application
            // 3 - Conversion failed, please contact our Support Team
            // 4 - MS Office isn't installed. The component requires one of these versions of MS Office: 2000, XP, 2003, 2007, 2010, 2013, 2016 or 2019.
            if (ret == 0)
            {
                // Open the result.
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true });
            }
            else
            {
                // Print the numeric code so it can be matched against the list above.
                Console.WriteLine("Error! Conversion failed with code {0}.", ret);
                Console.WriteLine("Error! Please contact with SautinSoft support: support@sautinsoft.com.");
            }
        }
    }
}

Download

Imports System
Imports System.IO
Imports System.Collections

Namespace Sample
    ''' <summary>
    ''' Console sample that converts an HTML file to a plain-text file
    ''' using SautinSoft UseOffice .Net.
    ''' </summary>
    Friend Class Test
        ''' <summary>
        ''' Converts example.html to Result.txt and opens the result with the
        ''' shell's default application when the conversion succeeds.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args() As String)
            ' Convert HTML file to Text file.
            ' If you need more information about UseOffice .Net email us at:
            ' support@sautinsoft.com.

            Dim u As New SautinSoft.UseOffice()

            ' ConvertFile is given full local paths (see return code 1 below),
            ' so both paths are expanded up front.
            Dim inpFile As String = Path.GetFullPath("..\example.html")
            Dim outFile As String = Path.GetFullPath("Result.txt")

            ' Prepare UseOffice .Net, loads MS Word in memory.
            ' Return values:
            ' 0 - Loaded successfully
            ' 1 - Can't load MS Word library in memory
            Dim ret As Integer = u.InitWord()

            If ret = 1 Then
                Console.WriteLine("Error! Can't load MS Word library in memory")
                Return
            End If

            Try
                ' Perform the conversion.
                ret = u.ConvertFile(inpFile, outFile, SautinSoft.UseOffice.eDirection.HTML_to_TEXT)
            Finally
                ' Release MS Word from memory. Done in a Finally block so that
                ' Word is released even if ConvertFile throws.
                u.CloseWord()
            End Try

            ' ConvertFile return values:
            ' 0 - Converted successfully
            ' 1 - Can't open input file. Check that you are using a full local path to the input file; URLs and relative paths are not supported
            ' 2 - Can't create output file. Check that you have write permission for this path and that it is not in use by another application
            ' 3 - Conversion failed, please contact our Support Team
            ' 4 - MS Office isn't installed. The component requires one of these versions of MS Office: 2000, XP, 2003, 2007, 2010, 2013, 2016 or 2019.
            If ret = 0 Then
                ' Open the result.
                System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(outFile) With {.UseShellExecute = True})
            Else
                ' Print the numeric code so it can be matched against the list above.
                Console.WriteLine("Error! Conversion failed with code {0}.", ret)
                Console.WriteLine("Error! Please contact with SautinSoft support: support@sautinsoft.com.")
            End If
        End Sub
    End Class
End Namespace

Download

If you are looking for a standalone .Net Framework or .Net Core solution to convert HTML to Text without MS Office, see our Document .Net.


If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.