Convert a document to another format within C# and .NET


Document .Net lets your application convert a document from one format to another.
You only need to Load() a document and Save() it in the desired format:


            // Load the source document, then write it out under the target file name.
            DocumentCore dc = DocumentCore.Load("...");
            dc.Save("....");

Document .Net supports these formats:

PDF | DOCX | RTF | HTML | Text | Image
Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read(OCR)/Write

Conversion examples:

  1. Convert DOCX to PDF:

                // Read the DOCX file from disk, then save the document as After.pdf.
                DocumentCore dc = DocumentCore.Load(@"d:\Before.docx");
                dc.Save(@"d:\After.pdf");
    
  2. Convert PDF to DOCX:

                // Read the PDF file from disk, then save the document as After.docx.
                DocumentCore dc = DocumentCore.Load(@"d:\Before.pdf");
                dc.Save(@"d:\After.docx");
    
  3. Convert RTF to HTML:

                // Read the RTF file from disk, then save the document as After.html.
                DocumentCore dc = DocumentCore.Load(@"d:\Before.rtf");
                dc.Save(@"d:\After.html");
    
  4. Convert RTF to DOCX (in memory):

                // Input: an RTF document held in memory. The "...." is a placeholder;
                // supply your own bytes.
                byte[] rtfBytes = ....; // Say, get RTF bytes from DB.
                // Output: receives the converted DOCX document.
                byte[] docxBytes = null;
                using (MemoryStream msRtf = new MemoryStream(rtfBytes))
                {
                    // Load from the stream, passing RtfLoadOptions to describe the input.
                    DocumentCore dc = DocumentCore.Load(msRtf, new RtfLoadOptions());
                    using (MemoryStream msDocx = new MemoryStream())
                    {
                        // Save into a second in-memory stream using DocxSaveOptions,
                        // then copy the written bytes out as an array.
                        dc.Save(msDocx, new DocxSaveOptions());
                        docxBytes = msDocx.ToArray();
                    }
                }
    
  5. Convert PDF to HTML (with options):

                // Describe how the PDF should be read: table detection on, continuous
                // conversion mode, and a page window of PageIndex 0 / PageCount 1
                // (presumably just the first page - confirm against the API reference).
                PdfLoadOptions loadOptions = new PdfLoadOptions()
                {
                    DetectTables = true,
                    ConversionMode = PdfConversionMode.Continuous,
                    PageIndex = 0,
                    PageCount = 1
                };
                DocumentCore dc = DocumentCore.Load(@"d:\Before.pdf", loadOptions);

                // Describe the HTML-fixed output: HTML5, inline CSS, images embedded.
                HtmlFixedSaveOptions saveOptions = new HtmlFixedSaveOptions()
                {
                    Version = HtmlVersion.Html5,
                    CssExportMode = CssExportMode.Inline,
                    EmbedImages = true
                };
                dc.Save(@"d:\After.html", saveOptions);
    

Furthermore, during the conversion cycle you may replace any element in a document or add something new: a digital signature, a watermark, page numbering, etc.

Complete code

using System.IO;
using SautinSoft.Document;

namespace Example
{
    /// <summary>
    /// Console sample showing two ways to convert a PDF with <c>DocumentCore</c>:
    /// file to file (PDF to DOCX) and stream to stream (PDF to HTML).
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: runs the file-based sample, then the stream-based sample.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Get your free 30-day key here:
            // https://sautinsoft.com/start-for-free/

            ConvertFromFile();
            ConvertFromStream();
        }

        /// <summary>
        /// Convert PDF to DOCX (file to file).
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-document.php
        /// </remarks>
        static void ConvertFromFile()
        {
            string inpFile = @"..\..\..\example.pdf";
            string outFile = @"Result.docx";

            DocumentCore dc = DocumentCore.Load(inpFile);
            dc.Save(outFile);

            // Open the result for demonstration purposes.
            OpenResult(outFile);
        }

        /// <summary>
        /// Convert PDF to HTML (using Stream).
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-document.php
        /// </remarks>
        static void ConvertFromStream()
        {
            // We need files only for demonstration purposes.
            // The conversion process will be done completely in memory.
            string inpFile = @"..\..\..\example.pdf";
            string outFile = @"Result.html";
            byte[] inpData = File.ReadAllBytes(inpFile);
            byte[] outData;

            using (MemoryStream msInp = new MemoryStream(inpData))
            using (MemoryStream outMs = new MemoryStream())
            {
                // Load a document.
                DocumentCore dc = DocumentCore.Load(msInp, new PdfLoadOptions()
                {
                    PreserveGraphics = true,
                    DetectTables = true
                });

                // Save the document to HTML-fixed format.
                dc.Save(outMs, new HtmlFixedSaveOptions()
                {
                    CssExportMode = CssExportMode.Inline,
                    EmbedImages = true
                });

                // ToArray() always returns an array (never null), so no null
                // check is needed before using the result below.
                outData = outMs.ToArray();
            }

            // Show the result for demonstration purposes. Both streams are
            // already released here; only the copied bytes are needed.
            File.WriteAllBytes(outFile, outData);
            OpenResult(outFile);
        }

        /// <summary>
        /// Opens a file through the operating-system shell
        /// (<c>UseShellExecute = true</c>) so the user can inspect it.
        /// </summary>
        /// <param name="path">Path of the file to open.</param>
        static void OpenResult(string path)
        {
            System.Diagnostics.ProcessStartInfo startInfo =
                new System.Diagnostics.ProcessStartInfo(path) { UseShellExecute = true };

            // Process.Start may return null when no new process resource is started.
            // When it does return a Process, dispose it so its handle is not leaked;
            // disposing does not close the launched application.
            System.Diagnostics.Process viewer = System.Diagnostics.Process.Start(startInfo);
            if (viewer != null)
            {
                viewer.Dispose();
            }
        }
    }
}

Download

Imports System
Imports System.IO
Imports SautinSoft.Document

''' <summary>
''' Console sample showing two ways to convert a PDF with DocumentCore:
''' file to file (PDF to DOCX) and stream to stream (PDF to HTML).
''' </summary>
Module Sample
    ''' <summary>
    ''' Entry point: runs the file-based sample, then the stream-based sample.
    ''' </summary>
    Sub Main()
        ' Get your free 30-day key here:
        ' https://sautinsoft.com/start-for-free/

        ConvertFromFile()
        ConvertFromStream()
    End Sub

    ''' <summary>
    ''' Convert PDF to DOCX (file to file).
    ''' </summary>
    ''' <remarks>
    ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-document.php
    ''' </remarks>
    Sub ConvertFromFile()
        Dim inpFile As String = "..\..\..\example.pdf"
        Dim outFile As String = "Result.docx"

        Dim dc As DocumentCore = DocumentCore.Load(inpFile)
        dc.Save(outFile)

        ' Open the result for demonstration purposes.
        OpenResult(outFile)
    End Sub

    ''' <summary>
    ''' Convert PDF to HTML (using Stream).
    ''' </summary>
    ''' <remarks>
    ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-document.php
    ''' </remarks>
    Sub ConvertFromStream()

        ' We need files only for demonstration purposes.
        ' The conversion process will be done completely in memory.
        Dim inpFile As String = "..\..\..\example.pdf"
        Dim outFile As String = "Result.html"
        Dim inpData() As Byte = File.ReadAllBytes(inpFile)
        Dim outData() As Byte

        Using msInp As New MemoryStream(inpData), outMs As New MemoryStream()

            ' Load a document.
            Dim dc As DocumentCore = DocumentCore.Load(msInp, New PdfLoadOptions() With {
                .PreserveGraphics = True,
                .DetectTables = True
            })

            ' Save the document to HTML-fixed format.
            dc.Save(outMs, New HtmlFixedSaveOptions() With {
                .CssExportMode = CssExportMode.Inline,
                .EmbedImages = True
            })

            ' ToArray() always returns an array (never Nothing), so no
            ' Nothing check is needed before using the result below.
            outData = outMs.ToArray()
        End Using

        ' Show the result for demonstration purposes. Both streams are
        ' already released here; only the copied bytes are needed.
        File.WriteAllBytes(outFile, outData)
        OpenResult(outFile)
    End Sub

    ''' <summary>
    ''' Opens a file through the operating-system shell
    ''' (UseShellExecute = True) so the user can inspect it.
    ''' </summary>
    ''' <param name="path">Path of the file to open.</param>
    Private Sub OpenResult(path As String)
        Dim startInfo As New System.Diagnostics.ProcessStartInfo(path) With {.UseShellExecute = True}

        ' Process.Start may return Nothing when no new process resource is started.
        ' When it does return a Process, dispose it so its handle is not leaked;
        ' disposing does not close the launched application.
        Dim viewer As System.Diagnostics.Process = System.Diagnostics.Process.Start(startInfo)
        If viewer IsNot Nothing Then
            viewer.Dispose()
        End If
    End Sub
End Module

Download


If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.