Document .Net can help your application convert a document from one format to another.
You only need to Load() a document and Save() it in the desired format.
DocumentCore dc = DocumentCore.Load("...");
dc.Save("....");
Document .Net supports these formats:
PDF | DOCX | RTF | HTML | Text | Image |
---|---|---|---|---|---|
Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read(OCR)/Write |
Complete code
using System.IO;
using SautinSoft.Document;
namespace Example
{
    /// <summary>
    /// Sample program that converts a PDF document to HTML with Document .Net,
    /// first from file to file and then through in-memory streams.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Get your free 100-day key here:
            // https://sautinsoft.com/start-for-free/
            ConvertFromFile();
            ConvertFromStream();
        }

        /// <summary>
        /// Convert PDF to HTML (file to file).
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-pdf-to-html-in-csharp-vb.php
        /// </remarks>
        static void ConvertFromFile()
        {
            // Path.Combine inserts the platform's directory separator, so the same
            // relative path resolves on Windows and on Linux/macOS.
            string inpFile = Path.Combine("..", "..", "..", "example.pdf");
            string outFile = @"Result.html";

            DocumentCore dc = DocumentCore.Load(inpFile, CreatePdfLoadOptions());
            dc.Save(outFile, new HtmlFixedSaveOptions());

            // Important for Linux: Install MS Fonts
            // sudo apt install ttf-mscorefonts-installer -y

            // Open the result for demonstration purposes.
            OpenResult(outFile);
        }

        /// <summary>
        /// Convert PDF to HTML (using Stream).
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-pdf-to-html-in-csharp-vb.php
        /// </remarks>
        static void ConvertFromStream()
        {
            // We need files only for demonstration purposes.
            // The conversion process will be done completely in memory.
            // Path.Combine inserts the platform's directory separator, so the same
            // relative path resolves on Windows and on Linux/macOS.
            string inpFile = Path.Combine("..", "..", "..", "example.pdf");
            string outFile = @"ResultStream.html";

            byte[] inpData = File.ReadAllBytes(inpFile);
            byte[] outData;

            using (MemoryStream msInp = new MemoryStream(inpData))
            {
                // Load a document.
                DocumentCore dc = DocumentCore.Load(msInp, CreatePdfLoadOptions());

                // Save the document to HTML format.
                using (MemoryStream outMs = new MemoryStream())
                {
                    dc.Save(outMs, new HtmlFixedSaveOptions());
                    outData = outMs.ToArray();

                    // Important for Linux: Install MS Fonts
                    // sudo apt install ttf-mscorefonts-installer -y
                }
            }

            // Show the result for demonstration purposes.
            // ToArray() never returns null, so no null check is needed here.
            File.WriteAllBytes(outFile, outData);
            OpenResult(outFile);
        }

        /// <summary>
        /// Builds the PDF load options shared by both conversion samples.
        /// </summary>
        /// <returns>A new <c>PdfLoadOptions</c> instance configured for the samples.</returns>
        static PdfLoadOptions CreatePdfLoadOptions()
        {
            // Specifying PdfLoadOptions we explicitly set that a loadable document is PDF.
            return new PdfLoadOptions()
            {
                // 'false' - means to load vector graphics as is. Don't transform it to raster images.
                RasterizeVectorGraphics = false,

                // The PDF format doesn't have real tables, in fact it's a set of orthogonal graphic lines.
                // In case of 'true' the component will detect and recreate tables from graphic lines.
                DetectTables = false,

                // 'Disabled' - Never load embedded fonts in PDF. Use the fonts with the same name installed at the system or similar by font metrics.
                // 'Enabled' - Always load embedded fonts in PDF.
                // 'Auto' - Load only embedded fonts missing in the system. In other case, use the system fonts.
                PreserveEmbeddedFonts = PropertyState.Auto
            };
        }

        /// <summary>
        /// Starts a process for the given file through the operating system shell.
        /// </summary>
        /// <param name="path">Path of the file to open.</param>
        static void OpenResult(string path)
        {
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(path) { UseShellExecute = true });
        }
    }
}
Imports System.IO
Imports SautinSoft.Document
Namespace Example
    ''' <summary>
    ''' Sample program that converts a PDF document to HTML with Document .Net,
    ''' first from file to file and then through in-memory streams.
    ''' </summary>
    Friend Class Program
        Shared Sub Main(ByVal args() As String)
            ' Get your free 100-day key here:
            ' https://sautinsoft.com/start-for-free/
            ConvertFromFile()
            ConvertFromStream()
        End Sub

        ''' <summary>
        ''' Convert PDF to HTML (file to file).
        ''' </summary>
        ''' <remarks>
        ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-pdf-to-html-in-csharp-vb.php
        ''' </remarks>
        Private Shared Sub ConvertFromFile()
            ' Path.Combine inserts the platform's directory separator, so the same
            ' relative path resolves on Windows and on Linux/macOS.
            Dim inpFile As String = Path.Combine("..", "..", "..", "example.pdf")
            Dim outFile As String = "Result.html"

            Dim dc As DocumentCore = DocumentCore.Load(inpFile, CreatePdfLoadOptions())
            dc.Save(outFile, New HtmlFixedSaveOptions())

            ' Important for Linux: Install MS Fonts
            ' sudo apt install ttf-mscorefonts-installer -y

            ' Open the result for demonstration purposes.
            OpenResult(outFile)
        End Sub

        ''' <summary>
        ''' Convert PDF to HTML (using Stream).
        ''' </summary>
        ''' <remarks>
        ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/convert-pdf-to-html-in-csharp-vb.php
        ''' </remarks>
        Private Shared Sub ConvertFromStream()
            ' We need files only for demonstration purposes.
            ' The conversion process will be done completely in memory.
            ' Path.Combine inserts the platform's directory separator, so the same
            ' relative path resolves on Windows and on Linux/macOS.
            Dim inpFile As String = Path.Combine("..", "..", "..", "example.pdf")
            Dim outFile As String = "ResultStream.html"

            Dim inpData() As Byte = File.ReadAllBytes(inpFile)
            Dim outData() As Byte

            Using msInp As New MemoryStream(inpData)
                ' Load a document.
                Dim dc As DocumentCore = DocumentCore.Load(msInp, CreatePdfLoadOptions())

                ' Save the document to HTML format.
                Using outMs As New MemoryStream()
                    dc.Save(outMs, New HtmlFixedSaveOptions())
                    outData = outMs.ToArray()

                    ' Important for Linux: Install MS Fonts
                    ' sudo apt install ttf-mscorefonts-installer -y
                End Using
            End Using

            ' Show the result for demonstration purposes.
            ' ToArray() never returns Nothing, so no check is needed here.
            File.WriteAllBytes(outFile, outData)
            OpenResult(outFile)
        End Sub

        ''' <summary>
        ''' Builds the PDF load options shared by both conversion samples.
        ''' </summary>
        ''' <returns>A new <c>PdfLoadOptions</c> instance configured for the samples.</returns>
        Private Shared Function CreatePdfLoadOptions() As PdfLoadOptions
            ' Specifying PdfLoadOptions we explicitly set that a loadable document is PDF.
            Dim pdfLO As New PdfLoadOptions()
            With pdfLO
                ' 'False' - means to load vector graphics as is. Don't transform it to raster images.
                .RasterizeVectorGraphics = False

                ' 'False' - means don't detect tables.
                ' The PDF format doesn't have real tables, in fact it's a set of orthogonal graphic lines.
                ' Set it to 'True' and the component will detect and recreate tables from graphic lines.
                .DetectTables = False

                ' 'Disabled' - Never load embedded fonts in PDF. Use the fonts with the same name installed at the system or similar by font metrics.
                ' 'Enabled' - Always load embedded fonts in PDF.
                ' 'Auto' - Load only embedded fonts missing in the system. In other case, use the system fonts.
                .PreserveEmbeddedFonts = PropertyState.Auto
            End With
            Return pdfLO
        End Function

        ''' <summary>
        ''' Starts a process for the given file through the operating system shell.
        ''' </summary>
        ''' <param name="path">Path of the file to open.</param>
        Private Shared Sub OpenResult(ByVal path As String)
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(path) With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace
If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below: