Document .Net supports these formats:
| PDF | DOCX | RTF | HTML | Text | Image |
|---|---|---|---|---|---|
| Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read/Write | Create/Read(OCR)/Write |
To load a document from a file, a single line is enough:
// Load a document with a single call; the path used here points to a PDF file.
DocumentCore dc = DocumentCore.Load(@"d:\Book.pdf");
DocumentCore is the root class; it represents the document itself.
In this example, the Load() method detects that the document being loaded is a PDF from its ".pdf" extension.
You can also set the type of the document explicitly by passing load options as the second parameter, for example PdfLoadOptions, DocxLoadOptions, or another options class:
// Describe how the PDF should be read, then pass the options to Load() as its second argument.
PdfLoadOptions pdfOptions = new PdfLoadOptions();

// 'false' keeps vector graphics as they are instead of converting them to raster images.
pdfOptions.RasterizeVectorGraphics = false;

// A PDF has no real tables, only sets of orthogonal graphic lines.
// With 'true' the component detects those lines and recreates tables from them.
pdfOptions.DetectTables = false;

// 'Disabled' - never load fonts embedded in the PDF; use same-named system fonts or ones with similar metrics.
// 'Enabled'  - always load fonts embedded in the PDF.
// 'Auto'     - load only the embedded fonts that are missing in the system; otherwise use system fonts.
pdfOptions.PreserveEmbeddedFonts = PropertyState.Auto;

DocumentCore dc = DocumentCore.Load(@"d:\Book.pdf", pdfOptions);
All load options are derived from the base abstract class LoadOptions.
After loading you get the full Tree Of Objects and can do anything you want: Find, Replace, Remove, Insert, Modify, Save to another format.
Loading from a Stream is also straightforward:
// Assume 'docxBytes' already holds a DOCX document as an array of bytes.
DocumentCore dc = null;
using (MemoryStream ms = new MemoryStream(docxBytes))
{
    // DocxLoadOptions states explicitly that the stream contains a DOCX document.
    DocxLoadOptions docxOptions = new DocxLoadOptions();
    dc = DocumentCore.Load(ms, docxOptions);
}
// From here on the loaded document 'dc' can be used for whatever is needed.
Complete code
using System.IO;
using SautinSoft.Document;
using System;
namespace Example
{
    /// <summary>
    /// Console sample that shows three ways to load a document into DocumentCore:
    /// from a file path, from a Stream and from an array of bytes.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Runs the load-from-file sample; uncomment one of the
        /// other calls to try loading from a stream or from bytes instead.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Get your free 100-day key here:
            // https://sautinsoft.com/start-for-free/
            LoadFromFile();
            //LoadFromStream();
            //LoadFromBytes();
        }

        /// <summary>
        /// Loads a document into DocumentCore (dc) from a file.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-document.php
        /// </remarks>
        static void LoadFromFile()
        {
            string filePath = @"..\..\..\example.docx";

            // The file format is detected automatically from the file extension: ".docx".
            // But as shown in the example below, we can specify DocxLoadOptions as 2nd parameter
            // to explicitly set that a loadable document has Docx format.
            DocumentCore dc = DocumentCore.Load(filePath);

            if (dc != null)
            {
                Console.WriteLine("Loaded successfully!");
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Loads a document into DocumentCore (dc) from a Stream.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-document.php
        /// </remarks>
        static void LoadFromStream()
        {
            // 'dc' is declared before the using block on purpose,
            // so we can continue to work with it after the stream is closed.
            DocumentCore dc = null;

            // Open the file read-only. The FileStream(path, mode) constructor requests
            // read/write access, which fails for read-only files although we only read here.
            using (FileStream fs = new FileStream(@"..\..\..\example.docx", FileMode.Open, FileAccess.Read))
            {
                // Here we explicitly set that a loadable document is Docx.
                dc = DocumentCore.Load(fs, new DocxLoadOptions());
            }

            if (dc != null)
            {
                Console.WriteLine("Loaded successfully!");
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Loads a document into DocumentCore (dc) from an array of bytes.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-document.php
        /// </remarks>
        static void LoadFromBytes()
        {
            // Get document bytes from a file.
            byte[] fileBytes = File.ReadAllBytes(@"..\..\..\example.pdf");

            DocumentCore dc = null;

            using (MemoryStream ms = new MemoryStream(fileBytes))
            {
                // With PdfLoadOptions we explicitly set that a loadable document is PDF.
                PdfLoadOptions pdfLO = new PdfLoadOptions()
                {
                    // 'false' - means to load vector graphics as is. Don't transform it to raster images.
                    RasterizeVectorGraphics = false,

                    // The PDF format doesn't have real tables, in fact it's a set of orthogonal graphic lines.
                    // In case of 'true' the component will detect and recreate tables from graphic lines.
                    DetectTables = false,

                    // 'Disabled' - Never load embedded fonts in PDF. Use the fonts with the same name installed at the system or similar by font metrics.
                    // 'Enabled' - Always load embedded fonts in PDF.
                    // 'Auto' - Load only embedded fonts missing in the system. In other case, use the system fonts.
                    PreserveEmbeddedFonts = PropertyState.Auto,

                    // Load only first 2 pages from the document.
                    PageIndex = 0,
                    PageCount = 2
                };

                dc = DocumentCore.Load(ms, pdfLO);
            }

            if (dc != null)
            {
                Console.WriteLine("Loaded successfully!");
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
Imports System
Imports System.IO
Imports SautinSoft.Document
Module Sample
    ''' <summary>
    ''' Entry point. Runs the load-from-file sample; uncomment one of the
    ''' other calls to try loading from a stream or from bytes instead.
    ''' </summary>
    Sub Main()
        ' Get your free 100-day key here:
        ' https://sautinsoft.com/start-for-free/
        LoadFromFile()
        'LoadFromStream()
        'LoadFromBytes()
    End Sub

    ''' <summary>
    ''' Loads a document into DocumentCore (dc) from a file.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-document.php
    ''' </remarks>
    Sub LoadFromFile()
        Dim filePath As String = "..\..\..\example.docx"

        ' The file format is detected automatically from the file extension: ".docx".
        ' But as shown in the example below, we can specify DocxLoadOptions as 2nd parameter
        ' to explicitly set that a loadable document has Docx format.
        Dim dc As DocumentCore = DocumentCore.Load(filePath)

        If dc IsNot Nothing Then
            Console.WriteLine("Loaded successfully!")
        End If

        ' Keep the console window open until a key is pressed.
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Loads a document into DocumentCore (dc) from a Stream.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-document.php
    ''' </remarks>
    Sub LoadFromStream()
        ' 'dc' is declared before the Using block on purpose,
        ' so we can continue to work with it after the stream is closed.
        Dim dc As DocumentCore = Nothing

        ' Open the file read-only. The FileStream(path, mode) constructor requests
        ' read/write access, which fails for read-only files although we only read here.
        Using fs As New FileStream("..\..\..\example.docx", FileMode.Open, FileAccess.Read)
            ' Here we explicitly set that a loadable document is Docx.
            dc = DocumentCore.Load(fs, New DocxLoadOptions())
        End Using

        If dc IsNot Nothing Then
            Console.WriteLine("Loaded successfully!")
        End If

        ' Keep the console window open until a key is pressed.
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Loads a document into DocumentCore (dc) from an array of bytes.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/load-document.php
    ''' </remarks>
    Sub LoadFromBytes()
        ' Get document bytes from a file.
        Dim fileBytes() As Byte = File.ReadAllBytes("..\..\..\example.pdf")

        Dim dc As DocumentCore = Nothing

        Using ms As New MemoryStream(fileBytes)
            ' With PdfLoadOptions we explicitly set that a loadable document is PDF.
            Dim pdfLO As New PdfLoadOptions()
            With pdfLO
                ' 'False' - means to load vector graphics as is. Don't transform it to raster images.
                .RasterizeVectorGraphics = False

                ' 'False' - means don't detect tables.
                ' The PDF format doesn't have real tables, in fact it's a set of orthogonal graphic lines.
                ' Set it to 'True' and the component will detect and recreate tables from graphic lines.
                .DetectTables = False

                ' 'Disabled' - Never load embedded fonts in PDF. Use the fonts with the same name installed at the system or similar by font metrics.
                ' 'Enabled' - Always load embedded fonts in PDF.
                ' 'Auto' - Load only embedded fonts missing in the system. In other case, use the system fonts.
                .PreserveEmbeddedFonts = PropertyState.Auto

                ' Load only first 2 pages from the document.
                .PageIndex = 0
                .PageCount = 2
            End With

            dc = DocumentCore.Load(ms, pdfLO)
        End Using

        If dc IsNot Nothing Then
            Console.WriteLine("Loaded successfully!")
        End If

        ' Keep the console window open until a key is pressed.
        Console.ReadKey()
    End Sub
End Module
If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below: