Overview of all properties to convert PDF to Word in C# and .NET
This Console Application shows the all possible Settings acceptable to convert into Word formats: DOCX and RTF.
Complete code
using System;
using System.IO;
namespace Sample
{
class Sample
{
static void Main(string[] args)
{
// Before starting, we recommend to get a free 100-day key:
// https://sautinsoft.com/start-for-free/
// Apply the key here:
// SautinSoft.PdfFocus.SetLicense("...");
string pdfFile = Path.GetFullPath(@"..\..\..\simple text.pdf");
string wordFile = "Result.docx";
// In this sample you will find a short overview of all properties of WordOptions.
SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
f.OpenPdf(pdfFile);
if (f.PageCount > 0)
{
// You may choose output format between Docx and Rtf.
f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx;
// As you may know all text in PDF positioned by (x,y) coordinates.
// In a Word document all text is placed inside paragraphs.
// Flowing - The most useful and common type of Word document for editing. The resulting Word document looks as if it was typed by human.
// The document layout created without using text boxes.
//
// Exact - The most precise and fastest mode. The resulting Word document looks exact as PDF pixel by pixel (x,y).
// The document layout created by using text boxes, this gives a monumental accuracy for PDF to Word conversion.
//
// Continuous - The document layout created by using text boxes grouped in blocks.
// A golden mean between Flowing and Exact.
f.WordOptions.RenderMode = SautinSoft.PdfFocus.CWordOptions.eRenderMode.Flowing;
// As you may know PDF format doesn't have such concept as tables.
// It's true, all tables in PDF represented using graphical lines.
// true - parse all graphic lines to detect and recreate tables.
// false - leave all graphic lines as is.
f.WordOptions.DetectTables = true;
// As you may know PDF contains embedded fonts with own symbol widths.
// But the resulting Word document will have fonts installed at your system.
// Sometimes their have different symbol width.
// true - scale width of symbols to make it the same as in PDF.
// false - don't scale width of symbols and use width of installed fonts.
f.WordOptions.KeepCharScaleAndSpacing = false;
// Sometimes a PDF document can contain a picture with a scanned text.
// Besides of this, this document can contain invisible text over this picture.
// In case you need to get only that text and skip picture, you may set 'PreserveImages' to false and
// set this property to true:
f.WordOptions.ShowInvisibleText = true;
//f.PreserveImages = false;
int result = f.ToWord(wordFile);
// Show the resulting Word document.
if (result == 0)
{
System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(wordFile) { UseShellExecute = true });
}
}
}
}
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
Module Sample
Sub Main()
' Before starting, we recommend to get a free 100-day key:
' https://sautinsoft.com/start-for-free/
' Apply the key here
' SautinSoft.PdfFocus.SetLicense("...");
Dim pdfFile As String = Path.GetFullPath("..\..\..\simple text.pdf")
Dim wordFile As String = "Result.docx"
' In this sample you will find a short overview of all properties of WordOptions.
Dim f As New SautinSoft.PdfFocus()
f.OpenPdf(pdfFile)
If f.PageCount > 0 Then
' You may choose output format between Docx and Rtf.
f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Docx
' As you may know all text in PDF positioned by (x,y) coordinates.
' In a Word document all text is placed inside paragraphs.
' Flowing - The most useful and common type of Word document for editing. The resulting Word document looks as if it was typed by human.
' The document layout created without using text boxes.
'
' Exact - The most precise and fastest mode. The resulting Word document looks exact as PDF pixel by pixel (x,y).
' The document layout created by using text boxes, this gives a monumental accuracy for PDF to Word conversion.
'
' Continuous - The document layout created by using text boxes grouped in blocks.
' A golden mean between Flowing and Exact.
f.WordOptions.RenderMode = SautinSoft.PdfFocus.CWordOptions.eRenderMode.Flowing
' As you may know PDF format doesn't have such concept as tables.
' It's true, all tables in PDF represented using graphical lines.
' true - parse all graphic lines to detect and recreate tables.
' false - leave all graphic lines as is.
f.WordOptions.DetectTables = True
' As you may know PDF contains embedded fonts with own symbol widths.
' But the resulting Word document will have fonts installed at your system.
' Sometimes their have different symbol width.
' true - scale width of symbols to make it the same as in PDF.
' false - don't scale width of symbols and use width of installed fonts.
f.WordOptions.KeepCharScaleAndSpacing = False
' Sometimes a PDF document can contain a picture with a scanned text.
' Besides of this, this document can contain invisible text over this picture.
' In case you need to get only that text and skip picture, you may set 'PreserveImages' to false and
' set this property to true:
f.WordOptions.ShowInvisibleText = True
'f.PreserveImages = false;
Dim result As Integer = f.ToWord(wordFile)
' Show the resulting Word document.
If result = 0 Then
System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(wordFile) With {.UseShellExecute = True})
End If
End If
End Sub
End Module
If you need a new code example or have a question: email us at support@sautinsoft.com or ask at Online Chat (right-bottom corner of this page) or use the Form below: