Convert PDF to XML file in C# and .NET
The application shows how to convert all tabular data from PDF into XML file, the other textual and graphical data will be skipped.
Complete code
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: extracts the tabular data of a PDF document into an XML file
    /// using SautinSoft.PdfFocus.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Converts the tables found in "Table.pdf" to "Result.xml" and, when the
        /// conversion reports success, opens the result with the shell's default application.
        /// On failure a message is written to standard error and the process exit code is set to 1.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string pathToPdf = Path.GetFullPath(@"..\..\..\Table.pdf");
            string pathToXml = "Result.xml";

            // Get your free 30-day key here:
            // https://sautinsoft.com/start-for-free/

            // Convert PDF file to XML file.
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Let's convert only tables to XML and skip all textual data.
            f.XmlOptions.ConvertNonTabularDataToSpreadsheet = false;

            f.OpenPdf(pathToPdf);
            try
            {
                // No pages loaded: there is nothing to convert, so report it instead of exiting silently.
                if (f.PageCount <= 0)
                {
                    Console.Error.WriteLine("No pages were loaded from: " + pathToPdf);
                    Environment.ExitCode = 1;
                    return;
                }

                // The sample treats a return value of 0 as success; anything else is reported.
                int result = f.ToXml(pathToXml);
                if (result != 0)
                {
                    Console.Error.WriteLine("PDF to XML conversion failed, ToXml returned " + result.ToString());
                    Environment.ExitCode = 1;
                    return;
                }

                // Show XML document in browser (shell picks the application associated with .xml).
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(pathToXml) { UseShellExecute = true });
            }
            finally
            {
                // Release the source PDF once the conversion has finished, whatever the outcome.
                f.ClosePdf();
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample: extracts the tabular data of a PDF document into an XML file
''' using SautinSoft.PdfFocus.
''' </summary>
Module Sample
    ''' <summary>
    ''' Converts the tables found in "Table.pdf" to "Result.xml" and, when the
    ''' conversion reports success, opens the result with the shell's default application.
    ''' On failure a message is written to standard error and the process exit code is set to 1.
    ''' </summary>
    Sub Main()
        Dim pathToPdf As String = Path.GetFullPath("..\..\..\Table.pdf")
        Dim pathToXml As String = "Result.xml"

        ' Get your free 30-day key here:
        ' https://sautinsoft.com/start-for-free/

        ' Convert PDF file to XML file.
        Dim f As New SautinSoft.PdfFocus()

        ' Let's convert only tables to XML and skip all textual data.
        f.XmlOptions.ConvertNonTabularDataToSpreadsheet = False

        f.OpenPdf(pathToPdf)
        Try
            ' No pages loaded: there is nothing to convert, so report it instead of exiting silently.
            If f.PageCount <= 0 Then
                System.Console.Error.WriteLine("No pages were loaded from: " & pathToPdf)
                System.Environment.ExitCode = 1
                Return
            End If

            ' The sample treats a return value of 0 as success; anything else is reported.
            Dim result As Integer = f.ToXml(pathToXml)
            If result <> 0 Then
                System.Console.Error.WriteLine("PDF to XML conversion failed, ToXml returned " & result.ToString())
                System.Environment.ExitCode = 1
                Return
            End If

            ' Show XML document in browser (shell picks the application associated with .xml).
            System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(pathToXml) With {.UseShellExecute = True})
        Finally
            ' Release the source PDF once the conversion has finished, whatever the outcome.
            f.ClosePdf()
        End Try
    End Sub
End Module
If you need a new code example or have a question: email us at support@sautinsoft.com or ask at Online Chat (right-bottom corner of this page) or use the Form below: