Convert PDF to XML in memory using C# and .NET
This sample shows how to convert both the tabular and the textual data of a PDF document to XML entirely in memory. The resulting XML is returned as a System.String.
Complete code
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample: converts a PDF held in a byte array to an XML string
    /// using SautinSoft.PdfFocus.
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Reads Table.pdf into memory, converts it to XML, writes the XML to
        /// Result.xml and then asks the operating-system shell to open that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Before starting, we recommend to get a free 100-day key:
            // https://sautinsoft.com/start-for-free/
            // Apply the key here:
            // SautinSoft.PdfFocus.SetLicense("...");

            // The input is resolved relative to the current working directory;
            // the output is written next to wherever the process is running.
            string sourcePdf = Path.GetFullPath(@"..\..\..\Table.pdf");
            string targetXml = "Result.xml";

            // Load the whole PDF into memory and hand the bytes to the converter.
            byte[] pdfBytes = File.ReadAllBytes(sourcePdf);
            var converter = new SautinSoft.PdfFocus();
            converter.OpenPdf(pdfBytes);

            // Nothing to convert when the converter reports no pages.
            if (converter.PageCount <= 0)
            {
                return;
            }

            // Convert the PDF bytes to an XML string.
            string xmlText = converter.ToXml();
            if (String.IsNullOrEmpty(xmlText))
            {
                return;
            }

            // Persist the XML, then open it through the shell
            // (UseShellExecute = true lets the OS pick the associated application).
            File.WriteAllText(targetXml, xmlText);
            var launch = new System.Diagnostics.ProcessStartInfo(targetXml) { UseShellExecute = true };
            System.Diagnostics.Process.Start(launch);
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
Module Sample
    ''' <summary>
    ''' Reads Table.pdf into memory, converts it to an XML string with
    ''' SautinSoft.PdfFocus, writes the XML to Result.xml and then asks the
    ''' operating-system shell to open that file.
    ''' </summary>
    Sub Main()
        ' Before starting, we recommend to get a free 100-day key:
        ' https://sautinsoft.com/start-for-free/
        ' Apply the key here:
        ' SautinSoft.PdfFocus.SetLicense("...")

        ' The input is resolved relative to the current working directory;
        ' the output is written next to wherever the process is running.
        Dim sourcePdf As String = Path.GetFullPath("..\..\..\Table.pdf")
        Dim targetXml As String = "Result.xml"

        ' Load the whole PDF into memory and hand the bytes to the converter.
        Dim pdfBytes As Byte() = File.ReadAllBytes(sourcePdf)
        Dim converter As New SautinSoft.PdfFocus()
        converter.OpenPdf(pdfBytes)

        ' Nothing to convert when the converter reports no pages.
        If converter.PageCount <= 0 Then
            Return
        End If

        ' Convert the PDF bytes to an XML string.
        Dim xmlText As String = converter.ToXml()
        If String.IsNullOrEmpty(xmlText) Then
            Return
        End If

        ' Persist the XML, then open it through the shell
        ' (UseShellExecute = True lets the OS pick the associated application).
        File.WriteAllText(targetXml, xmlText)
        Dim launch As New System.Diagnostics.ProcessStartInfo(targetXml) With {.UseShellExecute = True}
        System.Diagnostics.Process.Start(launch)
    End Sub
End Module
If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below: