Convert PDF to Word in memory using C# and .NET
Document processing in applications is a key task in modern programming. When automatically converting PDF files to editable formats like Word, a fast and reliable approach without the need to constantly save intermediate files to disk is essential. In this article, we'll take a detailed look at how to convert PDF to Word in memory in C# and .NET using the PDF Focus .NET component of the popular SautinSoft library.
When automating document processing, it's often necessary to process PDF files entirely in memory, without intermediate files on the hard drive. This provides:
- Faster performance: no IO overhead for reading/writing files.
- Increased security: data is not stored or transferred through the file system.
- Better resource management: avoids file system and permissions issues.
By using library APIs that support in-memory processing, developers can convert documents entirely in RAM, which makes this approach ideal for server applications, APIs, and cloud solutions.
This example will be useful in the following scenarios:
- Automated document processing: for example, converting PDF reports to Word for further editing.
- Cloud services: where saving files to disk is impractical.
- Business applications: solutions where processing is performed entirely in memory.
- Document workflow creation: systems that automate information extraction.
This simple console application shows two ways to convert a PDF to a Word document (DOCX or RTF) in memory.
The first method converts PDF into DOCX format using arrays of bytes.
The second method shows how to convert PDF to RTF operating with MemoryStream.
Complete code
using System;
using System.IO;
namespace Sample
{
    /// <summary>
    /// Console sample that converts a PDF document to Word formats entirely in memory
    /// using SautinSoft PDF Focus .NET: once through byte arrays (DOCX) and once
    /// through streams (RTF).
    /// </summary>
    class Sample
    {
        /// <summary>
        /// Entry point. Runs the byte-array (DOCX) conversion; uncomment the second
        /// call to run the stream-based (RTF) conversion instead.
        /// </summary>
        static void Main(string[] args)
        {
            // Before starting, we recommend to get a free key:
            // https://sautinsoft.com/start-for-free/
            // Apply the key here (it is shared by both conversion methods):
            // SautinSoft.PdfFocus.SetLicense("...");

            ConvertPdfToDocxBytes();
            //ConvertPdfToRtfStream();
        }

        /// <summary>
        /// Resolves the absolute path of the sample PDF that lives three directories
        /// above the working directory.
        /// </summary>
        /// <returns>Full path to "simple text.pdf".</returns>
        private static string GetSamplePdfPath()
        {
            // Path.Combine uses the platform's separator, so the sample also works
            // on Linux/macOS where a hard-coded '\' is not a directory separator.
            return Path.GetFullPath(Path.Combine("..", "..", "..", "simple text.pdf"));
        }

        /// <summary>
        /// Opens the given file with the application the operating system
        /// associates with it. Used only to show the result of the demo.
        /// </summary>
        /// <param name="file">Path of the file to open.</param>
        private static void OpenWithDefaultApp(string file)
        {
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(file) { UseShellExecute = true });
        }

        /// <summary>
        /// Converts a PDF held in a byte array to a DOCX held in a byte array,
        /// then writes the result to "Result.docx" and opens it.
        /// </summary>
        private static void ConvertPdfToDocxBytes()
        {
            // Assume that we already have a PDF document as array of bytes.
            byte[] pdf = File.ReadAllBytes(GetSamplePdfPath());

            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
            f.OpenPdf(pdf);
            try
            {
                if (f.PageCount <= 0)
                {
                    Console.Error.WriteLine("The PDF document could not be opened or contains no pages.");
                    return;
                }

                // Convert PDF to Word in memory.
                byte[] docx = f.ToWord();
                if (docx == null)
                {
                    // NOTE(review): a null result presumably signals a failed conversion - confirm in the API docs.
                    Console.Error.WriteLine("PDF to DOCX conversion failed.");
                    return;
                }

                // Save the Word document to a file only for demonstration purposes.
                string wordFile = "Result.docx";
                File.WriteAllBytes(wordFile, docx);
                OpenWithDefaultApp(wordFile);
            }
            finally
            {
                // Release the loaded PDF even when the conversion fails or throws.
                f.ClosePdf();
            }
        }

        /// <summary>
        /// Converts a PDF read from a stream to RTF written into a
        /// <see cref="MemoryStream"/>, then writes the result to "Result.rtf"
        /// and opens it.
        /// </summary>
        private static void ConvertPdfToRtfStream()
        {
            SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();

            // Both streams are disposed when the block exits, on success or failure.
            using (MemoryStream rtfStream = new MemoryStream())
            // Assume that we already have a PDF document as stream.
            using (FileStream pdfStream = new FileStream(GetSamplePdfPath(), FileMode.Open, FileAccess.Read))
            {
                f.OpenPdf(pdfStream);
                try
                {
                    if (f.PageCount <= 0)
                    {
                        Console.Error.WriteLine("The PDF document could not be opened or contains no pages.");
                        return;
                    }

                    f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf;

                    // Convert PDF to RTF in memory; the sample treats 0 as success.
                    int res = f.ToWord(rtfStream);
                    if (res != 0)
                    {
                        Console.Error.WriteLine("PDF to RTF conversion failed, result code: " + res);
                        return;
                    }

                    // Save rtfStream to a file only for demonstration purposes.
                    string rtfFile = "Result.rtf";
                    File.WriteAllBytes(rtfFile, rtfStream.ToArray());
                    OpenWithDefaultApp(rtfFile);
                }
                finally
                {
                    // Release the loaded PDF before the source stream is closed.
                    f.ClosePdf();
                }
            }
        }
    }
}
Imports System.IO
Imports System.Drawing.Imaging
Imports System.Collections.Generic
Imports SautinSoft
''' <summary>
''' Console sample that converts a PDF document to Word formats entirely in memory
''' using SautinSoft PDF Focus .NET: once through byte arrays (DOCX) and once
''' through streams (RTF).
''' </summary>
Module Sample
    ''' <summary>
    ''' Entry point. Runs the byte-array (DOCX) conversion; uncomment the second
    ''' call to run the stream-based (RTF) conversion instead.
    ''' </summary>
    Sub Main()
        ' Before starting, we recommend to get a free key:
        ' https://sautinsoft.com/start-for-free/
        ' Apply the key here (it is shared by both conversion methods):
        ' SautinSoft.PdfFocus.SetLicense("...")

        ConvertPdfToDocxBytes()
        'ConvertPdfToRtfStream()
    End Sub

    ''' <summary>
    ''' Resolves the absolute path of the sample PDF that lives three directories
    ''' above the working directory.
    ''' </summary>
    ''' <returns>Full path to "simple text.pdf".</returns>
    Private Function GetSamplePdfPath() As String
        ' Path.Combine uses the platform's separator, so the sample also works
        ' on Linux/macOS where a hard-coded "\" is not a directory separator.
        Return Path.GetFullPath(Path.Combine("..", "..", "..", "simple text.pdf"))
    End Function

    ''' <summary>
    ''' Opens the given file with the application the operating system
    ''' associates with it. Used only to show the result of the demo.
    ''' </summary>
    ''' <param name="file">Path of the file to open.</param>
    Private Sub OpenWithDefaultApp(file As String)
        System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(file) With {.UseShellExecute = True})
    End Sub

    ''' <summary>
    ''' Converts a PDF held in a byte array to a DOCX held in a byte array,
    ''' then writes the result to "Result.docx" and opens it.
    ''' </summary>
    Private Sub ConvertPdfToDocxBytes()
        ' Assume that we already have a PDF document as array of bytes.
        Dim pdf() As Byte = System.IO.File.ReadAllBytes(GetSamplePdfPath())

        Dim f As New SautinSoft.PdfFocus()
        f.OpenPdf(pdf)
        Try
            If f.PageCount <= 0 Then
                System.Console.Error.WriteLine("The PDF document could not be opened or contains no pages.")
                Return
            End If

            ' Convert PDF to Word in memory.
            Dim docx() As Byte = f.ToWord()
            If docx Is Nothing Then
                ' NOTE(review): a Nothing result presumably signals a failed conversion - confirm in the API docs.
                System.Console.Error.WriteLine("PDF to DOCX conversion failed.")
                Return
            End If

            ' Save the Word document to a file only for demonstration purposes.
            Dim wordFile As String = "Result.docx"
            System.IO.File.WriteAllBytes(wordFile, docx)
            OpenWithDefaultApp(wordFile)
        Finally
            ' Release the loaded PDF even when the conversion fails or throws.
            f.ClosePdf()
        End Try
    End Sub

    ''' <summary>
    ''' Converts a PDF read from a stream to RTF written into a MemoryStream,
    ''' then writes the result to "Result.rtf" and opens it.
    ''' </summary>
    Private Sub ConvertPdfToRtfStream()
        Dim f As New SautinSoft.PdfFocus()

        ' Both streams are disposed when their Using blocks exit, on success or failure.
        Using rtfStream As New MemoryStream()
            ' Assume that we already have a PDF document as stream.
            Using pdfStream As New FileStream(GetSamplePdfPath(), FileMode.Open, FileAccess.Read)
                f.OpenPdf(pdfStream)
                Try
                    If f.PageCount <= 0 Then
                        System.Console.Error.WriteLine("The PDF document could not be opened or contains no pages.")
                        Return
                    End If

                    f.WordOptions.Format = SautinSoft.PdfFocus.CWordOptions.eWordDocument.Rtf

                    ' Convert PDF to RTF in memory; the sample treats 0 as success.
                    Dim res As Integer = f.ToWord(rtfStream)
                    If res <> 0 Then
                        System.Console.Error.WriteLine("PDF to RTF conversion failed, result code: " & res.ToString())
                        Return
                    End If

                    ' Save rtfStream to a file only for demonstration purposes.
                    Dim rtfFile As String = "Result.rtf"
                    System.IO.File.WriteAllBytes(rtfFile, rtfStream.ToArray())
                    OpenWithDefaultApp(rtfFile)
                Finally
                    ' Release the loaded PDF before the source stream is closed.
                    f.ClosePdf()
                End Try
            End Using
        End Using
    End Sub
End Module
If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below: