PDF Portfolio Data Extraction in C# and .NET

Complete code

using System;
using System.IO;
using System.IO.Compression;
using SautinSoft.Pdf.Objects;
using SautinSoft.Pdf.Portfolios;
using SautinSoft.Pdf;

class Program
{
    // Range of years a zip entry timestamp can hold; ZipArchiveEntry.LastWriteTime
    // throws ArgumentOutOfRangeException for anything outside it.
    const int MinZipYear = 1980;
    const int MaxZipYear = 2107;

    /// <summary>
    /// Extracts all files and folders of a PDF portfolio into a zip archive and then opens that archive.
    /// </summary>
    /// <remarks>
    /// Details: https://sautinsoft.com/products/pdf/help/net/developer-guide/extract-portfolios.php
    /// </remarks>
    static void Main()
    {
        // Before starting this example, please get a free 100-day trial key:
        // https://sautinsoft.com/start-for-free/

        // Apply the key here:
        // PdfDocument.SetLicense("...");

        // Add to zip archive all files and folders from a PDF portfolio.
        // The archive is disposed before its underlying stream, so the zip is completely written before the file is closed.
        using (var document = PdfDocument.Load(Path.GetFullPath(@"..\..\..\Portfolio.pdf")))
        using (var archiveStream = File.Create("Portfolio Files and Folders.zip"))
        using (var archive = new ZipArchive(archiveStream, ZipArchiveMode.Create, leaveOpen: true))
        {
            var portfolio = document.Portfolio;
            if (portfolio != null)
            {
                ExtractFilesAndFoldersToArchive(portfolio.Files, portfolio.Folders, archive, string.Empty, PdfName.Create("FullName"));
            }
        }

        // Hand the resulting archive to the operating system shell so it is opened by the associated application.
        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo("Portfolio Files and Folders.zip") { UseShellExecute = true });
    }

    /// <summary>
    /// Recursively copies portfolio files and folders into a zip archive.
    /// </summary>
    /// <param name="files">Files located directly in the current portfolio folder.</param>
    /// <param name="folders">Sub-folders located directly in the current portfolio folder.</param>
    /// <param name="archive">Zip archive, opened for creation, that receives the entries.</param>
    /// <param name="parentFolderFullName">Archive path of the current folder (empty for the root); prepended to a name when the item has no value for <paramref name="portfolioFieldKey"/>.</param>
    /// <param name="portfolioFieldKey">Key of the portfolio field whose value, when present, is used as the item's archive path.</param>
    static void ExtractFilesAndFoldersToArchive(PdfPortfolioFileCollection files, PdfPortfolioFolderCollection folders, ZipArchive archive, string parentFolderFullName, PdfName portfolioFieldKey)
    {
        foreach (var fileSpecification in files)
        {
            var embeddedFile = fileSpecification.EmbeddedFile;

            // NOTE(review): presumably EmbeddedFile is null when the specification carries no embedded
            // content - confirm against the SautinSoft.Pdf API. There is nothing to copy in that case.
            if (embeddedFile == null)
            {
                continue;
            }

            // Use the FullName field value or the resolved full name as the relative path of the entry in the zip archive.
            string entryFullName;
            if (fileSpecification.PortfolioFieldValues.TryGet(portfolioFieldKey, out PdfPortfolioFieldValue fullNameValue))
            {
                entryFullName = fullNameValue.ToString();
            }
            else
            {
                entryFullName = parentFolderFullName + fileSpecification.Name;
            }

            // Create zip archive entry.
            // The entry is compressed when the uncompressed size is unknown or when the portfolio
            // embedded file's compressed size is less than its uncompressed size.
            bool compress = !embeddedFile.Size.HasValue || embeddedFile.CompressedSize < embeddedFile.Size.GetValueOrDefault();
            var entry = archive.CreateEntry(entryFullName, compress ? CompressionLevel.Optimal : CompressionLevel.NoCompression);

            // Set the modification date, if it is specified in the portfolio embedded file.
            // This must happen before the entry stream is opened.
            var modificationDate = embeddedFile.ModificationDate;
            if (modificationDate.HasValue)
            {
                SetLastWriteTime(entry, modificationDate.GetValueOrDefault());
            }

            // Copy embedded file contents to the zip archive entry.
            using (var embeddedFileStream = embeddedFile.OpenRead())
            using (var entryStream = entry.Open())
            {
                embeddedFileStream.CopyTo(entryStream);
            }
        }

        foreach (var folder in folders)
        {
            // Use the FullName field value or the resolved full name as the relative path of the entry in the zip archive.
            string folderFullName;
            if (folder.PortfolioFieldValues.TryGet(portfolioFieldKey, out PdfPortfolioFieldValue fullNameValue))
            {
                folderFullName = fullNameValue.ToString();
            }
            else
            {
                folderFullName = parentFolderFullName + folder.Name;
            }

            // A folder path must end with a separator: it marks the zip entry as a directory and keeps
            // the names of children that are resolved relative to it from being glued to the folder name.
            folderFullName = EnsureTrailingSeparator(folderFullName);

            // Always write an explicit directory entry so that empty folders survive the extraction,
            // not only the folders that happen to carry a modification date.
            if (folderFullName.Length > 0)
            {
                var folderEntry = archive.CreateEntry(folderFullName);

                // Set the modification date, if it is specified in the portfolio folder.
                var modificationDate = folder.ModificationDate;
                if (modificationDate.HasValue)
                {
                    SetLastWriteTime(folderEntry, modificationDate.GetValueOrDefault());
                }
            }

            // Recursively add to zip archive all files and folders underneath the current portfolio folder.
            ExtractFilesAndFoldersToArchive(folder.Files, folder.Folders, archive, folderFullName, portfolioFieldKey);
        }
    }

    /// <summary>
    /// Returns <paramref name="folderFullName"/> terminated by a path separator; an empty or null name yields an empty string.
    /// </summary>
    static string EnsureTrailingSeparator(string folderFullName)
    {
        if (string.IsNullOrEmpty(folderFullName))
        {
            return string.Empty;
        }

        char last = folderFullName[folderFullName.Length - 1];
        return last == '/' || last == '\\' ? folderFullName : folderFullName + '/';
    }

    /// <summary>
    /// Stamps <paramref name="entry"/> with <paramref name="timestamp"/> when the zip format can represent it;
    /// otherwise the entry keeps its default timestamp instead of aborting the whole extraction.
    /// </summary>
    static void SetLastWriteTime(ZipArchiveEntry entry, DateTimeOffset timestamp)
    {
        int year = timestamp.DateTime.Year;
        if (year >= MinZipYear && year <= MaxZipYear)
        {
            entry.LastWriteTime = timestamp;
        }
    }
}

Download

Option Infer On

Imports System
Imports System.IO
Imports System.IO.Compression
Imports SautinSoft.Pdf.Objects
Imports SautinSoft.Pdf.Portfolios
Imports SautinSoft.Pdf

Friend Class Program
	' Range of years a zip entry timestamp can hold; ZipArchiveEntry.LastWriteTime
	' throws ArgumentOutOfRangeException for anything outside it.
	Private Const MinZipYear As Integer = 1980
	Private Const MaxZipYear As Integer = 2107

	''' <summary>
	''' Extracts all files and folders of a PDF portfolio into a zip archive and then opens that archive.
	''' </summary>
	''' <remarks>
	''' Details: https://sautinsoft.com/products/pdf/help/net/developer-guide/extract-portfolios.php
	''' </remarks>
	Shared Sub Main()
		' Before starting this example, please get a free 100-day trial key:
		' https://sautinsoft.com/start-for-free/

		' Apply the key here:
		' PdfDocument.SetLicense("...")

		' Add to zip archive all files and folders from a PDF portfolio.
		' The archive is disposed before its underlying stream, so the zip is completely written before the file is closed.
		Using document = PdfDocument.Load(Path.GetFullPath("..\..\..\Portfolio.pdf"))
		Using archiveStream = File.Create("Portfolio Files and Folders.zip")
		Using archive = New ZipArchive(archiveStream, ZipArchiveMode.Create, leaveOpen:= True)
			Dim portfolio = document.Portfolio
			If portfolio IsNot Nothing Then
				ExtractFilesAndFoldersToArchive(portfolio.Files, portfolio.Folders, archive, String.Empty, PdfName.Create("FullName"))
			End If
		End Using
		End Using
		End Using

		' Hand the resulting archive to the operating system shell so it is opened by the associated application.
		System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo("Portfolio Files and Folders.zip") With {.UseShellExecute = True})
	End Sub

	''' <summary>
	''' Recursively copies portfolio files and folders into a zip archive.
	''' </summary>
	''' <param name="files">Files located directly in the current portfolio folder.</param>
	''' <param name="folders">Sub-folders located directly in the current portfolio folder.</param>
	''' <param name="archive">Zip archive, opened for creation, that receives the entries.</param>
	''' <param name="parentFolderFullName">Archive path of the current folder (empty for the root); prepended to a name when the item has no value for <paramref name="portfolioFieldKey"/>.</param>
	''' <param name="portfolioFieldKey">Key of the portfolio field whose value, when present, is used as the item's archive path.</param>
	Private Shared Sub ExtractFilesAndFoldersToArchive(ByVal files As PdfPortfolioFileCollection, ByVal folders As PdfPortfolioFolderCollection, ByVal archive As ZipArchive, ByVal parentFolderFullName As String, ByVal portfolioFieldKey As PdfName)
		For Each fileSpecification In files
			Dim embeddedFile = fileSpecification.EmbeddedFile

			' NOTE(review): presumably EmbeddedFile is Nothing when the specification carries no embedded
			' content - confirm against the SautinSoft.Pdf API. There is nothing to copy in that case.
			If embeddedFile Is Nothing Then
				Continue For
			End If

			' Use the FullName field value or the resolved full name as the relative path of the entry in the zip archive.
			Dim entryFullName As String
			' Initialized explicitly because the variable is passed by reference before any assignment.
			Dim fullNameValue As PdfPortfolioFieldValue = Nothing
			If fileSpecification.PortfolioFieldValues.TryGet(portfolioFieldKey, fullNameValue) Then
				entryFullName = fullNameValue.ToString()
			Else
				entryFullName = parentFolderFullName & fileSpecification.Name
			End If

			' Create zip archive entry.
			' The entry is compressed when the uncompressed size is unknown or when the portfolio
			' embedded file's compressed size is less than its uncompressed size.
			Dim compress As Boolean = embeddedFile.Size Is Nothing OrElse embeddedFile.CompressedSize < embeddedFile.Size.GetValueOrDefault()
			Dim entry = archive.CreateEntry(entryFullName, If(compress, CompressionLevel.Optimal, CompressionLevel.NoCompression))

			' Set the modification date, if it is specified in the portfolio embedded file.
			' This must happen before the entry stream is opened.
			Dim modificationDate = embeddedFile.ModificationDate
			If modificationDate.HasValue Then
				SetLastWriteTime(entry, modificationDate.GetValueOrDefault())
			End If

			' Copy embedded file contents to the zip archive entry.
			Using embeddedFileStream = embeddedFile.OpenRead()
			Using entryStream = entry.Open()
				embeddedFileStream.CopyTo(entryStream)
			End Using
			End Using
		Next fileSpecification

		For Each folder In folders
			' Use the FullName field value or the resolved full name as the relative path of the entry in the zip archive.
			Dim folderFullName As String
			' Initialized explicitly because the variable is passed by reference before any assignment.
			Dim fullNameValue As PdfPortfolioFieldValue = Nothing
			If folder.PortfolioFieldValues.TryGet(portfolioFieldKey, fullNameValue) Then
				folderFullName = fullNameValue.ToString()
			Else
				folderFullName = parentFolderFullName & folder.Name
			End If

			' A folder path must end with a separator: it marks the zip entry as a directory and keeps
			' the names of children that are resolved relative to it from being glued to the folder name.
			folderFullName = EnsureTrailingSeparator(folderFullName)

			' Always write an explicit directory entry so that empty folders survive the extraction,
			' not only the folders that happen to carry a modification date.
			If folderFullName.Length > 0 Then
				Dim folderEntry = archive.CreateEntry(folderFullName)

				' Set the modification date, if it is specified in the portfolio folder.
				Dim modificationDate = folder.ModificationDate
				If modificationDate.HasValue Then
					SetLastWriteTime(folderEntry, modificationDate.GetValueOrDefault())
				End If
			End If

			' Recursively add to zip archive all files and folders underneath the current portfolio folder.
			ExtractFilesAndFoldersToArchive(folder.Files, folder.Folders, archive, folderFullName, portfolioFieldKey)
		Next folder
	End Sub

	''' <summary>
	''' Returns <paramref name="folderFullName"/> terminated by a path separator; an empty or missing name yields an empty string.
	''' </summary>
	Private Shared Function EnsureTrailingSeparator(ByVal folderFullName As String) As String
		If String.IsNullOrEmpty(folderFullName) Then
			Return String.Empty
		End If

		Dim last As Char = folderFullName(folderFullName.Length - 1)
		If last = "/"c OrElse last = "\"c Then
			Return folderFullName
		End If

		Return folderFullName & "/"
	End Function

	''' <summary>
	''' Stamps <paramref name="entry"/> with <paramref name="timestamp"/> when the zip format can represent it;
	''' otherwise the entry keeps its default timestamp instead of aborting the whole extraction.
	''' </summary>
	Private Shared Sub SetLastWriteTime(ByVal entry As ZipArchiveEntry, ByVal timestamp As DateTimeOffset)
		Dim year As Integer = timestamp.DateTime.Year
		If year >= MinZipYear AndAlso year <= MaxZipYear Then
			entry.LastWriteTime = timestamp
		End If
	End Sub
End Class

Download


If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.