How to extract all pictures from a document using C# and .NET

  1. Add SautinSoft.Document from NuGet.
  2. Load a PDF document.
  3. Extract all images from the document.
  4. Save all images.

Complete code

using System;
using System.IO;
using System.Collections.Generic;
using SautinSoft.Document;
using SautinSoft.Document.Drawing;

namespace Sample
{
    class Sample
    {

        static void Main(string[] args)
        {
            // Get your free 100-day key here:   
            // https://sautinsoft.com/start-for-free/

            ExtractPictures();
        }

        /// <summary>
        /// Extract all pictures from document (PDF, DOCX, RTF, HTML).
        /// </summary>
        /// <remarks>
        /// Loads the document at a hard-coded relative path, collects every picture whose
        /// bytes have not been seen before, writes each one into the "Extracted Pictures"
        /// directory as "Picture{N}.{format}", and finally opens that directory.
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/extract-pictures.php
        /// </remarks>
        public static void ExtractPictures()
        {
            // Path to the document the pictures are extracted from.
            string filePath = @"..\..\..\example.pdf";

            // Directory to store extracted pictures (created if it does not exist yet).
            DirectoryInfo imgDir = new DirectoryInfo("Extracted Pictures");
            imgDir.Create();
            string imgTemplateName = "Picture";

            // Unique images found so far, and their raw bytes at the same index.
            // The bytes are kept so that each image is read only once: they are used
            // both for duplicate detection and for saving.
            List<ImageData> imgInventory = new List<ImageData>();
            List<byte[]> imgContents = new List<byte[]>();

            // Load the document.
            DocumentCore dc = DocumentCore.Load(filePath);

            // Extract all images from document, skip duplicates.
            foreach (Picture pict in dc.GetChildElements(true, ElementType.Picture))
            {
                byte[] content = pict.ImageData.GetStream().ToArray();

                // Compare the actual bytes, not only the length: two different pictures
                // can have the same size, and a length-only check would drop one of them.
                if (!imgContents.Exists(known => HaveSameBytes(known, content)))
                {
                    imgInventory.Add(pict.ImageData);
                    imgContents.Add(content);
                }
            }

            // Save all images.
            for (int i = 0; i < imgInventory.Count; i++)
            {
                // Invariant lower-casing keeps the file extension independent of the
                // current culture (e.g. Turkish casing rules for the letter 'I').
                string extension = imgInventory[i].Format.ToString().ToLowerInvariant();
                string imagePath = Path.Combine(imgDir.FullName, String.Format("{0}{1}.{2}", imgTemplateName, i + 1, extension));
                File.WriteAllBytes(imagePath, imgContents[i]);
            }

            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(imgDir.FullName) { UseShellExecute = true });

        }

        /// <summary>
        /// Checks whether two byte arrays have the same length and the same content.
        /// </summary>
        /// <param name="first">First byte array.</param>
        /// <param name="second">Second byte array.</param>
        /// <returns><c>true</c> if both arrays contain identical bytes; otherwise <c>false</c>.</returns>
        private static bool HaveSameBytes(byte[] first, byte[] second)
        {
            // Cheap rejection first: different sizes can never be equal.
            if (first.Length != second.Length)
            {
                return false;
            }

            for (int i = 0; i < first.Length; i++)
            {
                if (first[i] != second[i])
                {
                    return false;
                }
            }

            return true;
        }
    }
}

Download

Imports System
Imports System.IO
Imports System.Collections.Generic
Imports SautinSoft.Document
Imports SautinSoft.Document.Drawing

Module Sample
    Sub Main()
        ExtractPictures()
    End Sub

    ' Get your free 100-day key here:
    ' https://sautinsoft.com/start-for-free/

    ''' <summary>
    ''' Extract all pictures from document (PDF, DOCX, RTF, HTML).
    ''' </summary>
    ''' <remarks>
    ''' Loads the document at a hard-coded relative path, collects every picture whose
    ''' bytes have not been seen before, writes each one into the "Extracted Pictures"
    ''' directory as "Picture{N}.{format}", and finally opens that directory.
    ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/extract-pictures.php
    ''' </remarks>
    Sub ExtractPictures()
        ' Path to the document the pictures are extracted from.
        Dim filePath As String = "..\..\..\example.pdf"

        ' Directory to store extracted pictures (created if it does not exist yet).
        Dim imgDir As New DirectoryInfo("Extracted Pictures")
        imgDir.Create()
        Dim imgTemplateName As String = "Picture"

        ' Unique images found so far, and their raw bytes at the same index.
        ' The bytes are kept so that each image is read only once: they are used
        ' both for duplicate detection and for saving.
        Dim imgInventory As New List(Of ImageData)()
        Dim imgContents As New List(Of Byte())()

        ' Load the document.
        Dim dc As DocumentCore = DocumentCore.Load(filePath)

        ' Extract all images from document, skip duplicates.
        For Each pict As Picture In dc.GetChildElements(True, ElementType.Picture)
            Dim content As Byte() = pict.ImageData.GetStream().ToArray()

            ' Compare the actual bytes, not only the length: two different pictures
            ' can have the same size, and a length-only check would drop one of them.
            If Not imgContents.Exists(Function(known) HaveSameBytes(known, content)) Then
                imgInventory.Add(pict.ImageData)
                imgContents.Add(content)
            End If
        Next pict

        ' Save all images.
        For i As Integer = 0 To imgInventory.Count - 1
            ' Invariant lower-casing keeps the file extension independent of the
            ' current culture (e.g. Turkish casing rules for the letter "I").
            Dim extension As String = imgInventory(i).Format.ToString().ToLowerInvariant()
            Dim imagePath As String = Path.Combine(imgDir.FullName, String.Format("{0}{1}.{2}", imgTemplateName, i + 1, extension))
            File.WriteAllBytes(imagePath, imgContents(i))
        Next i

        ' Open the result for demonstration purposes.
        System.Diagnostics.Process.Start(New System.Diagnostics.ProcessStartInfo(imgDir.FullName) With {.UseShellExecute = True})
    End Sub

    ''' <summary>
    ''' Checks whether two byte arrays have the same length and the same content.
    ''' </summary>
    ''' <param name="first">First byte array.</param>
    ''' <param name="second">Second byte array.</param>
    ''' <returns>True if both arrays contain identical bytes; otherwise False.</returns>
    Private Function HaveSameBytes(first As Byte(), second As Byte()) As Boolean
        ' Cheap rejection first: different sizes can never be equal.
        If first.Length <> second.Length Then
            Return False
        End If

        For index As Integer = 0 To first.Length - 1
            If first(index) <> second(index) Then
                Return False
            End If
        Next index

        Return True
    End Function
End Module

Download


If you need a new code example or have a question: email us at support@sautinsoft.com or ask at Online Chat (right-bottom corner of this page) or use the Form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.