How to find pages with specified text using C# and .NET


    In this code example, we will find out on which pages of the document the required word "Invoice" is located.

Complete code

using SautinSoft.Document;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Xml.Linq;

namespace Sample
{
    class Program
    {
        static void Main(string[] args)
        {
            FindPagesSpecifiedText();
        }

        /// <summary>
        /// Finds out on which pages of the document the required word is located.
        /// Prints one line per occurrence (with its 1-based page number), then the
        /// total number of occurrences, and waits for a key press.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-find-pages-with-specified-text-net-csharp-vb.php
        /// </remarks>
        public static void FindPagesSpecifiedText()
        {
            // Path to the input document.
            string inpFile = @"..\..\..\example.docx";

            // The text we are looking for.
            var searchText = "Invoice";
            int quantity = 0;

            // Load the document into the Document engine.
            DocumentCore dc = DocumentCore.Load(inpFile);

            // The search text is literal text, not a pattern: escape it so that
            // characters such as '.', '(' or '+' are matched as-is instead of being
            // interpreted as regular-expression syntax. The match is case-insensitive.
            Regex regex = new Regex(Regex.Escape(searchText), RegexOptions.IgnoreCase);

            // The paginator exposes the document's pages through its Pages collection.
            DocumentPaginator dp = dc.GetPaginator();

            // Search for "searchText" on every page.
            for (int page = 0; page < dp.Pages.Count; page++)
            {
                // The matches are only counted and reported (the content is not
                // modified), so they are enumerated directly in the order returned.
                foreach (ContentRange item in dp.Pages[page].Content.Find(regex))
                {
                    // "page" is a zero-based index; show a 1-based page number.
                    Console.WriteLine($"I see the [{searchText}] on the page # {page + 1}");
                    quantity++;
                }
            }

            Console.WriteLine();
            Console.WriteLine($"I met [{searchText}] {quantity} times.  Please click on any button");
            Console.ReadKey();
        }
    }
}

Download

Option Infer On

Imports SautinSoft.Document
Imports System
Imports System.Collections.Generic
Imports System.Linq
Imports System.Text
Imports System.Text.RegularExpressions
Imports System.Threading.Tasks
Imports System.Xml.Linq

Namespace Sample
	Friend Class Program
		Shared Sub Main(ByVal args() As String)
			FindPagesSpecifiedText()
		End Sub

		''' <summary>
		''' Finds out on which pages of the document the required word is located.
		''' Prints one line per occurrence (with its 1-based page number), then the
		''' total number of occurrences, and waits for a key press.
		''' </summary>
		''' <remarks>
		''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-find-pages-with-specified-text-net-csharp-vb.php
		''' </remarks>
		Public Shared Sub FindPagesSpecifiedText()
			' Path to the input document.
			Dim inpFile As String = "..\..\..\example.docx"

			' The text we are looking for.
			Dim searchText = "Invoice"
			Dim quantity As Integer = 0

			' Load the document into the Document engine.
			Dim dc As DocumentCore = DocumentCore.Load(inpFile)

			' The search text is literal text, not a pattern: escape it so that
			' characters such as ".", "(" or "+" are matched as-is instead of being
			' interpreted as regular-expression syntax. The match is case-insensitive.
			Dim regex As New Regex(Regex.Escape(searchText), RegexOptions.IgnoreCase)

			' The paginator exposes the document's pages through its Pages collection.
			Dim dp As DocumentPaginator = dc.GetPaginator()

			' Search for "searchText" on every page.
			For page As Integer = 0 To dp.Pages.Count - 1
				' The matches are only counted and reported (the content is not
				' modified), so they are enumerated directly in the order returned.
				For Each item As ContentRange In dp.Pages(page).Content.Find(regex)
					' "page" is a zero-based index; show a 1-based page number.
					Console.WriteLine($"I see the [{searchText}] on the page # {page + 1}")
					quantity += 1
				Next item
			Next page

			Console.WriteLine()
			Console.WriteLine($"I met [{searchText}] {quantity} times.  Please click on any button")
			Console.ReadKey()
		End Sub
	End Class
End Namespace

Download


If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below:



Questions and suggestions from you are always welcome!

We have been developing .NET components since 2002. We know the PDF, DOCX, RTF, HTML, XLSX and image formats. If you need any assistance with creating, modifying or converting documents in various formats, we can help you. We will write any code example for you absolutely free.