Sometimes you need to find out which paragraphs of a document contain a given keyword. The text can be stored in PDF, DOCX or RTF format.
In this code example, we will output to the console every paragraph that contains the whole word "company".
Complete code
using System;
using System.IO;
using SautinSoft.Document;
using SautinSoft.Document.Drawing;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace Example
{
class Program
{
    /// <summary>
    /// Application entry point: runs the keyword search demo.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        FindWordInParagraph();
    }

    /// <summary>
    /// Find any "word" in a folder with PDF files inside and show a paragraph, where this word will be found.
    /// You may change the extension: pdf, docx, rtf.
    /// </summary>
    /// <remarks>
    /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-show-paragraph-containing-required-word-in-csharp-vb-net.php
    /// After every reported match the method waits for a key press before continuing.
    /// </remarks>
    static void FindWordInParagraph()
    {
        // Whole-word, case-insensitive search pattern for "company".
        Regex regex = new Regex(@"\bcompany\b", RegexOptions.IgnoreCase);

        // Loop through all PDF files in the directory and its subdirectories.
        foreach (string file in Directory.EnumerateFiles(@"..\..\files\", "*.pdf", SearchOption.AllDirectories))
        {
            DocumentCore dc = DocumentCore.Load(file);

            // Provides a functionality to paginate the document content.
            DocumentPaginator dp = dc.GetPaginator();

            foreach (ContentRange content in dc.Content.Find(regex))
            {
                // The match is expected to start in an inline element whose parent is the paragraph.
                // The "as" cast yields null when that is not the case, so skip such matches
                // instead of failing with a NullReferenceException.
                Paragraph paragraph = content.Start.Parent.Parent as Paragraph;
                if (paragraph == null)
                {
                    continue;
                }

                // Full text of the paragraph in which the word was found.
                string paragraphText = paragraph.Content.ToString().Trim();
                Console.WriteLine("Filename: " + file + "\r\n" + paragraphText);

                // The frame whose content starts where the match starts; FirstOrDefault
                // returns null when no frame matches, so the result must be checked.
                ElementFrame ef = dp.GetElementFrames().FirstOrDefault(e => content.Start.Equals(e.Content.Start));
                if (ef != null)
                {
                    // The coordinates of the found word.
                    Console.WriteLine("Info:" + ef.Bounds.ToString());
                }
                else
                {
                    Console.WriteLine("Info: (coordinates unavailable)");
                }

                Console.WriteLine("Next paragraph?");
                Console.ReadKey();
            }
        }
    }
}
}
Imports Microsoft.VisualBasic
Imports System
Imports System.IO
Imports SautinSoft.Document
Imports SautinSoft.Document.Drawing
Imports System.Collections.Generic
Imports System.Linq
Imports System.Text
Imports System.Text.RegularExpressions
Namespace Example
Friend Class Program
    ''' <summary>
    ''' Application entry point: runs the keyword search demo.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Shared Sub Main(ByVal args() As String)
        FindWordInParagraph()
    End Sub

    ''' <summary>
    ''' Find any "word" in a folder with PDF files inside and show a paragraph, where this word will be found.
    ''' You may change the extension: pdf, docx, rtf.
    ''' </summary>
    ''' <remarks>
    ''' Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-show-paragraph-containing-required-word-in-csharp-vb-net.php
    ''' After every reported match the method waits for a key press before continuing.
    ''' </remarks>
    Private Shared Sub FindWordInParagraph()
        ' Whole-word, case-insensitive search pattern for "company".
        Dim regex As New Regex("\bcompany\b", RegexOptions.IgnoreCase)

        ' Loop through all PDF files in the directory and its subdirectories.
        For Each file As String In Directory.EnumerateFiles("..\files\", "*.pdf", SearchOption.AllDirectories)
            Dim dc As DocumentCore = DocumentCore.Load(file)

            ' Provides a functionality to paginate the document content.
            Dim dp As DocumentPaginator = dc.GetPaginator()

            For Each content As ContentRange In dc.Content.Find(regex)
                ' The match is expected to start in an inline element whose parent is the paragraph.
                ' TryCast yields Nothing when that is not the case, so skip such matches
                ' instead of failing with a NullReferenceException.
                Dim paragraph As Paragraph = TryCast(content.Start.Parent.Parent, Paragraph)
                If paragraph Is Nothing Then
                    Continue For
                End If

                ' Full text of the paragraph in which the word was found.
                Dim paragraphText As String = paragraph.Content.ToString().Trim()
                Console.WriteLine("Filename: " & file & vbCrLf & paragraphText)

                ' The frame whose content starts where the match starts; FirstOrDefault
                ' returns Nothing when no frame matches, so the result must be checked.
                Dim ef As ElementFrame = dp.GetElementFrames().FirstOrDefault(Function(e) content.Start.Equals(e.Content.Start))
                If ef IsNot Nothing Then
                    ' The coordinates of the found word.
                    Console.WriteLine("Info:" & ef.Bounds.ToString())
                Else
                    Console.WriteLine("Info: (coordinates unavailable)")
                End If

                Console.WriteLine("Next paragraph?")
                Console.ReadKey()
            Next content
        Next file
    End Sub
End Class
End Namespace
If you need a new code example or have a question, email us at support@sautinsoft.com, ask in the Online Chat (bottom-right corner of this page), or use the form below: