// This article demonstrates how to find text that matches a specific regular expression in a PDF document using Spire.PDF for Java.
import com.spire.pdf.PdfDocument;
import com.spire.pdf.PdfPageBase;
import com.spire.pdf.general.find.PdfTextFind;
import java.awt.*;
/**
 * Highlights, in yellow, every text fragment of a PDF document that is returned by a
 * pattern search on each page, then saves the result to a new file.
 *
 * <p>Usage: {@code FindByRegularExpression [inputPdf [outputPdf]]}. When an argument is
 * omitted, the original hard-coded path is used.
 */
public class FindByRegularExpression {

    /**
     * Search pattern handed to {@code findText}: a literal '#', one or more word
     * characters, then a word boundary (e.g. a hashtag-like token).
     */
    private static final String SEARCH_PATTERN = "\\#\\w+\\b";

    /** Input file used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "C:\\Users\\Administrator\\Desktop\\test.pdf";

    /** Output file used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "FindByPattern.pdf";

    /**
     * Loads the input PDF, highlights all search results on every page and saves the
     * document to the output path.
     *
     * @param args optional: {@code args[0]} is the input PDF path, {@code args[1]} the
     *             output PDF path; may be empty or {@code null}
     * @throws Exception if the document cannot be loaded, searched or saved
     */
    public static void main(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        PdfDocument pdf = new PdfDocument();
        try {
            //Load the PDF document
            pdf.loadFromFile(inputPath);

            //Loop through the pages; the collection is cast because it is consumed as a plain Iterable
            for (Object page : (Iterable<?>) pdf.getPages()) {
                PdfPageBase pageBase = (PdfPageBase) page;

                //Find all results on this page for the search pattern
                PdfTextFind[] results = pageBase.findText(SEARCH_PATTERN).getFinds();

                //Highlight the search results with yellow
                for (PdfTextFind find : results) {
                    find.applyHighLight(Color.yellow);
                }
            }

            //Save to file
            pdf.saveToFile(outputPath);
        } finally {
            //Release the document even when loading, searching or saving fails
            pdf.close();
        }
    }
}

