Redacts a PDF document stored in a file.
Public Shared Sub Redact(ByVal fileName As String,ByVal password As String,ByVal redacts As IList(Of PDFRedact))
fileName
Name of the file containing an existing PDF document to be redacted.
password
The password to use if fileName contains an encrypted PDF file.
redacts
One or more PDF redact objects.
Redaction can be used to remove sensitive information from an existing PDF document.
This method quickly redacts an existing PDF document in place by removing any character, image, or shape that intersects with the PDFRedact.Bounds of any item in redacts. The resulting PDF is not re-generated and therefore retains exactly the same compression, metadata, fonts, and other resources.
Use the following code to redact all data in a PDF page at location 0, 0 to 100,100:
// Create a PDF redaction object
var redact = new PDFRedact(0, 0, 100, 100);
// Redact the file. This method takes (fileName, password, redacts);
// null is passed for the password, as in the full example for a file that is not encrypted.
PDFFile.Redact(pdfFileName, null, new List<PDFRedact> { redact });
This example parses the text of a PDF file, finds the locations of all items containing the word "LEADTOOLS", and redacts them.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Controls;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.Pdf;
using Leadtools.Svg;
using Leadtools.WinForms;

/// <summary>
/// Copies 'leadtools.pdf', redacts every word containing "LEADTOOLS" in the copy,
/// then re-parses the copy and asserts that no such word remains.
/// </summary>
private static void RedactExample()
{
    const string toRedact = "LEADTOOLS";

    // Make a copy of 'leadtools.pdf' installed with LEADTOOLS
    string imagesDir = @"C:\Users\Public\Documents\LEADTOOLS Images";
    string pdfFileName = Path.Combine(imagesDir, "leadtools-redacted.pdf");
    File.Copy(Path.Combine(imagesDir, "leadtools.pdf"), pdfFileName, true);

    // We will use PDFDocument to find the position of the words to redact
    // Find any text containing the word "LEADTOOLS" in the document
    var allWords = new List<MyPDFWord>();
    using (var pdfDocument = new PDFDocument(pdfFileName))
    {
        pdfDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1);

        // Build the words for each page from PDFDocumentPage.Objects
        foreach (PDFDocumentPage pdfPage in pdfDocument.Pages)
        {
            allWords.AddRange(GetPageWords(pdfPage));
        }
    }

    // Now create a PDFRedact object for each word that contains the value we want to redact
    var pdfRedacts = new List<PDFRedact>();
    foreach (MyPDFWord word in allWords)
    {
        // Case-insensitive ordinal match; avoids allocating lower-cased copies of every word
        if (word.Value.IndexOf(toRedact, StringComparison.OrdinalIgnoreCase) < 0)
        {
            continue;
        }

        Console.WriteLine($"Found {word.Value} at {word.Bounds} in page {word.PageNumber}");

        var pdfRedact = new PDFRedact();
        pdfRedact.PageNumber = word.PageNumber;
        pdfRedact.Bounds = new PDFRect(word.Bounds.Left, word.Bounds.Top, word.Bounds.Right, word.Bounds.Bottom);
        pdfRedacts.Add(pdfRedact);
    }

    // Redact the document (no password is supplied)
    PDFFile.Redact(pdfFileName, null, pdfRedacts);

    // Finally, verify that the redacted PDF does not have the redacted words anymore
    using (var pdfDocument = new PDFDocument(pdfFileName))
    {
        pdfDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1);

        // Build the words for each page from PDFDocumentPage.Objects
        foreach (PDFDocumentPage pdfPage in pdfDocument.Pages)
        {
            foreach (MyPDFWord word in GetPageWords(pdfPage))
            {
                Debug.Assert(word.Value.IndexOf(toRedact, StringComparison.OrdinalIgnoreCase) < 0);
            }
        }
    }
}

// Class to define a word in a PDF page
class MyPDFWord
{
    // Page number
    public int PageNumber;
    // The value as a string
    public string Value;
    // Its location, as computed by GetPageWords (converted to top-left coordinates)
    public LeadRectD Bounds;
}

/// <summary>
/// Groups the text objects of a parsed PDF page into words.
/// </summary>
/// <param name="pdfPage">Page whose Objects list is walked.</param>
/// <returns>
/// One MyPDFWord per run of text objects terminated by IsEndOfWord, IsEndOfLine or the end
/// of the object list. The list is empty when the page has no objects.
/// </returns>
private static IList<MyPDFWord> GetPageWords(PDFDocumentPage pdfPage)
{
    var words = new List<MyPDFWord>();

    IList<PDFObject> objects = pdfPage.Objects;
    if (objects == null || objects.Count == 0)
    {
        return words;
    }

    int objectIndex = 0;
    int objectCount = objects.Count;
    double pageHeight = pdfPage.Height;

    // Loop through all the objects
    while (objectIndex < objectCount)
    {
        // Find the total bounding rectangle, begin and end index of the next word
        LeadRectD wordBounds = LeadRectD.Empty;
        int firstObjectIndex = objectIndex;

        // Loop till we reach EndOfWord or reach the end of the objects
        bool more = true;
        while (more)
        {
            PDFObject pdfObject = objects[objectIndex];

            // Is it text?
            if (pdfObject.ObjectType == PDFObjectType.Text)
            {
                PDFRect pdfBounds = pdfObject.Bounds;

                // pdfBounds are in bottom-left coordinate, convert it to top-left
                LeadRectD objectBounds = LeadRectD.FromLTRB(
                    pdfBounds.Left,
                    pageHeight - pdfBounds.Top,
                    pdfBounds.Right,
                    pageHeight - pdfBounds.Bottom);

                // Add the bounding rectangle of this object
                if (wordBounds.IsEmpty)
                {
                    wordBounds = objectBounds;
                }
                else
                {
                    wordBounds = LeadRectD.UnionRects(wordBounds, objectBounds);
                }
            }
            else
            {
                // A non-text object restarts the word after it. Reset the accumulated bounds
                // as well, so Bounds never covers characters that are not part of Value.
                firstObjectIndex = objectIndex + 1;
                wordBounds = LeadRectD.Empty;
            }

            objectIndex++;
            more = (objectIndex < objectCount) && !pdfObject.TextProperties.IsEndOfWord && !pdfObject.TextProperties.IsEndOfLine;
        }

        // Nothing but non-text objects were consumed: no word to emit
        if (firstObjectIndex == objectIndex)
        {
            continue;
        }

        // From the begin and end index, collect the characters into a string
        var sb = new StringBuilder();
        for (int i = firstObjectIndex; i < objectIndex; i++)
        {
            if (objects[i].ObjectType == PDFObjectType.Text)
            {
                sb.Append(objects[i].Code);
            }
        }

        // Add this word to the list
        var word = new MyPDFWord();
        word.PageNumber = pdfPage.PageNumber;
        word.Value = sb.ToString();
        word.Bounds = wordBounds;
        words.Add(word);
    }

    return words;
}
Imports System
Imports System.Collections.Generic
Imports System.Diagnostics
Imports System.IO
Imports System.Text
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Pdf
Imports Leadtools.WinForms
Imports Leadtools.Svg
Imports Leadtools.ImageProcessing

''' <summary>
''' Copies 'leadtools.pdf', redacts every word containing "LEADTOOLS" in the copy,
''' then re-parses the copy and asserts that no such word remains.
''' </summary>
Private Shared Sub RedactExample()
   Const toRedact As String = "LEADTOOLS"

   ' Make a copy of 'leadtools.pdf' installed with LEADTOOLS
   Dim imagesDir As String = "C:\Users\Public\Documents\LEADTOOLS Images"
   Dim pdfFileName As String = Path.Combine(imagesDir, "leadtools-redacted.pdf")
   File.Copy(Path.Combine(imagesDir, "leadtools.pdf"), pdfFileName, True)

   ' We will use PDFDocument to find the position of the words to redact
   ' Find any text containing the word "LEADTOOLS" in the document
   Dim allWords As New List(Of MyPDFWord)()
   Using pdfDocument As New PDFDocument(pdfFileName)
      pdfDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1)

      ' Build the words for each page from PDFDocumentPage.Objects
      For Each pdfPage As PDFDocumentPage In pdfDocument.Pages
         allWords.AddRange(GetPageWords(pdfPage))
      Next
   End Using

   ' Now create a PDFRedact object for each word that contains the value we want to redact
   Dim pdfRedacts As New List(Of PDFRedact)()
   For Each word As MyPDFWord In allWords
      ' Case-insensitive ordinal match; avoids allocating lower-cased copies of every word
      If word.Value.IndexOf(toRedact, StringComparison.OrdinalIgnoreCase) >= 0 Then
         Console.WriteLine($"Found {word.Value} at {word.Bounds} in page {word.PageNumber}")

         ' Named "redact" so the local does not shadow the PDFRedact type name
         Dim redact As New PDFRedact()
         redact.PageNumber = word.PageNumber
         redact.Bounds = New PDFRect(word.Bounds.Left, word.Bounds.Top, word.Bounds.Right, word.Bounds.Bottom)
         pdfRedacts.Add(redact)
      End If
   Next

   ' Redact the document (no password is supplied)
   PDFFile.Redact(pdfFileName, Nothing, pdfRedacts)

   ' Finally, verify that the redacted PDF does not have the redacted words anymore
   Using pdfDocument As New PDFDocument(pdfFileName)
      pdfDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1)

      ' Build the words for each page from PDFDocumentPage.Objects
      For Each pdfPage As PDFDocumentPage In pdfDocument.Pages
         For Each word As MyPDFWord In GetPageWords(pdfPage)
            Debug.Assert(word.Value.IndexOf(toRedact, StringComparison.OrdinalIgnoreCase) < 0)
         Next
      Next
   End Using
End Sub

' Class to define a word in a PDF page
Class MyPDFWord
   ' Page number
   Public PageNumber As Integer
   ' The value as a string
   Public Value As String
   ' Its location, as computed by GetPageWords (converted to top-left coordinates)
   Public Bounds As LeadRectD
End Class

''' <summary>
''' Groups the text objects of a parsed PDF page into words.
''' </summary>
''' <param name="pdfPage">Page whose Objects list is walked.</param>
''' <returns>
''' One MyPDFWord per run of text objects terminated by IsEndOfWord, IsEndOfLine or the end
''' of the object list. The list is empty when the page has no objects.
''' </returns>
Private Shared Function GetPageWords(pdfPage As PDFDocumentPage) As IList(Of MyPDFWord)
   Dim words As New List(Of MyPDFWord)()

   Dim objects As IList(Of PDFObject) = pdfPage.Objects
   If objects Is Nothing OrElse objects.Count = 0 Then
      Return words
   End If

   Dim objectIndex As Integer = 0
   Dim objectCount As Integer = objects.Count
   Dim pageHeight As Double = pdfPage.Height

   ' Loop through all the objects
   While objectIndex < objectCount
      ' Find the total bounding rectangle, begin and end index of the next word
      Dim wordBounds As LeadRectD = LeadRectD.Empty
      Dim firstObjectIndex As Integer = objectIndex

      ' Loop till we reach EndOfWord or reach the end of the objects
      Dim more As Boolean = True
      While more
         Dim pdfObject As PDFObject = objects(objectIndex)

         ' Is it text?
         If pdfObject.ObjectType = PDFObjectType.Text Then
            Dim pdfBounds As PDFRect = pdfObject.Bounds

            ' pdfBounds are in bottom-left coordinate, convert it to top-left
            Dim objectBounds As LeadRectD = LeadRectD.FromLTRB(
               pdfBounds.Left,
               pageHeight - pdfBounds.Top,
               pdfBounds.Right,
               pageHeight - pdfBounds.Bottom)

            ' Add the bounding rectangle of this object
            If wordBounds.IsEmpty Then
               wordBounds = objectBounds
            Else
               wordBounds = LeadRectD.UnionRects(wordBounds, objectBounds)
            End If
         Else
            ' A non-text object restarts the word after it. Reset the accumulated bounds
            ' as well, so Bounds never covers characters that are not part of Value.
            firstObjectIndex = objectIndex + 1
            wordBounds = LeadRectD.Empty
         End If

         objectIndex += 1
         more = (objectIndex < objectCount) AndAlso Not pdfObject.TextProperties.IsEndOfWord AndAlso Not pdfObject.TextProperties.IsEndOfLine
      End While

      ' Nothing but non-text objects were consumed: no word to emit
      If firstObjectIndex = objectIndex Then
         Continue While
      End If

      ' From the begin and end index, collect the characters into a string
      Dim sb As New StringBuilder()
      For i As Integer = firstObjectIndex To objectIndex - 1
         If objects(i).ObjectType = PDFObjectType.Text Then
            sb.Append(objects(i).Code)
         End If
      Next

      ' Add this word to the list
      Dim word As New MyPDFWord()
      word.PageNumber = pdfPage.PageNumber
      word.Value = sb.ToString()
      word.Bounds = wordBounds
      words.Add(word)
   End While

   Return words
End Function
Help Collections
Raster .NET | C API | C++ Class Library | HTML5 JavaScript
Document .NET | C API | C++ Class Library | HTML5 JavaScript
Medical .NET | C API | C++ Class Library | HTML5 JavaScript
Medical Web Viewer .NET
Multimedia
Direct Show .NET | C API | Filters
Media Foundation .NET | C API | Transforms
Supported Platforms
.NET, Java, Android, and iOS/macOS Assemblies
Imaging, Medical, and Document
C API/C++ Class Libraries
Imaging, Medical, and Document
HTML5 JavaScript Libraries
Imaging, Medical, and Document
