Updates or deletes a word in the recognized words list.
Sub UpdateWord(ByVal words As IList(Of OcrWord), ByVal zoneIndex As Integer, ByVal wordIndex As Integer, ByVal newValue As String)
words
A list of OcrWord. In most cases, the same list obtained from IOcrZoneCharacters.GetWords.
zoneIndex
The 0-based zone index of the words.
wordIndex
The 0-based index of the word in words to update.
newValue
The value of the new word. Use null to delete the word.
You can use UpdateWord to modify the OCR recognition results by updating or deleting the words before optionally saving the results to the final output document. The C# and VB OCR Edit Demo uses this technique as well as the example below.
This example recognizes a document, then capitalizes every "the", deletes every "a", and replaces every instance of "color" with "water".
using System.Collections.Generic;
using System.IO;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Forms.Ocr;
using Leadtools.Forms;
using Leadtools.Forms.DocumentWriters;
using Leadtools.WinForms;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.ImageProcessing.Color;

/// <summary>
/// Recognizes Ocr1.tif, saves the untouched recognition results to
/// Ocr1_Original.pdf, then edits the recognized words and saves the edited
/// results to Ocr1_Modified.pdf so the two outputs can be compared.
/// The edits are: "the" (exact case) becomes "THE", every "a" (any case) is
/// deleted, and every "color" (any case) becomes "water".
/// </summary>
public void OcrUpdateWordExample()
{
    string tifFileName = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif");
    string pdfFileName1 = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1_Original.pdf");
    string pdfFileName2 = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1_Modified.pdf");

    using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false))
    {
        ocrEngine.Startup(null, null, null, LEAD_VARS.OcrAdvantageRuntimeDir);

        using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument())
        {
            // Recognize the TIFF file
            IOcrPage ocrPage = ocrDocument.Pages.AddPage(tifFileName, null);
            ocrPage.Recognize(null);

            // Save the original recognition results to compare with the results
            // we will modify
            ocrDocument.Save(pdfFileName1, DocumentFormat.Pdf, null);

            // Get the recognized words, one zone at a time
            IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();
            foreach (IOcrZoneCharacters zoneCharacters in pageCharacters)
            {
                IList<OcrWord> words = zoneCharacters.GetWords(ocrPage.DpiX, ocrPage.DpiY, LogicalUnit.Pixel);
                int zoneIndex = zoneCharacters.ZoneIndex;

                // Note, we do not enumerate "words" while changing it. Instead, a helper
                // looks up the index of the next matching word, that word is updated,
                // and the lookup is repeated until no more matches are found.

                // Capitalize all "the" (case-sensitive, so the new "THE" is not matched again)
                UpdateAllWords(pageCharacters, words, zoneIndex, "the", false, "THE");

                // Delete all "a" (a null new value deletes the word)
                UpdateAllWords(pageCharacters, words, zoneIndex, "a", true, null);

                // Replace all "color" with "water"
                UpdateAllWords(pageCharacters, words, zoneIndex, "color", true, "water");
            }

            // We are done, update the page recognized results
            ocrPage.SetRecognizedCharacters(pageCharacters);

            // Save the new results
            ocrDocument.Save(pdfFileName2, DocumentFormat.Pdf, null);
        }
    }
}

/// <summary>
/// Calls UpdateWord on every word in <paramref name="words"/> whose value matches
/// <paramref name="value"/>, passing <paramref name="newValue"/> as the new word.
/// </summary>
/// <param name="pageCharacters">The page characters that own the words being edited.</param>
/// <param name="words">The word list of the zone identified by <paramref name="zoneIndex"/>.</param>
/// <param name="zoneIndex">The 0-based index of the zone the words belong to.</param>
/// <param name="value">The word to search for.</param>
/// <param name="ignoreCase">true to match regardless of case.</param>
/// <param name="newValue">The replacement word, or null to delete the matched word.</param>
private static void UpdateAllWords(IOcrPageCharacters pageCharacters, IList<OcrWord> words, int zoneIndex, string value, bool ignoreCase, string newValue)
{
    // The search restarts from the first word after every update, so the loop ends
    // only when nothing in "words" matches any more. newValue must therefore not
    // match value under the same comparison, and this relies on UpdateWord
    // reflecting each change in "words".
    int index = FindWord(words, value, ignoreCase);
    while (index != -1)
    {
        pageCharacters.UpdateWord(words, zoneIndex, index, newValue);
        index = FindWord(words, value, ignoreCase);
    }
}

/// <summary>
/// Returns the index of the first word whose Value compares equal to
/// <paramref name="value"/>, or -1 when the list is null, empty, or has no match.
/// </summary>
/// <param name="words">The words to search; may be null.</param>
/// <param name="value">The word to look for.</param>
/// <param name="ignoreCase">true to compare without regard to case.</param>
private static int FindWord(IList<OcrWord> words, string value, bool ignoreCase)
{
    if (words == null)
    {
        return -1;
    }

    for (int i = 0; i < words.Count; i++)
    {
        if (string.Compare(words[i].Value, value, ignoreCase) == 0)
        {
            // Found it
            return i;
        }
    }

    // Not found
    return -1;
}

/// <summary>
/// Installation-specific paths used by the example.
/// </summary>
static class LEAD_VARS
{
    public const string ImagesDir = @"C:\Users\Public\Documents\LEADTOOLS Images";
    public const string OcrAdvantageRuntimeDir = @"C:\LEADTOOLS 19\Bin\Common\OcrAdvantageRuntime";
}
Imports System.Collections.Generic
Imports System.IO
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Forms.Ocr
Imports Leadtools.Forms
Imports Leadtools.Forms.DocumentWriters
Imports Leadtools.WinForms
Imports Leadtools.Drawing
Imports Leadtools.ImageProcessing
Imports Leadtools.ImageProcessing.Color

''' <summary>
''' Recognizes Ocr1.tif, saves the untouched recognition results to
''' Ocr1_Original.pdf, then edits the recognized words and saves the edited
''' results to Ocr1_Modified.pdf so the two outputs can be compared.
''' The edits are: "the" (exact case) becomes "THE", every "a" (any case) is
''' deleted, and every "color" (any case) becomes "water".
''' </summary>
<TestMethod>
Public Sub OcrUpdateWordExample()
    Dim tifFileName As String = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif")
    Dim pdfFileName1 As String = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1_Original.pdf")
    Dim pdfFileName2 As String = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1_Modified.pdf")

    Using ocrEngine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
        ocrEngine.Startup(Nothing, Nothing, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)

        Using ocrDocument As IOcrDocument = ocrEngine.DocumentManager.CreateDocument()
            ' Recognize the TIFF file
            Dim ocrPage As IOcrPage = ocrDocument.Pages.AddPage(tifFileName, Nothing)
            ocrPage.Recognize(Nothing)

            ' Save the original recognition results to compare with the results
            ' we will modify
            ocrDocument.Save(pdfFileName1, DocumentFormat.Pdf, Nothing)

            ' Get the recognized words, one zone at a time
            Dim pageCharacters As IOcrPageCharacters = ocrPage.GetRecognizedCharacters()
            For Each zoneCharacters As IOcrZoneCharacters In pageCharacters
                Dim words As IList(Of OcrWord) = zoneCharacters.GetWords(ocrPage.DpiX, ocrPage.DpiY, LogicalUnit.Pixel)
                Dim zoneIndex As Integer = zoneCharacters.ZoneIndex

                ' Note, we do not enumerate "words" while changing it. Instead, a helper
                ' looks up the index of the next matching word, that word is updated,
                ' and the lookup is repeated until no more matches are found.

                ' Capitalize all "the" (case-sensitive, so the new "THE" is not matched again)
                UpdateAllWords(pageCharacters, words, zoneIndex, "the", False, "THE")

                ' Delete all "a" (a Nothing new value deletes the word)
                UpdateAllWords(pageCharacters, words, zoneIndex, "a", True, Nothing)

                ' Replace all "color" with "water"
                UpdateAllWords(pageCharacters, words, zoneIndex, "color", True, "water")
            Next

            ' We are done, update the page recognized results
            ocrPage.SetRecognizedCharacters(pageCharacters)

            ' Save the new results
            ocrDocument.Save(pdfFileName2, DocumentFormat.Pdf, Nothing)
        End Using
    End Using
End Sub

''' <summary>
''' Calls UpdateWord on every word in <paramref name="words"/> whose value matches
''' <paramref name="value"/>, passing <paramref name="newValue"/> as the new word.
''' </summary>
''' <param name="pageCharacters">The page characters that own the words being edited.</param>
''' <param name="words">The word list of the zone identified by <paramref name="zoneIndex"/>.</param>
''' <param name="zoneIndex">The 0-based index of the zone the words belong to.</param>
''' <param name="value">The word to search for.</param>
''' <param name="ignoreCase">True to match regardless of case.</param>
''' <param name="newValue">The replacement word, or Nothing to delete the matched word.</param>
Private Shared Sub UpdateAllWords(pageCharacters As IOcrPageCharacters, words As IList(Of OcrWord), zoneIndex As Integer, value As String, ignoreCase As Boolean, newValue As String)
    ' The search restarts from the first word after every update, so the loop ends
    ' only when nothing in "words" matches any more. newValue must therefore not
    ' match value under the same comparison, and this relies on UpdateWord
    ' reflecting each change in "words".
    Dim index As Integer = FindWord(words, value, ignoreCase)
    While index <> -1
        pageCharacters.UpdateWord(words, zoneIndex, index, newValue)
        index = FindWord(words, value, ignoreCase)
    End While
End Sub

''' <summary>
''' Returns the index of the first word whose Value compares equal to
''' <paramref name="value"/>, or -1 when the list is Nothing, empty, or has no match.
''' </summary>
''' <param name="words">The words to search; may be Nothing.</param>
''' <param name="value">The word to look for.</param>
''' <param name="ignoreCase">True to compare without regard to case.</param>
Private Shared Function FindWord(words As IList(Of OcrWord), value As String, ignoreCase As Boolean) As Integer
    If words Is Nothing Then
        Return -1
    End If

    For i As Integer = 0 To words.Count - 1
        If String.Compare(words(i).Value, value, ignoreCase) = 0 Then
            ' Found it
            Return i
        End If
    Next

    ' Not found
    Return -1
End Function

''' <summary>
''' Installation-specific paths used by the example.
''' </summary>
Public NotInheritable Class LEAD_VARS
    Public Const ImagesDir As String = "C:\Users\Public\Documents\LEADTOOLS Images"
    Public Const OcrAdvantageRuntimeDir As String = "C:\LEADTOOLS 19\Bin\Common\OcrAdvantageRuntime"
End Class
|
Products |
Support |
Feedback: UpdateWord Method - Leadtools.Forms.Ocr |
Introduction |
Help Version 19.0.2017.6.6
|

Raster .NET | C API | C++ Class Library | JavaScript HTML5
Document .NET | C API | C++ Class Library | JavaScript HTML5
Medical .NET | C API | C++ Class Library | JavaScript HTML5
Medical Web Viewer .NET
Your email has been sent to support! Someone should be in touch! If your matter is urgent please come back into chat.
Chat Hours:
Monday - Friday, 8:30am to 6pm ET
Thank you for your feedback!
Please fill out the form again to start a new chat.
All agents are currently offline.
Chat Hours:
Monday - Friday
8:30AM - 6PM EST
To contact us please fill out this form and we will contact you via email.