OcrAutoRecognizeManagerJobOperation Enumeration

Summary

IOcrAutoRecognizeManager job operations.

Syntax

Objective-C

C++/CLI

Java

Python

public enum OcrAutoRecognizeManagerJobOperation

// Operations reported by IOcrAutoRecognizeManager while it runs a job.
// The explicit values below are the ones the compiler assigns implicitly (0 through 10).
typedef NS_ENUM(NSInteger, LTOcrAutoRecognizeManagerJobOperation) {
    // Other operation, such as unexpected and unrecoverable errors
    LTOcrAutoRecognizeManagerJobOperationOther = 0,
    // Creating a document
    LTOcrAutoRecognizeManagerJobOperationCreateDocument = 1,
    // Preparing the document
    LTOcrAutoRecognizeManagerJobOperationPrepareDocument = 2,
    // Loading the image of a page
    LTOcrAutoRecognizeManagerJobOperationLoadPage = 3,
    // Preprocessing a page
    LTOcrAutoRecognizeManagerJobOperationPreprocessPage = 4,
    // Zoning a page
    LTOcrAutoRecognizeManagerJobOperationZonePage = 5,
    // Recognizing a page
    LTOcrAutoRecognizeManagerJobOperationRecognizePage = 6,
    // Saving a page
    LTOcrAutoRecognizeManagerJobOperationSavePage = 7,
    // Appending multiple LTD files
    LTOcrAutoRecognizeManagerJobOperationAppendLtd = 8,
    // Saving a document
    LTOcrAutoRecognizeManagerJobOperationSaveDocument = 9,
    // Converting a document to its final format
    LTOcrAutoRecognizeManagerJobOperationConvertDocument = 10
};

public enum OcrAutoRecognizeManagerJobOperation

public enum class OcrAutoRecognizeManagerJobOperation

class OcrAutoRecognizeManagerJobOperation(Enum): 
   """IOcrAutoRecognizeManager job operations."""
   # Other operation, such as unexpected and unrecoverable errors
   Other = 0 
   # Creating a document
   CreateDocument = 1 
   # Preparing the document
   PrepareDocument = 2 
   # Loading the image of a page
   LoadPage = 3 
   # Preprocessing a page
   PreprocessPage = 4 
   # Zoning a page
   ZonePage = 5 
   # Recognizing a page
   RecognizePage = 6 
   # Saving a page
   SavePage = 7 
   # Appending multiple LTD files
   AppendLtd = 8 
   # Saving a document
   SaveDocument = 9 
   # Converting a document to its final format
   ConvertDocument = 10

Members

Value	Member	Description
0	Other	Other operation, such as unexpected and unrecoverable errors.
1	CreateDocument	The IOcrAutoRecognizeManager is creating a document with IOcrDocumentManager.CreateDocument.
2	PrepareDocument	Preparing the IOcrDocument, in certain situations, by clearing all the pages inside it.
3	LoadPage	Loading the image of a page using RasterCodecs.Load. The page is then created using IOcrEngine.CreatePage.
4	PreprocessPage	Preprocessing a page using IOcrPage.AutoPreprocess.
5	ZonePage	Zoning a page using IOcrPage.AutoZone or IOcrPage.LoadZone. Note that when using the IOcrAutoRecognizeManager.JobOperation event, you can use this operation to zone the page manually as shown in the OcrAutoRecognizeJobOperationEventArgs example.
6	RecognizePage	Recognizing a page using IOcrPage.Recognize.
7	SavePage	Saving a page using IOcrDocument.Save.
8	AppendLtd	Appending multiple LTD's using DocumentWriter.AppendLtd.
9	SaveDocument	Saving a document using IOcrDocument.Save.
10	ConvertDocument	Converting a document to its final format using DocumentWriter.Convert.

Remarks

Used as type for the following properties:

The OcrAutoRecognizeManagerJobError.Operation property to indicate the operation that caused the error.

You can set IOcrAutoRecognizeManager.JobErrorMode to OcrAutoRecognizeManagerJobErrorMode.Continue to log and continue when a non-critical error occurs during the recognition process. The following errors are considered unrecoverable and the recognition will fail regardless of the current error mode: Other, CreateDocument, PrepareDocument, LoadPage, SavePage, AppendLtd, SaveDocument and ConvertDocument.
The OcrAutoRecognizeJobOperationEventArgs.Operation property to indicate the operation being run.

IOcrAutoRecognizeManager allows you to modify the raster image, OCR page or OCR document during some parts of the operation. Refer to OcrAutoRecognizeJobOperationEventArgs.PageImage for more information and an example.

The OcrAutoRecognizeJobOperationEventArgs.Status property can be set to OcrAutoRecognizeManagerJobStatus.Abort inside the event handler to abort the current operation. Aborting the operation will cause the whole job to be canceled. The only exception is with LoadPage. If Status is set to Abort, then the page will be skipped and not added to the final document. The following code performs custom processing (in this example, blank page detection) to skip specific pages from being added to the final document:

// Our IOcrAutoRecognizeManager.JobOperation event handler.
// It runs blank page detection on every loaded page and tells the manager to skip blank ones.
EventHandler<OcrAutoRecognizeJobOperationEventArgs> jobOperation = (object sender, OcrAutoRecognizeJobOperationEventArgs e) => 
{ 
   // Only act after LoadPage has finished: the code below needs e.Page, which is
   // only created with its image once the load operation has completed
   if (e.Operation != OcrAutoRecognizeManagerJobOperation.LoadPage || !e.PostOperation) 
   { 
      return; 
   } 

   // IOcrPage has been created with an image loaded, check if it is empty 
   // Get the image 
   using (RasterImage image = e.Page.GetRasterImage(OcrPageType.Original)) 
   { 
      // Run the blank page detector command 
      var blankPageDetector = new BlankPageDetectorCommand(BlankPageDetectorCommandFlags.None, 0, 0, 0, 0); 
      blankPageDetector.Run(image); 
      if (blankPageDetector.IsBlank) 
      { 
         // The image is blank, inform the OCR auto recognize manager to skip it 
         e.Status = OcrAutoRecognizeManagerJobStatus.Abort; 
      } 
   } 
}; 

IOcrAutoRecognizeManager ocrAutoRecognizeManager = ocrEngine.AutoRecognizeManager; 

// Create the job 
var jobData = new OcrAutoRecognizeJobData(inputFile, DocumentFormat.Pdf, outputFile); 
IOcrAutoRecognizeJob ocrJob = ocrAutoRecognizeManager.CreateJob(jobData); 

// Add our event 
ocrAutoRecognizeManager.JobOperation += jobOperation; 
try 
{ 
   // Run the job 
   ocrAutoRecognizeManager.RunJob(ocrJob); 
} 
finally 
{ 
   // Always remove the handler, even when RunJob throws, so it does not stay attached to the manager 
   ocrAutoRecognizeManager.JobOperation -= jobOperation; 
}

Example

Java

using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Ocr; 
using Leadtools.Document.Writer; 
using Leadtools.Forms.Common; 
using Leadtools.WinForms; 
 
/// <summary>
/// Converts every TIF file found in the images directory to PDF by queuing one
/// IOcrAutoRecognizeManager job per file on the thread pool and waiting for all of them.
/// </summary>
public class RunJobExample 
{ 
   // Number of documents that are pending 
   private int _documentsPending; 
   // Event to trigger when all documents are finished 
   private AutoResetEvent _allDocumentsFinishedEvent; 

   /// <summary>
   /// Starts the OCR engine, queues a job for each TIF file and blocks until the
   /// JobCompleted handler has counted every job as finished.
   /// </summary>
   public void Start() 
   { 
      string imagesDirectory = LEAD_VARS.ImagesDir; 
      string documentsDirectory = Path.Combine(LEAD_VARS.ImagesDir, "RunJobExample"); 

      // Create the output (documents) directory 
      if (!Directory.Exists(documentsDirectory)) 
      { 
         Directory.CreateDirectory(documentsDirectory); 
      } 

      // Get all TIF files in input (images) directory 
      string[] imageFileNames = Directory.GetFiles(imagesDirectory, "*.tif"); 
      if (imageFileNames.Length == 0) 
      { 
         Console.WriteLine("No images to OCR"); 
         return; 
      } 

      // Create a new OCR engine instance 
      OcrEngineType engineType = OcrEngineType.LEAD; 
      Console.WriteLine("Starting up {0} engine", engineType); 
      using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(engineType)) 
      { 
         ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir); 

         // Setup document PDF save options: Image/Text with CCITT G4 encoding for B/W 
         DocumentWriter docWriter = ocrEngine.DocumentWriterInstance; 
         // Direct cast: an unexpected options type fails here with InvalidCastException
         // instead of a NullReferenceException on the next line
         PdfDocumentOptions pdfOptions = (PdfDocumentOptions)docWriter.GetOptions(DocumentFormat.Pdf); 
         pdfOptions.ImageOverText = true; 
         pdfOptions.DocumentType = PdfDocumentType.Pdf; 
         pdfOptions.FontEmbedMode = DocumentFontEmbedMode.None; 
         pdfOptions.OneBitImageCompression = OneBitImageCompressionType.FaxG4; 
         docWriter.SetOptions(DocumentFormat.Pdf, pdfOptions); 

         // We are going to use multiple-threads, so disable threading in 
         // IOcrAutoRecognizeManager 
         IOcrAutoRecognizeManager autoRecognizeManager = ocrEngine.AutoRecognizeManager; 
         autoRecognizeManager.MaximumThreadsPerJob = 1; 

         // Tell the recognize manager to continue on errors 
         autoRecognizeManager.JobErrorMode = OcrAutoRecognizeManagerJobErrorMode.Continue; 

         // Instead of using events to trigger when documents are done, 
         // we will use the JobCompleted events of IOcrAutoRecognizeManager 
         // to decrement a counter and trigger one event when the counter reaches 0 
         autoRecognizeManager.JobStarted += autoRecognizeManager_JobStarted; 
         autoRecognizeManager.JobCompleted += autoRecognizeManager_JobCompleted; 

         try 
         { 
            int count = imageFileNames.Length; 
            _documentsPending = count; 
            _allDocumentsFinishedEvent = new AutoResetEvent(false); 

            for (int i = 0; i < count; i++) 
            { 
               // Create the job data 
               string imageFileName = imageFileNames[i]; 
               string name = "Document " + (i + 1).ToString(); 
               Console.WriteLine("Queuing {0} file {1}", name, imageFileName); 

               JobData data = new JobData(); 
               data.AutoRecognizeManager = autoRecognizeManager; 
               data.ImageFileName = imageFileName; 
               data.DocumentFileName = Path.Combine(documentsDirectory, Path.GetFileNameWithoutExtension(imageFileName) + ".pdf"); 
               data.JobName = name; 

               // Queue this job 
               ThreadPool.QueueUserWorkItem(new WaitCallback(RunJob), data); 
            } 

            // Wait for all documents to finish 
            _allDocumentsFinishedEvent.WaitOne(); 
            _allDocumentsFinishedEvent.Close(); 
         } 
         finally 
         { 
            // Detach the handlers even when queuing or waiting throws, so this
            // instance does not stay referenced by the manager
            autoRecognizeManager.JobStarted -= autoRecognizeManager_JobStarted; 
            autoRecognizeManager.JobCompleted -= autoRecognizeManager_JobCompleted; 
         } 

         Console.WriteLine("All documents finished, check the result files in {0}", documentsDirectory); 
      } 
   } 

   /// <summary>
   /// JobStarted handler: prints the job name and aborts all jobs when AbortJobs asks for it.
   /// </summary>
   private void autoRecognizeManager_JobStarted(object sender, OcrAutoRecognizeRunJobEventArgs e) 
   { 
      // This is not strictly needed in this example, we will 
      // use it to show information 
      Console.WriteLine("{0} started...", e.Job.JobData.JobName); 

      // Check if we need to abort 
      if (AbortJobs(e.Job)) 
      { 
         // Yes, abort all jobs 
         e.Job.AutoRecognizeManager.AbortAllJobs(); 
      } 
   } 

   /// <summary>
   /// JobCompleted handler: reports the outcome, writes any job errors to a text file next
   /// to the output document, and signals the waiting thread when the last job is done.
   /// </summary>
   private void autoRecognizeManager_JobCompleted(object sender, OcrAutoRecognizeRunJobEventArgs e) 
   { 
      string message = string.Format("{0} completed ", e.Job.JobData.JobName); 

      IOcrAutoRecognizeJob job = e.Job; 

      // Show any errors 
      if (job.Errors.Count == 0) 
      { 
         message += "successfully..."; 
      } 
      else 
      { 
         message += "with errors, first error is " + job.Errors[0].Exception.Message; 

         // And save the errors to a text file in the document directory 
         string documentFileName = job.JobData.DocumentFileName; 
         string textPathName = Path.Combine(Path.GetDirectoryName(documentFileName), Path.GetFileNameWithoutExtension(documentFileName) + "_errors.txt"); 
         using (StreamWriter writer = File.CreateText(textPathName)) 
         { 
            writer.WriteLine(job.JobData.JobName); 
            writer.WriteLine("Data:"); 
            writer.WriteLine(" Image file name: " + job.JobData.ImageFileName); 
            writer.WriteLine(" First page number: " + job.JobData.FirstPageNumber); 
            writer.WriteLine(" Last page number: " + job.JobData.LastPageNumber); 
            writer.WriteLine(" Format:" + job.JobData.Format); 
            writer.WriteLine(" Document file name: " + job.JobData.DocumentFileName); 
            writer.WriteLine("Errors:"); 

            foreach (OcrAutoRecognizeManagerJobError error in job.Errors) 
            { 
               writer.WriteLine(" Page: {0} during {1}. Error: {2}", error.ImagePageNumber, error.Operation, error.Exception.Message); 
            } 
         } 
      } 

      Console.WriteLine(message); 

      // Decrement the documents count, when we reach 0, we are done 
      // Since this will be called from multiple threads, we need 
      // to use a thread-safe procedure 
      int pending = Interlocked.Decrement(ref _documentsPending); 

      // If we are the last document, wake up the main thread 
      if (pending == 0) 
      { 
         _allDocumentsFinishedEvent.Set(); 
      } 
   } 

   /// <summary>
   /// State object handed to the thread pool work item for one document.
   /// </summary>
   private class JobData 
   { 
      public IOcrAutoRecognizeManager AutoRecognizeManager; 
      public string ImageFileName; 
      public string DocumentFileName; 
      public string JobName; 
   } 

   /// <summary>
   /// Thread pool callback: creates and runs the OCR job described by <paramref name="state"/>.
   /// </summary>
   /// <param name="state">The JobData instance queued by Start.</param>
   private void RunJob(object state) 
   { 
      // Direct cast: a wrong state object fails with InvalidCastException here rather
      // than a NullReferenceException on first use
      JobData data = (JobData)state; 

      Console.WriteLine("Running {0}", data.JobName); 

      // Run it 
      OcrAutoRecognizeJobData jobData = new OcrAutoRecognizeJobData(data.ImageFileName, DocumentFormat.Pdf, data.DocumentFileName); 
      jobData.JobName = data.JobName; 
      IOcrAutoRecognizeJob job = data.AutoRecognizeManager.CreateJob(jobData); 
      data.AutoRecognizeManager.RunJob(job); 
   } 

   /// <summary>
   /// Decides whether all jobs should be aborted; this example always returns false.
   /// </summary>
   private bool AbortJobs(IOcrAutoRecognizeJob ocrJob) 
   { 
      // In your application, you can check if abortion is required, for example, if the user 
      // has pressed the Cancel button on a progress bar or if your service is shutting down. 

      // In this example, we will never abort, but you can change this code to return true 
      // upon any condition (or when a specific job is about to start) 
      // and the engine will abort all current and pending jobs 
      return false; 
   } 
} 
 
/// <summary>
/// Fixed installation paths used by the example.
/// </summary>
internal static class LEAD_VARS 
{ 
   /// <summary>Directory passed to the OCR engine at startup as its runtime location.</summary>
   public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime"; 

   /// <summary>Directory the example reads its input images from.</summary>
   public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images"; 
}

 
import java.io.File; 
import java.io.FileNotFoundException; 
import java.io.FileWriter; 
import java.io.FilenameFilter; 
import java.io.IOException; 
import java.nio.file.Files; 
import java.nio.file.Path; 
import java.nio.file.Paths; 
import java.util.ArrayList; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.atomic.AtomicInteger; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
 
import static org.junit.Assert.*; 
 
import leadtools.*; 
import leadtools.document.writer.*; 
import leadtools.internal.AutoResetEvent; 
import leadtools.ocr.*; 
 
 
// Number of documents that are pending 
private int _documentsPending; 
// Event to trigger when all documents are finished 
private AutoResetEvent _allDocumentsFinishedEvent; 
// Thread usage 
private final static AtomicInteger at = new AtomicInteger(); 
 
/**
 * Converts every TIF file in the images directory to PDF by submitting one
 * auto-recognize job per file to an executor and waiting until the JobCompleted
 * listener has counted all of them as finished.
 *
 * @throws IOException if a directory or an output file cannot be created
 */
public void OcrAutoRecognizeManagerRunJobExample() throws IOException { 
   String LEAD_VARS_ImagesDir = "C:\\LEADTOOLS23\\Resources\\Images"; 
   String LEAD_VARS_OcrLEADRuntimeDir = "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime"; 
   String docsDir = combine(LEAD_VARS_ImagesDir, "RunJobExample"); 
   String imageDir = LEAD_VARS_ImagesDir; 

   // Create the output (documents) directory 
   Path docsPath = Paths.get(docsDir); 
   Files.createDirectories(docsPath); 

   // Get all TIF files in input (images) directory 
   Path imagePath = Paths.get(imageDir); 
   Files.createDirectories(imagePath); 

   FilenameFilter tifFileFilter = (d, s) -> { 
      return s.toLowerCase().endsWith(".tif"); 
   }; 

   File imageFolder = new File(imageDir); 
   String[] imageFileNames = imageFolder.list(tifFileFilter); 
   // File.list returns null when the path is not a directory or an I/O error occurs 
   if (imageFileNames == null || imageFileNames.length == 0) { 
      System.out.println("No images to OCR"); 
      return; 
   } 

   // Create a new OCR engine instance 
   OcrEngineType engineType = OcrEngineType.LEAD; 
   System.out.println("Starting up " + engineType + " engine"); 
   OcrEngine ocrEngine = OcrEngineManager.createEngine(engineType); 

   // NOTE(review): the pool has a single worker, so the queued jobs run one after another 
   ExecutorService executorService = Executors.newFixedThreadPool(1); 

   try { 
      ocrEngine.startup(null, null, null, LEAD_VARS_OcrLEADRuntimeDir); 

      // Setup document PDF save options: Image/Text with CCITT G4 encoding for B/W 
      DocumentWriter docWriter = ocrEngine.getDocumentWriterInstance(); 
      PdfDocumentOptions pdfOptions = (PdfDocumentOptions) docWriter.getOptions(DocumentFormat.PDF); 
      pdfOptions.setImageOverText(true); 
      pdfOptions.setDocumentType(PdfDocumentType.PDF); 
      pdfOptions.setFontEmbedMode(DocumentFontEmbedMode.NONE); 
      pdfOptions.setOneBitImageCompression(OneBitImageCompressionType.FAX_G4); 
      docWriter.setOptions(DocumentFormat.PDF, pdfOptions); 

      // We are going to use multiple-threads, so disable threading in IOcrAutoRecognizeManager 
      OcrAutoRecognizeManager autoRecognizeManager = ocrEngine.getAutoRecognizeManager(); 
      autoRecognizeManager.setMaximumThreadsPerJob(1); 

      // Tell the recognize manager to continue on errors 
      autoRecognizeManager.setJobErrorMode(OcrAutoRecognizeManagerJobErrorMode.CONTINUE); 

      // Instead of using events to trigger when documents are done, 
      // we will use the JobCompleted events of IOcrAutoRecognizeManager 
      // to decrement a counter and trigger one event when the counter reaches 0 
      autoRecognizeManager.addJobStartedListener(autoRecognizeManager_JobStarted); 
      autoRecognizeManager.addJobCompletedListener(autoRecognizeManager_JobCompleted); 
      int count = imageFileNames.length; 
      _documentsPending = count; 
      at.set(_documentsPending); 
      _allDocumentsFinishedEvent = new AutoResetEvent(); 

      System.out.println("Starting the threads and waiting..."); 

      for (int i = 0; i < count; i++) { 
         // Create the job data 
         String imageFileName = imageFileNames[i]; 
         String name = "Document " + (i + 1); 
         System.out.println("Queuing " + name + " file " + imageFileName); 

         // Strip only the extension: use the last dot so a name such as "scan.v2.tif" 
         // becomes "scan.v2.pdf" (the filter guarantees the name ends with ".tif") 
         String baseName = imageFileName.substring(0, imageFileName.lastIndexOf('.')); 
         String documentFileName = combine(docsDir, baseName + ".pdf"); 

         JobData data = new JobData(); 
         data.AutoRecognizeManager = autoRecognizeManager; 
         data.ImageFileName = combine(LEAD_VARS_ImagesDir, imageFileName); 
         data.DocumentFileName = documentFileName; 
         data.JobName = name; 

         // Make sure the output file exists before the job runs 
         File dataFile = new File(documentFileName); 
         if (!dataFile.exists()) { 
            dataFile.createNewFile(); 
         } 

         executorService.submit(() -> RunJob(data)); 
      } 

      // Wait for all documents to finish 
      _allDocumentsFinishedEvent.waitOne(); 
      _allDocumentsFinishedEvent.close(); 

      System.out.println("All documents finished, check the result files in " + docsDir); 
   } finally { 
      // Release the worker thread (it is non-daemon and would keep the JVM alive) 
      // and the engine, whether or not the run succeeded 
      executorService.shutdown(); 
      ocrEngine.dispose(); 
   } 
} 
 
// Job-started listener: announces the job and aborts every job when AbortJobs says so.
OcrAutoRecognizeRunJobListener autoRecognizeManager_JobStarted = new OcrAutoRecognizeRunJobListener() { 

   @Override 
   public void onJob(OcrAutoRecognizeRunJobEvent e) { 
      OcrAutoRecognizeJob startedJob = e.getJob(); 

      // Informational only; the example does not depend on this output 
      String jobName = startedJob.getJobData().getJobName(); 
      System.out.println(jobName + " started..."); 

      // Nothing more to do unless an abort was requested 
      if (!AbortJobs(startedJob)) { 
         return; 
      } 

      // Abort requested: stop all jobs 
      startedJob.getAutoRecognizeManager().abortAllJobs(); 
   } 

}; 
 
// Job-completed listener: reports the outcome, writes any job errors to a text file next to
// the output document, and signals the waiting thread once the last pending job has finished.
OcrAutoRecognizeRunJobListener autoRecognizeManager_JobCompleted = new OcrAutoRecognizeRunJobListener() { 

   @Override 
   public void onJob(OcrAutoRecognizeRunJobEvent e) { 
      OcrAutoRecognizeJob job = e.getJob(); 
      String message = job.getJobData().getJobName() + " completed "; 

      // Show any errors 
      if (job.getErrors().size() == 0) { 
         message += "successfully..."; 
      } 
      else { 
         message += " with errors, first error is " + job.getErrors().get(0).getException().getMessage(); 

         // And save the errors to a text file in the document directory 
         String documentFileName = job.getJobData().getDocumentFileName(); 
         File doc = new File(documentFileName); 

         // Build "<name without extension>_errors.txt" from the file name only; the full 
         // path must not be used as the child of its own parent directory, and only the 
         // last dot marks the extension 
         String docName = doc.getName(); 
         int extensionStart = docName.lastIndexOf('.'); 
         String baseName = extensionStart >= 0 ? docName.substring(0, extensionStart) : docName; 
         String textPathName = combine(doc.getParent(), baseName + "_errors.txt"); 

         // try-with-resources closes the writer, no explicit close() is needed 
         try (FileWriter writer = new FileWriter(textPathName)) { 
            writer.write(job.getJobData().getJobName() + "\n"); 
            writer.write("Data:" + "\n"); 
            writer.write(" Image file name: " + job.getJobData().getImageFileName() + "\n"); 
            writer.write(" First page number: " + job.getJobData().getFirstPageNumber() + "\n"); 
            writer.write(" Last page number: " + job.getJobData().getLastPageNumber() + "\n"); 
            writer.write(" Format:" + job.getJobData().getFormat() + "\n"); 
            writer.write(" Document file name: " + job.getJobData().getDocumentFileName() + "\n"); 
            writer.write("Errors:" + "\n"); 

            for (OcrAutoRecognizeManagerJobError error : job.getErrors()) 
            { 
               writer.write(" Page: " + error.getImagePageNumber() + " during " + error.getOperation() + ". Error: " + error.getException().getMessage() + "\n"); 
            } 
         } catch (IOException e1) { 
            e1.printStackTrace(); 
         } 
      } 

      System.out.println(message); 

      // Decrement the documents count, when we reach 0, we are done 
      // Since this will be called from multiple threads, we need 
      // to use a thread-safe procedure 
      int pending = at.decrementAndGet(); 
      System.out.println(pending); 

      // If we are the last document, wake up the main thread 
      if (pending == 0) { 
         _allDocumentsFinishedEvent.set(); 
      } 
   } 

}; 
 
// State handed to RunJob for a single document.
class JobData { 
   // Manager used to create and run the job 
   public OcrAutoRecognizeManager AutoRecognizeManager; 
   // Path of the input image file 
   public String ImageFileName; 
   // Path of the output document 
   public String DocumentFileName; 
   // Name assigned to the job 
   public String JobName; 
} 
 
/**
 * Creates the auto-recognize job described by {@code state} and runs it.
 *
 * @param state manager, input/output file names and job name for one document
 */
private void RunJob(JobData state) { 
   System.out.println("Running " + state.JobName); 

   // Describe the conversion: input image -> PDF document 
   OcrAutoRecognizeJobData jobData = new OcrAutoRecognizeJobData(state.ImageFileName, DocumentFormat.PDF, state.DocumentFileName); 
   jobData.setJobName(state.JobName); 

   // Create the job and run it on the manager 
   OcrAutoRecognizeManager manager = state.AutoRecognizeManager; 
   manager.runJob(manager.createJob(jobData)); 
} 
 
/**
 * Decides whether all jobs should be aborted. This example never aborts.
 *
 * @param ocrJob the job that is about to start (not inspected by this implementation)
 * @return always {@code false}
 */
private boolean AbortJobs(OcrAutoRecognizeJob ocrJob) { 
   // In your application, you can check if abortion is required, for example, if the user 
   // has pressed the Cancel button on a progress bar or if your service is shutting down. 
 
   // In this example, we will never abort, but you can change this code to return true 
   // upon any condition (or when a specific job is about to start) 
   // and the engine will abort all current and pending jobs 
   return false; 
} 
 
/**
 * Joins a parent path and a child path using the platform's separator.
 *
 * @param path1 the parent path
 * @param path2 the child path
 * @return the combined path string
 */
public String combine(String path1, String path2) { 
   return new File(path1, path2).getPath(); 
}

Requirements

Target Platforms

Reference

Leadtools.Ocr Namespace

Programming with the LEADTOOLS .NET OCR

Download our FREE evaluation

Help Version 23.0.2024.4.19

Leadtools.Ocr Assembly

Introduction

Getting Started

Namespaces

Leadtools.Ocr Namespace

Assemblies