←Select platform

DocumentManager Property

Summary
Allows you to create new OCR documents that are used to create final documents such as PDF or Microsoft Word from the recognition results.
Syntax
C#
Objective-C
C++/CLI
Java
Python
public IOcrDocumentManager DocumentManager { get; } 
@property (nonatomic, strong, readonly) LTOcrDocumentManager *documentManager 
public OcrDocumentManager getDocumentManager() 
property IOcrDocumentManager^ DocumentManager { 
   IOcrDocumentManager^ get(); 
} 
DocumentManager # get  (IOcrEngine) 

Property Value

The instance of the IOcrDocumentManager object that allows you to create final documents such as PDF or Microsoft Word from the recognition results.

Remarks

The DocumentManager allows you to do the following:

  • Create instances of IOcrDocument objects that encapsulate an OCR'ed document. Each IOcrDocument contains an IOcrDocument.Pages property that is an implementation of a standard .NET collection of IOcrPage objects. Use this member to add, remove or update image (raster) pages in the OCR document. Pages can be image files on disk, in memory, or even at a remote URL. Any file format supported by LEADTOOLS (TIFF, JPEG, BMP, etc.) can be loaded into the OCR document. At any time, use the various IOcrPage methods to zone the page (or pages) and recognize the objects in them in preparation for being saved as a document. For more information, refer to IOcrDocument, IOcrPageCollection and IOcrPage. Once you are done adding and preparing the pages, you can use the save methods of the IOcrDocument object to save the document into its final format. LEADTOOLS supports saving to various standard document formats such as PDF, Microsoft Word, HTML and several others. For more information, refer to IOcrDocumentManager, IOcrDocument and DocumentFormat.
  • Get or set the options to use when creating new documents such as the font names.

For more information, refer to IOcrDocumentManager.

You must call the Startup method before you can use the DocumentManager.

Example
C#
Java
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Ocr; 
using Leadtools.Forms.Common; 
using Leadtools.Document.Writer; 
using Leadtools.WinForms; 
 
/// <summary>
/// Example for <c>IOcrEngine.DocumentManager</c>: builds an OCR document from two TIFF images,
/// recognizes its pages, then saves it once per <c>DocumentFormat</c> value and once per
/// engine-native format reported by the document manager.
/// </summary>
public void OcrDocumentManagerExample()
{
   string firstImagePath = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif");
   string secondImagePath = Path.Combine(LEAD_VARS.ImagesDir, "Ocr2.tif");
   string outputFolder = Path.Combine(LEAD_VARS.ImagesDir, "OutputDirectory");

   // Start from an empty output folder: remove a previous one (with its contents), then recreate it
   if (Directory.Exists(outputFolder))
   {
      Directory.Delete(outputFolder, true);
   }
   Directory.CreateDirectory(outputFolder);

   // The engine is disposed when this using block exits
   using (IOcrEngine engine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
   {
      // Default startup parameters; only the runtime directory is supplied
      Console.WriteLine("Starting up the engine...");
      engine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

      // The document manager is the factory for OCR documents
      Console.WriteLine("Creating the OCR document...");
      IOcrDocumentManager documentManager = engine.DocumentManager;
      using (IOcrDocument document = documentManager.CreateDocument())
      {
         // One page per input image, in order
         Console.WriteLine("Adding the pages...");
         foreach (string imagePath in new[] { firstImagePath, secondImagePath })
         {
            document.Pages.AddPage(imagePath, null);
         }

         // AutoZone is deliberately not called here; per the original example note, Recognize takes care of it
         Console.WriteLine("Recognizing all the pages...");
         document.Pages.Recognize(null);

         // Pass 1: every value of the DocumentFormat enumeration
         foreach (DocumentFormat format in Enum.GetValues(typeof(DocumentFormat)))
         {
            string displayName = DocumentWriter.GetFormatFriendlyName(format);
            Console.WriteLine("Saving (using default options) to {0}...", displayName);

            // Output file is <output folder>\<format name>.<format extension>
            string fileExtension = DocumentWriter.GetFormatFileExtension(format);
            string formatName = format.ToString();
            string targetPath = Path.Combine(outputFolder, formatName + "." + fileExtension);

            document.Save(targetPath, format, null);

            if (format != DocumentFormat.Ltd)
            {
               continue;
            }

            // The LTD output is additionally converted to PDF with the engine's document writer
            Console.WriteLine("Converting the LTD file to PDF...");
            string pdfPath = Path.Combine(outputFolder, formatName + "_pdf.pdf");

            DocumentWriter writer = engine.DocumentWriterInstance;
            writer.Convert(targetPath, pdfPath, DocumentFormat.Pdf);
         }

         // Pass 2: engine-native formats, if the engine reports any
         foreach (string nativeFormat in documentManager.GetSupportedEngineFormats())
         {
            string displayName = documentManager.GetEngineFormatFriendlyName(nativeFormat);
            Console.WriteLine("Saving to engine native format {0}...", displayName);

            // Output file is <output folder>\engine_<native format>.<extension>
            string fileExtension = documentManager.GetEngineFormatFileExtension(nativeFormat);
            string targetPath = Path.Combine(outputFolder, "engine_" + nativeFormat + "." + fileExtension);

            // A native format is selected through EngineFormat and then saved as DocumentFormat.User
            documentManager.EngineFormat = nativeFormat;
            document.Save(targetPath, DocumentFormat.User, null);
         }
      }

      // Explicit shutdown; disposing the engine would also shut it down if it is still started
      Console.WriteLine("Shutting down...");
      engine.Shutdown();
   }
}
 
/// <summary>
/// Hard-coded locations used by the example above.
/// NOTE(review): these look like default LEADTOOLS 23 installation paths; adjust them to match your machine.
/// </summary>
static class LEAD_VARS 
{ 
   /// <summary>Folder the example reads Ocr1.tif and Ocr2.tif from, and under which it creates its output directory.</summary>
   public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images"; 
   /// <summary>Directory passed as the last argument to <c>IOcrEngine.Startup</c> in the example.</summary>
   public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime"; 
} 
 
import java.io.File; 
import java.io.IOException; 
import java.nio.file.Files; 
import java.nio.file.Path; 
import java.nio.file.Paths; 
 
import java.util.ArrayList; 
import java.util.List; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
import static org.junit.Assert.assertTrue; 
 
import leadtools.*; 
import leadtools.codecs.*; 
import leadtools.document.writer.*; 
import leadtools.ocr.*; 
 
 
/**
 * Example for {@code OcrEngine.getDocumentManager()}: builds an OCR document from two TIFF images,
 * recognizes its pages, then saves it in every {@code DocumentFormat} value except USER and XLSX,
 * and in every engine-native format reported by the document manager.
 *
 * @throws IOException if the output directory cannot be created
 */
public void OcrDocumentManagerExample() throws IOException {
   final var LEAD_VARS_ImagesDir = "C:\\LEADTOOLS23\\Resources\\Images";
   final String LEAD_VARS_OcrLEADRuntimeDir = "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime";
   // Input images live in the images directory; build their paths from it instead of repeating the literal
   ILeadStream leadStream1 = LeadStreamFactory.create(combine(LEAD_VARS_ImagesDir, "Ocr1.tif"));
   ILeadStream leadStream2 = LeadStreamFactory.create(combine(LEAD_VARS_ImagesDir, "Ocr2.tif"));
   String outDir = combine(LEAD_VARS_ImagesDir, "OcrDocumentManagerOutput");

   // Create the output directory (no-op if it already exists)
   Path outPath = Paths.get(outDir);
   Files.createDirectories(outPath);

   // Create an instance of the engine
   var ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);
   try {
      // Start the engine using default parameters
      System.out.println("Starting up the engine...");
      ocrEngine.startup(null, null, null, LEAD_VARS_OcrLEADRuntimeDir);

      // Create the OCR document
      System.out.println("Creating the OCR document...");
      OcrDocumentManager ocrDocumentManager = ocrEngine.getDocumentManager();
      OcrDocument ocrDocument = ocrDocumentManager.createDocument();

      // Add the pages to the document
      System.out.println("Adding the pages...");
      ocrDocument.getPages().addPage(leadStream1, null);
      ocrDocument.getPages().addPage(leadStream2, null);

      // Recognize the pages of this document. AutoZone is not called here; per the original example note, recognize takes care of it
      System.out.println("Recognizing all the pages...");
      ocrDocument.getPages().recognize(null);

      // Save to all the formats supported by this OCR engine
      DocumentFormat[] formats = DocumentFormat.values();
      for (var format : formats) {
         // USER & XLSX formats are not supported
         if (format.equals(DocumentFormat.USER) || format.equals(DocumentFormat.XLSX))
            continue;
         String friendlyName = DocumentWriter.getFormatFriendlyName(format);
         // %n terminates the line so consecutive progress messages do not run together
         System.out.printf("Saving (using default options) to %s...%n", friendlyName);

         // Construct the output file name (output_directory + document_format_name + . + extension)
         String extension = DocumentWriter.getFormatFileExtension(format);
         String outputFileName = combine(outDir, format.toString() + "." + extension);

         // Save the document
         ocrDocument.save(outputFileName, format, null);

         // Verify the save produced the file; this check is only meaningful after save() has run
         assertTrue((new File(outputFileName)).exists());

         // If this is the LTD format, convert it to PDF
         if (format == DocumentFormat.LTD)
         {
            System.out.println("Converting the LTD file to PDF...");
            var pdfFileName = combine(outDir, format.toString() + "_pdf.pdf");

            var docWriter = ocrEngine.getDocumentWriterInstance();
            docWriter.convert(outputFileName, pdfFileName, DocumentFormat.PDF);
         }
      }

      // Now save to all the engine native formats (if any) supported by the engine
      List<String> engineFormats = ocrDocumentManager.getSupportedEngineFormats();
      for (var engineFormat : engineFormats)
      {
         var friendlyName = ocrDocumentManager.getEngineFormatFriendlyName(engineFormat);
         // Java's printf uses %s, not the .NET-style {0} placeholder
         System.out.printf("Saving to engine native format %s...%n", friendlyName);

         // Construct the output file name (output_directory + "engine" + engine_format_name + . + extension)
         var extension = ocrDocumentManager.getEngineFormatFileExtension(engineFormat);
         var outputFileName = combine(outDir, "engine_" + engineFormat + "." + extension);

         // To use this format, set it as the document manager's engine format and do a normal save using DocumentFormat.USER
         ocrDocumentManager.setEngineFormat(engineFormat);
         ocrDocument.save(outputFileName, DocumentFormat.USER, null);
      }
   } finally {
      // Always release the engine, even when a step above throws.
      // Note: calling dispose will also automatically shut down the engine if it has been started
      System.out.println("Shutting down...");
      ocrEngine.dispose();
   }
}
Requirements

Target Platforms

Help Version 23.0.2024.3.3
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2024 LEAD Technologies, Inc. All Rights Reserved.

Leadtools.Ocr Assembly

Products | Support | Contact Us | Intellectual Property Notices
© 1991-2023 LEAD Technologies, Inc. All Rights Reserved.