←Select platform

DocumentText Class

Summary

Manages the text options of the document.

Syntax
C#
VB
C++
Java
[DataContractAttribute()] 
public class DocumentText 
<DataContractAttribute()>  
Public Class DocumentText 
public [DataContractAttribute] 
   ref class DocumentText 
public class DocumentText implements Serializable 

Remarks

DocumentText manages the text of the document and can be accessed through the Text property of LEADDocument.

The text of a document page can be extracted using the DocumentPage.GetText method. This returns a DocumentPageText instance that contains the text characters found on the page, together with their location and size properties. Furthermore, DocumentPageText supports building the words or the full text as a string for easy processing.

The framework can use either SVG or OCR technologies to extract the text data. The TextExtractionMode property controls which method is used.

For more information, refer to Parsing Text with the Document Library.

Example
C#
VB
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
using Leadtools.Svg; 
using LeadtoolsExamples.Common; 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Barcode; 
using Leadtools.Document.Converter; 
 
/// <summary>
/// Loads "Leadtools.tif" from the images folder, attaches a LEAD OCR engine to the
/// document, and writes the text of the first page to the console.
/// </summary>
/// <remarks>
/// Writes "Failed!" instead when <c>GetText</c> returns null for the page.
/// </remarks>
public static void DocumentTextExample() 
{ 
   var options = new LoadDocumentOptions(); 

   // RasterCodecs and the OCR engine are IDisposable, so both are wrapped in using blocks. 
   // They are declared before the document so that they are disposed only after the 
   // document that references the engine has itself been disposed. 
   using (var rasterCodecs = new RasterCodecs()) 
   using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD, false)) 
   using (var document = DocumentFactory.LoadFromFile(Path.Combine(ImagesPath.Path, "Leadtools.tif"), options)) 
   { 
      // For the TIF file we need an OCR engine 
      var documentWriter = new DocumentWriter(); 
      // NOTE(review): disposing the engine is expected to shut it down as well - confirm against the IOcrEngine documentation 
      ocrEngine.Startup(rasterCodecs, documentWriter, null, LEAD_VARS.OcrLEADRuntimeDir); 

      document.Text.OcrEngine = ocrEngine; 

      // Get the text of the first page 
      var page = document.Pages[0]; 
      var pageText = page.GetText(); 
      if (pageText != null) 
      { 
         // BuildText is called before reading the Text property 
         pageText.BuildText(); 
         var text = pageText.Text; 

         Console.WriteLine(text); 
      } 
      else 
      { 
         Console.WriteLine("Failed!"); 
      } 
   } 
} 
 
/// <summary>
/// Holds the machine-specific constants used by the example code.
/// </summary>
static class LEAD_VARS 
{ 
   // Directory passed as the last argument to ocrEngine.Startup in DocumentTextExample. 
   // The path is hard-coded for a default LEADTOOLS 20 layout; adjust it to the local installation. 
   public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS 20\Bin\Common\OcrLEADRuntime"; 
} 
Imports Leadtools 
Imports Leadtools.Codecs 
Imports Leadtools.Document.Writer 
Imports Leadtools.Svg 
Imports Leadtools.Document 
Imports Leadtools.Caching 
Imports Leadtools.Annotations.Engine 
Imports Leadtools.Barcode 
Imports Leadtools.Ocr 
Imports LeadtoolsDocumentExamples.LeadtoolsExamples.Common 
Imports Leadtools.Document.Converter 
 
''' <summary>
''' Loads "Leadtools.tif" from the images folder, attaches a LEAD OCR engine to the
''' document, and writes the text of the first page to the console.
''' </summary>
''' <remarks>
''' Writes "Failed!" instead when GetText returns Nothing for the page.
''' </remarks>
Public Shared Sub DocumentTextExample() 
   Dim options As New LoadDocumentOptions() 

   ' RasterCodecs and the OCR engine are IDisposable, so both are wrapped in Using blocks. 
   ' They are declared before the document so that they are disposed only after the 
   ' document that references the engine has itself been disposed. 
   Using rasterCodecs As New RasterCodecs() 
      Using ocrEngine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD, False) 
         Using document As Leadtools.Document.LEADDocument = DocumentFactory.LoadFromFile(Path.Combine(ImagesPath.Path, "Leadtools.tif"), options) 
            ' For the TIF file we need an OCR engine 
            Dim documentWriter As New DocumentWriter() 
            ' NOTE(review): disposing the engine is expected to shut it down as well - confirm against the IOcrEngine documentation 
            ocrEngine.Startup(rasterCodecs, documentWriter, Nothing, LEAD_VARS.OcrLEADRuntimeDir) 

            document.Text.OcrEngine = ocrEngine 

            ' Get the text of the first page 
            Dim page As Leadtools.Document.DocumentPage = document.Pages(0) 
            Dim pageText As DocumentPageText = page.GetText() 
            If pageText IsNot Nothing Then 
               ' BuildText is called before reading the Text property 
               pageText.BuildText() 
               Dim text As String = pageText.Text 

               Console.WriteLine(text) 
            Else 
               Console.WriteLine("Failed!") 
            End If 
         End Using 
      End Using 
   End Using 
End Sub 
 
''' <summary>
''' Holds the machine-specific constants used by the example code.
''' </summary>
Public NotInheritable Class LEAD_VARS 
   ' Directory passed as the last argument to ocrEngine.Startup in DocumentTextExample. 
   ' The path is hard-coded for a default LEADTOOLS 20 layout; adjust it to the local installation. 
   Public Const OcrLEADRuntimeDir As String = "C:\LEADTOOLS 20\Bin\Common\OcrLEADRuntime" 
End Class 

Requirements

Target Platforms

Help Version 20.0.2020.4.3
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2020 LEAD Technologies, Inc. All Rights Reserved.

Leadtools.Document Assembly