Optional OCR engine instance to use when extracting text.
public IOcrEngine OcrEngine { get; set; }
public:
property IOcrEngine^ OcrEngine
{
IOcrEngine^ get()
void set(IOcrEngine^ value)
}
public OcrEngine getOcrEngine()
public void setOcrEngine(OcrEngine value)
OcrEngine # get and set (DocumentText)
The optional OCR engine instance to use when extracting text. Default value is null.
This property is not used if the value of TextExtractionMode is DocumentTextExtractionMode.SvgOnly.
The text of the document can be extracted using OCR technologies. In this mode, a valid IOcrEngine instance must be initialized (created and started) and set in the OcrEngine property before DocumentPage.GetText is called.
OcrEngine can be set up with any extra options from outside (such as enabled languages, spell checker, accuracy tradeoffs, etc.). The Document class will use the engine as is.
The Document object will not delete the value of OcrEngine when it is disposed. IOcrEngine is thread-safe by nature and the same instance can be set into multiple Document instances and used at the same time.
For more information, refer to Parsing Text with the Document Library.
Note that setting this property to a value will update the same value in each child document.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;
/// <summary>
/// Loads "slice.tif", attaches an OCR engine to the document, lists every
/// DocumentTextExtractionMode value, and prints the text of the first page.
/// </summary>
/// <remarks>
/// The document does not dispose the engine assigned to its OcrEngine property,
/// so this method owns the engine and the RasterCodecs instance and releases
/// both itself.
/// </remarks>
public void DocumentTextExample()
{
   // Created before the document so that they are released after it:
   // the document must never hold a reference to an already-disposed engine.
   using (var rasterCodecs = new RasterCodecs())
   using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
   {
      var documentWriter = new DocumentWriter();
      ocrEngine.Startup(rasterCodecs, documentWriter, null, LEAD_VARS.OcrLEADRuntimeDir);
      try
      {
         var options = new LoadDocumentOptions();
         using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "slice.tif"), options))
         {
            // for the TIF file we need an OCR engine
            // DocumentText reference
            document.Text.OcrEngine = ocrEngine;

            // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference)
            DocumentTextExtractionMode[] textExtractionModes = (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode));
            foreach (var mode in textExtractionModes)
            {
               Console.WriteLine($"Text extraction mode: {mode}");
            }

            // get text
            var page = document.Pages[0];
            var pageText = page.GetText();
            if (pageText != null)
            {
               pageText.BuildText();
               var text = pageText.Text;
               Console.WriteLine(text);
            }
            else
            {
               Console.WriteLine("Failed!");
            }
         }
      }
      finally
      {
         // Pair every successful Startup with a Shutdown, even if text extraction throws.
         ocrEngine.Shutdown();
      }
   }
}
/// <summary>
/// Installation paths used by the example code.
/// </summary>
static class LEAD_VARS
{
   /// <summary>Directory passed to IOcrEngine.Startup as the LEAD OCR runtime location.</summary>
   public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime";

   /// <summary>Directory that holds the sample images (for example "slice.tif").</summary>
   public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images";
}
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import org.junit.*;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
import static org.junit.Assert.*;
import leadtools.*;
import leadtools.annotations.engine.*;
import leadtools.barcode.*;
import leadtools.caching.*;
import leadtools.codecs.*;
import leadtools.document.*;
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler;
import leadtools.document.converter.*;
import leadtools.document.writer.*;
import leadtools.ocr.*;
/**
 * Loads "slice.tif", attaches an OCR engine to the document, lists every
 * {@code DocumentTextExtractionMode} value, prints the text of the first page,
 * and asserts that page text was returned.
 *
 * <p>The document does not release the engine assigned through
 * {@code setOcrEngine}, so this method releases the document, the engine and
 * the {@code RasterCodecs} instance itself, even when the final assertion fails.
 */
public void documentTextExample() {
   final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images";
   final String OCR_LEAD_RUNTIME_DIR = "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime";

   // for the TIF file we need an OCR engine
   RasterCodecs rasterCodecs = new RasterCodecs();
   OcrEngine ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);
   LEADDocument document = null;
   boolean engineStarted = false;
   try {
      DocumentWriter documentWriter = new DocumentWriter();
      ocrEngine.startup(rasterCodecs, documentWriter, null, OCR_LEAD_RUNTIME_DIR);
      engineStarted = true;

      LoadDocumentOptions options = new LoadDocumentOptions();
      document = DocumentFactory.loadFromFile(combine(LEAD_VARS_IMAGES_DIR, "slice.tif"), options);

      // DocumentText reference
      document.getText().setOcrEngine(ocrEngine);

      // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode
      // reference)
      DocumentTextExtractionMode[] textExtractionModes = DocumentTextExtractionMode.values();
      for (DocumentTextExtractionMode mode : textExtractionModes) {
         System.out.println("Text extraction mode: " + mode);
      }

      // get text
      DocumentPage page = document.getPages().get(0);
      DocumentPageText pageText = page.getText();
      if (pageText != null) {
         pageText.buildText();
         String text = pageText.getText();
         System.out.println(text);
      } else {
         System.out.println("Failed!");
      }
      assertTrue(pageText != null);
   } finally {
      // Release the document first so it never refers to a disposed engine.
      if (document != null) {
         document.dispose();
      }
      // Only shut down an engine whose startup completed.
      if (engineStarted) {
         ocrEngine.shutdown();
      }
      ocrEngine.dispose();
      rasterCodecs.dispose();
   }
}
Help Collections
Raster .NET | C API | C++ Class Library | HTML5 JavaScript
Document .NET | C API | C++ Class Library | HTML5 JavaScript
Medical .NET | C API | C++ Class Library | HTML5 JavaScript
Medical Web Viewer .NET
Multimedia
Direct Show .NET | C API | Filters
Media Foundation .NET | C API | Transforms
Supported Platforms
.NET, Java, Android, and iOS/macOS Assemblies
Imaging, Medical, and Document
C API/C++ Class Libraries
Imaging, Medical, and Document
HTML5 JavaScript Libraries
Imaging, Medical, and Document
Your email has been sent to support! Someone should be in touch! If your matter is urgent please come back into chat.
Chat Hours:
Monday - Friday, 8:30am to 6pm ET
Thank you for your feedback!
Please fill out the form again to start a new chat.
All agents are currently offline.
Chat Hours:
Monday - Friday
8:30AM - 6PM EST
To contact us please fill out this form and we will contact you via email.