←Select platform

DocumentPageText Class

Summary

Contains the text characters and words found in a document page.

Syntax

C#
VB
C++
[SerializableAttribute()] 
[DataContractAttribute(Name="DocumentPageText")] 
public class DocumentPageText 
  
<SerializableAttribute()> 
Public Class DocumentPageText  
[SerializableAttribute()] 
public ref class DocumentPageText  

Remarks

The text of a document page can be read by using the DocumentObjectManager.ParsePageText method. The text characters found in the page will be set in the Characters property of the returned DocumentPageText object.

The text words are created from the characters found in the document based on the DocumentCharacter.IsEndOfWord value returned by the document reader engine. Whenever an "end of word" is found, the last set of characters is grouped together and stored as an item in the DocumentPageText.Words list.

The overall text string (with no extra properties) can be obtained using the DocumentPageText.BuildText method.

Example

C#
VB
''' <summary>
''' Asks the user for a document, parses the text of its first page, draws that
''' text one word at a time onto a bitmap sized to the page, and offers to save
''' the bitmap as a PNG file.
''' </summary>
''' <remarks>
''' An OCR engine is created and started only when the reader reports
''' DocumentReaderType.Raster. The parse session, the OCR engine and the reader
''' are released in Finally blocks so they are cleaned up even if parsing,
''' rendering or saving throws.
''' </remarks>
Public Sub DocumentPageTextExample()
   Dim documentFileName As String
   Using dlg As New OpenFileDialog()
      If dlg.ShowDialog() <> System.Windows.Forms.DialogResult.OK Then
         Return
      End If

      documentFileName = dlg.FileName
   End Using

   ' Load the document at 200 DPI
   Dim loadOptions As New DocumentReaderLoadOptions()
   loadOptions.Resolution = 200
   Dim reader As DocumentReader = DocumentReader.Create(documentFileName, loadOptions)

   ' If this is a Raster document such as TIFF or JPEG, we must use an OCR engine
   Dim ocrEngine As IOcrEngine = Nothing

   Try
      If reader.ReaderType = DocumentReaderType.Raster Then
         ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
         ocrEngine.Startup(Nothing, Nothing, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)
      End If

      reader.ObjectManager.BeginParse(ocrEngine)

      Try
         ' Get the text of the first page
         Dim page As DocumentReaderPage = reader.Pages(0)
         Dim pageText As DocumentPageText = reader.ObjectManager.ParsePageText(page)

         ' Create the bitmap to draw the objects to
         Using btmp As New Bitmap(page.PixelWidth, page.PixelHeight)
            btmp.SetResolution(CType(page.DpiX, Single), CType(page.DpiY, Single))
            Using g As Graphics = Graphics.FromImage(btmp)
               g.Clear(Color.White)

               ' Text is rendered a word at a time; these three values
               ' accumulate the state of the word currently being built
               Dim textRect As LogicalRectangle = LogicalRectangle.Empty
               Dim textFontHeight As Double = 0
               Dim textWord As New StringBuilder()

               For Each character As DocumentCharacter In pageText.Characters
                  ' Add the text code and rects together
                  textWord.Append(character.Code)
                  If textRect.IsEmpty Then
                     textRect = character.Bounds
                  Else
                     textRect = LogicalRectangle.Union(textRect, character.Bounds)
                  End If

                  textFontHeight = Math.Max(textFontHeight, character.FontSize)

                  ' If this is the last object in a word, render it
                  If character.IsEndOfWord OrElse character.IsEndOfLine OrElse character.IsEndOfParagraph OrElse character.IsEndOfPage Then
                     RenderText(g, pageText, textWord.ToString(), textRect, character, textFontHeight)

                     ' Start the next word from a clean state. The font height
                     ' must be reset too, otherwise the largest size seen so far
                     ' would be applied to every following word.
                     textWord = New StringBuilder()
                     textRect = LogicalRectangle.Empty
                     textFontHeight = 0
                  End If
               Next

               ' Save the result as PNG
               Using saveDlg As New SaveFileDialog()
                  saveDlg.Filter = "PNG files|*.png"
                  If saveDlg.ShowDialog() = System.Windows.Forms.DialogResult.OK Then
                     btmp.Save(saveDlg.FileName, System.Drawing.Imaging.ImageFormat.Png)
                  End If
               End Using
            End Using
         End Using
      Finally
         ' Always close the parse session opened by BeginParse
         reader.ObjectManager.EndParse()
      End Try
   Finally
      ' Release the OCR engine (if one was created) and then the reader
      If ocrEngine IsNot Nothing Then
         ocrEngine.Dispose()
      End If

      reader.Dispose()
   End Try
End Sub
 
''' <summary>
''' Draws a single word in black, centered inside its bounding rectangle.
''' </summary>
''' <param name="g">Graphics surface to draw on; its DpiY is used to scale the font size.</param>
''' <param name="pageText">Page text whose Fonts list is indexed with character.FontIndex.</param>
''' <param name="text">The word to draw.</param>
''' <param name="textRect">Bounds of the word; X, Y, Width and Height are converted to integers.</param>
''' <param name="character">Character whose FontIndex selects the font entry.</param>
''' <param name="textFontHeight">Font height; multiplied by 72 / g.DpiY to obtain the size given to the Font constructor.</param>
Private Shared Sub RenderText(ByVal g As Graphics, ByVal pageText As DocumentPageText, _
                              ByVal text As String, ByVal textRect As LogicalRectangle, _
                              ByVal character As DocumentCharacter, ByVal textFontHeight _
                              As Double)
   ' Look up the font description for this character
   Dim docFont As DocumentFont = pageText.Fonts(character.FontIndex)

   ' No face name could mean an embedded font, fall back to Arial
   Dim family As String = docFont.FaceName
   If String.IsNullOrEmpty(family) Then family = "Arial"

   ' Translate the document font style flags into drawing font style flags
   Dim drawStyle As FontStyle = FontStyle.Regular
   If (docFont.FontStyle And DocumentFontStyle.Bold) = DocumentFontStyle.Bold Then drawStyle = drawStyle Or FontStyle.Bold
   If (docFont.FontStyle And DocumentFontStyle.Italic) = DocumentFontStyle.Italic Then drawStyle = drawStyle Or FontStyle.Italic
   If (docFont.FontStyle And DocumentFontStyle.Underline) = DocumentFontStyle.Underline Then drawStyle = drawStyle Or FontStyle.Underline

   Dim emSize As Single = CSng(textFontHeight * 72 / g.DpiY)

   Using drawFont As New Font(family, emSize, drawStyle)
      Dim target As New Rectangle(CInt(textRect.X), CInt(textRect.Y), _
                                  CInt(textRect.Width), CInt(textRect.Height))

      Using layout As New StringFormat()
         ' Center the word in both directions and never clip or wrap it
         layout.FormatFlags = layout.FormatFlags Or StringFormatFlags.NoClip Or StringFormatFlags.NoWrap
         layout.LineAlignment = StringAlignment.Center
         layout.Alignment = StringAlignment.Center

         g.DrawString(text, drawFont, Brushes.Black, target, layout)
      End Using
   End Using
End Sub
/// <summary>
/// Asks the user for a document, parses the text of its first page, draws that
/// text one word at a time onto a bitmap sized to the page, and offers to save
/// the bitmap as a PNG file.
/// </summary>
/// <remarks>
/// An OCR engine is created and started only when the reader reports
/// <c>DocumentReaderType.Raster</c>. The parse session, the OCR engine and the
/// reader are released in <c>finally</c> blocks so they are cleaned up even if
/// parsing, rendering or saving throws.
/// </remarks>
public void DocumentPageTextExample()
{
   string documentFileName;
   using(OpenFileDialog dlg = new OpenFileDialog())
   {
      if(dlg.ShowDialog() != DialogResult.OK)
      {
         return;
      }

      documentFileName = dlg.FileName;
   }

   // Load the document at 200 DPI
   DocumentReaderLoadOptions loadOptions = new DocumentReaderLoadOptions();
   loadOptions.Resolution = 200;
   DocumentReader reader = DocumentReader.Create(documentFileName, loadOptions);

   // If this is a Raster document such as TIFF or JPEG, we must use an OCR engine
   IOcrEngine ocrEngine = null;

   try
   {
      if(reader.ReaderType == DocumentReaderType.Raster)
      {
         ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false);
         ocrEngine.Startup(null, null, null, LEAD_VARS.OcrAdvantageRuntimeDir);
      }

      reader.ObjectManager.BeginParse(ocrEngine);

      try
      {
         // Get the text of the first page
         DocumentReaderPage page = reader.Pages[0];
         DocumentPageText pageText = reader.ObjectManager.ParsePageText(page);

         // Create the bitmap to draw the objects to
         using(Bitmap btmp = new Bitmap(page.PixelWidth, page.PixelHeight))
         {
            btmp.SetResolution((float)page.DpiX, (float)page.DpiY);
            using(Graphics g = Graphics.FromImage(btmp))
            {
               g.Clear(Color.White);

               // Text is rendered a word at a time; these three values
               // accumulate the state of the word currently being built
               LogicalRectangle textRect = LogicalRectangle.Empty;
               double textFontHeight = 0;
               StringBuilder textWord = new StringBuilder();

               foreach(DocumentCharacter character in pageText.Characters)
               {
                  // Add the text code and rects together
                  textWord.Append(character.Code);
                  if(textRect.IsEmpty)
                  {
                     textRect = character.Bounds;
                  }
                  else
                  {
                     textRect = LogicalRectangle.Union(textRect, character.Bounds);
                  }

                  textFontHeight = Math.Max(textFontHeight, character.FontSize);

                  // If this is the last object in a word, render it
                  if(character.IsEndOfWord || character.IsEndOfLine ||
                     character.IsEndOfParagraph || character.IsEndOfPage)
                  {
                     RenderText(g, pageText, textWord.ToString(), textRect, character, textFontHeight);

                     // Start the next word from a clean state. The font height
                     // must be reset too, otherwise the largest size seen so far
                     // would be applied to every following word.
                     textWord = new StringBuilder();
                     textRect = LogicalRectangle.Empty;
                     textFontHeight = 0;
                  }
               }

               // Save the result as PNG
               using(SaveFileDialog saveDlg = new SaveFileDialog())
               {
                  saveDlg.Filter = "PNG files|*.png";
                  if(saveDlg.ShowDialog() == DialogResult.OK)
                  {
                     btmp.Save(saveDlg.FileName, System.Drawing.Imaging.ImageFormat.Png);
                  }
               }
            }
         }
      }
      finally
      {
         // Always close the parse session opened by BeginParse
         reader.ObjectManager.EndParse();
      }
   }
   finally
   {
      // Release the OCR engine (if one was created) and then the reader
      if(ocrEngine != null)
      {
         ocrEngine.Dispose();
      }

      reader.Dispose();
   }
}
 
/// <summary>
/// Draws a single word in black, centered inside its bounding rectangle.
/// </summary>
/// <param name="g">Graphics surface to draw on; its DpiY is used to scale the font size.</param>
/// <param name="pageText">Page text whose Fonts list is indexed with <c>character.FontIndex</c>.</param>
/// <param name="text">The word to draw.</param>
/// <param name="textRect">Bounds of the word; X, Y, Width and Height are cast to integers.</param>
/// <param name="character">Character whose FontIndex selects the font entry.</param>
/// <param name="textFontHeight">Font height; multiplied by 72 / g.DpiY to obtain the size given to the Font constructor.</param>
private static void RenderText(Graphics g, DocumentPageText pageText, string text,
                               LogicalRectangle textRect, DocumentCharacter character,
                               double textFontHeight)
{
   // Look up the font description for this character
   DocumentFont docFont = pageText.Fonts[character.FontIndex];

   // No face name could mean an embedded font, fall back to Arial
   string family = docFont.FaceName;
   if(string.IsNullOrEmpty(family))
   {
      family = "Arial";
   }

   // Translate the document font style flags into drawing font style flags.
   // FontStyle.Regular is the "no extra flag" value used when a flag is absent.
   FontStyle drawStyle = FontStyle.Regular;
   drawStyle |= (docFont.FontStyle & DocumentFontStyle.Bold) == DocumentFontStyle.Bold
      ? FontStyle.Bold
      : FontStyle.Regular;
   drawStyle |= (docFont.FontStyle & DocumentFontStyle.Italic) == DocumentFontStyle.Italic
      ? FontStyle.Italic
      : FontStyle.Regular;
   drawStyle |= (docFont.FontStyle & DocumentFontStyle.Underline) == DocumentFontStyle.Underline
      ? FontStyle.Underline
      : FontStyle.Regular;

   float emSize = (float)textFontHeight * 72 / g.DpiY;

   using(Font drawFont = new Font(family, emSize, drawStyle))
   {
      int left = (int)textRect.X;
      int top = (int)textRect.Y;
      int width = (int)textRect.Width;
      int height = (int)textRect.Height;
      Rectangle target = new Rectangle(left, top, width, height);

      using(StringFormat layout = new StringFormat())
      {
         // Center the word in both directions and never clip or wrap it
         layout.FormatFlags = layout.FormatFlags | StringFormatFlags.NoClip | StringFormatFlags.NoWrap;
         layout.LineAlignment = StringAlignment.Center;
         layout.Alignment = StringAlignment.Center;

         g.DrawString(text, drawFont, Brushes.Black, target, layout);
      }
   }
}

Requirements

Target Platforms: Windows 7, Windows Vista SP1 or later, Windows XP SP3, Windows Server 2008 (Server Core not supported), Windows Server 2008 R2 (Server Core supported with SP1 or later), Windows Server 2003 SP2

Help Version 19.0.2017.10.27
Products | Support | Contact Us | Copyright Notices
© 1991-2017 LEAD Technologies, Inc. All Rights Reserved.

Leadtools.Forms.DocumentReaders Assembly