←Select platform

AutoParseLinks Property

Summary

Indicates whether to parse links from the text of the pages.

Syntax
C#
C++/CLI
Java
Python
public bool AutoParseLinks { get; set; } 
public:  
   property bool AutoParseLinks 
   { 
      bool get() 
      void set(bool value) 
   } 
public boolean getAutoParseLinks() 
public void setAutoParseLinks(boolean value) 
AutoParseLinks # get and set (DocumentText) 

Property Value

true to parse links from the text of the pages; otherwise, false. Default value is true.

Remarks

The document class supports automatic parsing of two types of links:

  • Page links: These are links that are found in the original document. Each one contains a hot spot area that, when clicked, invokes a target; usually, a jump to a different location or page in the document. These links have a value of DocumentLink.LinkType set to DocumentLinkType.TargetPage.

  • Hyperlinks: These are links that are found in the text of each page. The links are parsed when the text is first obtained and they contain the text bounding box, the value of the link (the text itself) and have a value of LinkType set to DocumentLinkType.Value.

The page links are parsed from the original document when DocumentStructure.Parse is called and the value of DocumentStructure.ParsePageLinks is set to true. Any links found are stored inside the page and can be retrieved using DocumentPage.GetLinks.

The hyperlinks are parsed the first time DocumentPage.GetText is called and the value of DocumentText.AutoParseLinks is set to true (the default value). The document object will use the regular expressions stored in LinkPatterns to find any matches in the text of the page with each match added as a link of type DocumentLinkType.Value to the page. These can also be retrieved with DocumentPage.GetLinks after GetText returns.

The link parsing is performed by calling ParseLinks on the page using the regular expressions stored in LinkPatterns.

If the value of AutoParseLinks is false then no automatic parsing for links will occur.

Note that setting this property also applies the same value to each child document.

Example
C#
Java
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Barcode; 
using Leadtools.Document.Converter; 
 
/// <summary>
/// Loads "Leadtools.pdf", lists the links of its first page, then obtains the
/// page text with automatic link parsing enabled and lists the links again so
/// the two listings can be compared.
/// </summary>
public void DocumentPageGetLinksExample()
{
   var documentCache = GetCache();
   var loadOptions = new LoadDocumentOptions { Cache = documentCache };
   string pdfFileName = Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.pdf");

   using (var document = DocumentFactory.LoadFromFile(pdfFileName, loadOptions))
   {
      // The example writes to the document (SetLinks, IsLinksModified), so make it writable
      document.IsReadOnly = false;

      // First listing: the links as they are before the page text is obtained
      Console.WriteLine("Before get text");
      Console.WriteLine("---------");
      var firstPage = document.Pages[0];
      var currentLinks = firstPage.GetLinks();
      firstPage.SetLinks(currentLinks);
      ShowLinks(firstPage);

      // Print every member of the DocumentPageFitType enumeration
      foreach (DocumentPageFitType fitType in Enum.GetValues(typeof(DocumentPageFitType)))
      {
         Console.WriteLine($"Page fit type: {fitType}");
      }

      // DocumentText reference: turn on hyperlink parsing and automatic text extraction
      var documentText = document.Text;
      documentText.AutoParseLinks = true;
      documentText.TextExtractionMode = DocumentTextExtractionMode.Auto;

      // Print the regular expressions that are used to find links in the text
      Console.WriteLine("Parsing links from the text using these regular expressions:");
      foreach (var pattern in DocumentText.LinkPatterns)
      {
         Console.WriteLine(pattern.ToString());
      }

      // Obtaining the text is what triggers the parsing of links from it
      firstPage.GetText();

      firstPage.IsLinksModified = false;

      // Second listing: the links after the text was parsed.
      // It should now show the original links plus any URLs parsed from the text
      Console.WriteLine("After get text");
      Console.WriteLine("---------");
      ShowLinks(firstPage);
   }
}
 
/// <summary>
/// Writes the page number and the details of every link of <paramref name="page"/>
/// to the console. Nothing is written when the page returns no links array.
/// </summary>
/// <param name="page">The page whose links are listed.</param>
private static void ShowLinks(Leadtools.Document.DocumentPage page)
{
   // DocumentLink reference
   var pageLinks = page.GetLinks();
   if (pageLinks == null)
   {
      return;
   }

   Console.WriteLine("Page " + page.PageNumber);

   int position = 0;
   foreach (var link in pageLinks)
   {
      Console.WriteLine(position);
      position++;

      Console.WriteLine("  Bounds:" + link.Bounds);
      Console.WriteLine("  LinkType:" + link.LinkType);

      if (link.LinkType != DocumentLinkType.Value)
      {
         // DocumentLinkTarget reference: any other link type is reported through its target
         var target = link.Target;
         Console.WriteLine("  Target.PageFitType:" + target.PageFitType);
         Console.WriteLine("  Target.PageNumber:" + target.PageNumber);
         Console.WriteLine("  Target.Position:" + target.Position);
         Console.WriteLine("  Target.ZoomPercent:" + target.ZoomPercent);
      }
      else
      {
         // Value links carry the link value itself
         Console.WriteLine("  Value:" + link.Value);
      }

      // Blank line between entries
      Console.WriteLine();
   }
}
 
/// <summary>
/// Constants shared by the example code.
/// </summary>
static class LEAD_VARS 
{ 
   // Folder that holds the sample files; the example combines it with "Leadtools.pdf".
   // NOTE(review): hard-coded install path - adjust to match the local installation.
   public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images"; 
} 
 
import java.io.File; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.net.MalformedURLException; 
import java.net.URI; 
import java.net.URISyntaxException; 
import java.net.URL; 
import java.nio.file.Files; 
import java.nio.file.Paths; 
import java.util.ArrayList; 
import java.util.Calendar; 
import java.util.List; 
import java.util.concurrent.Callable; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.Future; 
import java.util.regex.Pattern; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
import static org.junit.Assert.*; 
 
import leadtools.*; 
import leadtools.annotations.engine.*; 
import leadtools.barcode.*; 
import leadtools.caching.*; 
import leadtools.codecs.*; 
import leadtools.document.*; 
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler; 
import leadtools.document.converter.*; 
import leadtools.document.writer.*; 
import leadtools.ocr.*; 
 
 
/**
 * Loads "Leadtools.pdf", lists the links of its first page, then obtains the
 * page text with automatic link parsing enabled and lists the links again so
 * the two listings can be compared.
 */
public void documentPageGetLinksExample() {
   final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images";

   FileCache documentCache = getCache();
   LoadDocumentOptions loadOptions = new LoadDocumentOptions();
   loadOptions.setCache(documentCache);

   String pdfFileName = combine(LEAD_VARS_IMAGES_DIR, "Leadtools.pdf");
   LEADDocument document = DocumentFactory.loadFromFile(pdfFileName, loadOptions);
   // The example writes to the document (setLinks, setLinkedModified), so make it writable
   document.setReadOnly(false);

   // First listing: the links as they are before the page text is obtained
   System.out.println("Before get text");
   System.out.println("---------");
   DocumentPage firstPage = document.getPages().get(0);
   DocumentLink[] currentLinks = firstPage.getLinks();
   firstPage.setLinks(currentLinks);
   showLinks(firstPage);

   // Print every member of the DocumentPageFitType enumeration
   for (DocumentPageFitType fitType : DocumentPageFitType.values()) {
      System.out.println("Page fit type: " + fitType);
   }

   // DocumentText reference: turn on hyperlink parsing and automatic text extraction
   DocumentText documentText = document.getText();
   documentText.setAutoParseLinks(true);
   documentText.setTextExtractionMode(DocumentTextExtractionMode.AUTO);

   // Print the regular expressions that are used to find links in the text
   System.out.println("Parsing links from the text using these regular expressions:");
   for (Pattern pattern : DocumentText.getLinkPatterns()) {
      System.out.println(pattern.toString());
   }

   // Obtaining the text is what triggers the parsing of links from it
   firstPage.getText();

   firstPage.setLinkedModified(false);

   // Second listing: the links after the text was parsed.
   // It should now show the original links plus any URLs parsed from the text
   System.out.println("After get text");
   System.out.println("---------");
   showLinks(firstPage);
   assertTrue(firstPage.getLinks() != null);
}
 
/**
 * Prints the page number and the details of every link of the page to
 * standard output. Nothing is printed when the page returns no links array.
 *
 * @param page the page whose links are listed
 */
private void showLinks(DocumentPage page) {
   // DocumentLink reference
   DocumentLink[] pageLinks = page.getLinks();
   if (pageLinks == null) {
      return;
   }

   System.out.println("Page " + page.getPageNumber());

   for (int position = 0; position < pageLinks.length; position++) {
      DocumentLink link = pageLinks[position];

      System.out.println("Index: " + position);
      System.out.println("  Bounds:" + link.getBounds());
      System.out.println("  LinkType:" + link.getLinkType());

      if (link.getLinkType() != DocumentLinkType.VALUE) {
         // DocumentLinkTarget reference: any other link type is reported through its target
         System.out.println("  Target.PageFitType:" + link.getTarget().getPageFitType());
         System.out.println("  Target.PageNumber:" + link.getTarget().getPageNumber());
         System.out.println("  Target.Position:" + link.getTarget().getPosition());
         System.out.println("  Target.ZoomPercent:" + link.getTarget().getZoomPercent());
      } else {
         // Value links carry the link value itself
         System.out.println("  Value:" + link.getValue());
      }
   }
}
Requirements

Target Platforms

Help Version 23.0.2024.2.29
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2024 LEAD Technologies, Inc. All Rights Reserved.

Leadtools.Document Assembly

Products | Support | Contact Us | Intellectual Property Notices
© 1991-2023 LEAD Technologies, Inc. All Rights Reserved.