Send comments on this topic. | Back to Introduction - All Topics | Help Version 15.12.21
Scan to Searchable PDF
Take the following steps to create and run a program to access a TWAIN source of your choice and to save a scanned document as a searchable PDF file.
  1. Start Visual Studio .NET.
  2. Choose File->New->Project... from the menu.
  3. In the New Project dialog box, choose either "Visual C# Projects" or "Visual Basic Projects" in the Projects Type List, and choose "Windows Application" in the Templates List.
  4. Type the project name as "ScanToSearchablePDF" in the Project Name field, and then choose OK. If desired, type a new location for your project or select a directory using the Browse button, and then choose OK.
  5. In the "Solution Explorer" window, right-click on the "References" folder, and select "Add Reference..." from the context menu. In the "Add Reference..." dialog box, select the ".NET" tab, browse to the LEADTOOLS for .NET "\LEAD Technologies\LEADTOOLS 15\Bin\DotNet\Win32" folder, and select the following DLLs:
    • Leadtools.dll
    • Leadtools.Codecs.dll
    • Leadtools.Document.dll
    • Leadtools.ImageProcessing.Core.dll
    • Leadtools.Twain.dll
    • Leadtools.WinForms.dll
    Click Select and then click OK to add the above DLLs to the application.
  6. Make sure Form1 is in design view. From the toolbox (View->Toolbox) add three buttons and ensure the buttons are functional, naming the events as listed:
    Text                    | Name                  | Event
    Select Output Directory | buttonOutputDirectory | _miOutputDir_Click
    Select Scanning Device  | buttonScanningDevice  | _miSelectScanner_Click
    Acquire                 | buttonAcquire         | _miScan_Click
  7. Switch to Form1 code view (right-click Form1 in the solution explorer then select View Code) and add the following lines at the beginning of the file:

    [Visual Basic]

     
    Imports Leadtools 
    Imports Leadtools.Document 
    Imports Leadtools.Twain 
    Imports Leadtools.WinForms 
    Imports Leadtools.ImageProcessing.Core 
    
    [C#]
     
    using Leadtools; 
    using Leadtools.Document; 
    using Leadtools.Twain; 
    using Leadtools.WinForms; 
    using Leadtools.ImageProcessing.Core;
    
  8. In the Form1 class (Form1.vb for Visual Basic or Form1.cs for C#), declare the following class-level variables. Each object is created once at class level so that it does not have to be destroyed and recreated every time it is used.

    [Visual Basic]

    
    ' Output folder for the generated PDF; the file name is appended to it directly.
    Public m_strSavePath As String = "C:\"
    ' TWAIN session. WithEvents allows handlers to subscribe through a Handles clause.
    Public WithEvents twain As TwainSession
    ' OCR engine; assigned in Form1_Load.
    Public document As RasterDocumentEngine
    ' Image clean-up commands, created once and reused for every scanned page.
    Private deskew As DeskewCommand = New DeskewCommand()
    Private despeckle As DespeckleCommand = New DespeckleCommand()
    Private dotRemove As DotRemoveCommand = New DotRemoveCommand()
    Private holepunchRemove As HolePunchRemoveCommand = New HolePunchRemoveCommand()
    Private lineRemove As LineRemoveCommand = New LineRemoveCommand()
    
    [C#]
     
    // Component container field; not used by the code shown in this tutorial.
    private System.ComponentModel.Container components = null;

    // Output folder for the generated PDF; the file name is appended to it directly.
    public string m_strSavePath = @"C:\";

    // TWAIN session and OCR engine; both are assigned in Form1_Load.
    public TwainSession twain;
    public RasterDocumentEngine document;

    // Image clean-up commands, created once and reused for every scanned page.
    private DeskewCommand deskew = new DeskewCommand();
    private DespeckleCommand despeckle = new DespeckleCommand();
    private DotRemoveCommand dotRemove = new DotRemoveCommand();
    private HolePunchRemoveCommand holepunchRemove = new HolePunchRemoveCommand();
    private LineRemoveCommand lineRemove = new LineRemoveCommand();
    
  9. Next, in the Form1_Load event handler, create and initialize the objects. The user can then select an output directory (_miOutputDir_Click); if no output directory is selected, the program defaults to C:\. The user can also choose the device used to scan the document (_miSelectScanner_Click). Note that this tutorial uses the "RasterDocumentFormatType.Pdf" output format; alternatively, the "RasterDocumentFormatType.PdfImageOnText" format could be used.

    [Visual Basic]

     
    ''' <summary>
    ''' Unlocks the LEADTOOLS features used by this tutorial, starts the OCR engine and
    ''' the TWAIN session, and configures the image clean-up commands.
    ''' </summary>
    Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
       'Unlock support for these features
       RasterSupport.Unlock(RasterSupportType.Ocr, "")
       RasterSupport.Unlock(RasterSupportType.Document, "")
       RasterSupport.Unlock(RasterSupportType.OcrPdfOutput, "")

       'Initialize OCR object
       'Change this path to your local machine path to locate the OCR component
       RasterDocumentEngine.EnginePath = "C:\Program Files\LEAD Technologies\LEADTOOLS OCR Runtime 15\Bin\Common\OCR"
       document = RasterDocumentEngine.Instance
       document.Startup()
       document.RecognitionDataFileName = Application.StartupPath + "\DataFile.rdf"

       'Read the current save options, switch the output to PDF, and write them back
       Dim sro As RasterDocumentResultOptions = document.SaveResultOptions
       sro.Format = RasterDocumentFormatType.Pdf
       sro.FormatLevel = RasterDocumentFormatLevel.Full
       document.SaveResultOptions = sro

       'Initialize Twain object
       'twain is declared WithEvents, so twain_AcquirePage is wired through its Handles clause
       twain = New TwainSession
       twain.Startup(Me, "Manufacturer", "Product Family", "Version", "Application", TwainStartupFlags.None)

       'Initialize DotRemove
       dotRemove.Flags = DotRemoveCommandFlags.UseDiagonals Or DotRemoveCommandFlags.UseSize
       dotRemove.MaximumDotHeight = 8
       dotRemove.MaximumDotWidth = 8
       dotRemove.MinimumDotHeight = 2
       dotRemove.MinimumDotWidth = 2

       'Initialize HolePunchRemove
       holepunchRemove.Flags = HolePunchRemoveCommandFlags.UseDpi Or _
                               HolePunchRemoveCommandFlags.UseCount Or _
                               HolePunchRemoveCommandFlags.UseLocation
       holepunchRemove.Location = HolePunchRemoveCommandLocation.Left

       'Initialize LineRemove
       lineRemove.MaximumLineWidth = 9
       lineRemove.MinimumLineLength = 400
       lineRemove.Wall = 15
       lineRemove.MaximumWallPercent = 10
       lineRemove.Variance = 3
       lineRemove.GapLength = 3
    End Sub
    
    [C#]
     
    /// <summary>
    /// Unlocks the LEADTOOLS features used by this tutorial, starts the OCR engine and
    /// the TWAIN session, and configures the image clean-up commands.
    /// </summary>
    private void Form1_Load(object sender, System.EventArgs e)
    {
       // Unlock the toolkit features this tutorial relies on.
       RasterSupport.Unlock(RasterSupportType.Ocr, "");
       RasterSupport.Unlock(RasterSupportType.Document, "");
       RasterSupport.Unlock(RasterSupportType.OcrPdfOutput, "");

       // Start the OCR engine.
       // Change this path to wherever the OCR component lives on your machine.
       RasterDocumentEngine.EnginePath = @"C:\Program Files\LEAD Technologies\LEADTOOLS EVAL 15\Bin\Common\OCR";
       document = RasterDocumentEngine.Instance;
       document.Startup();
       document.RecognitionDataFileName = Application.StartupPath + "\\DataFile.rdf";

       // Read the current save options, switch the output to PDF, and write them back.
       RasterDocumentResultOptions saveOptions = document.SaveResultOptions;
       saveOptions.Format = RasterDocumentFormatType.Pdf;
       saveOptions.FormatLevel = RasterDocumentFormatLevel.Full;
       document.SaveResultOptions = saveOptions;

       // Start the TWAIN session and listen for each acquired page.
       twain = new TwainSession();
       twain.Startup(this, "Manufacturer", "Product Family", "Version", "Application", TwainStartupFlags.None);
       twain.AcquirePage += twain_AcquirePage;

       // Configure dot removal.
       // If your machine halts with an exception asking for ScanSoft dlls,
       // refer to the online support pages on how to clear the error.
       DotRemoveCommandFlags dotFlags = DotRemoveCommandFlags.UseDiagonals;
       dotFlags |= DotRemoveCommandFlags.UseSize;
       dotRemove.Flags = dotFlags;
       dotRemove.MaximumDotHeight = 8;
       dotRemove.MaximumDotWidth = 8;
       dotRemove.MinimumDotHeight = 2;
       dotRemove.MinimumDotWidth = 2;

       // Configure hole-punch removal.
       HolePunchRemoveCommandFlags punchFlags = HolePunchRemoveCommandFlags.UseDpi;
       punchFlags |= HolePunchRemoveCommandFlags.UseCount;
       punchFlags |= HolePunchRemoveCommandFlags.UseLocation;
       holepunchRemove.Flags = punchFlags;
       holepunchRemove.Location = HolePunchRemoveCommandLocation.Left;

       // Configure line removal; the direction is chosen per pass in twain_AcquirePage.
       lineRemove.MaximumLineWidth = 9;
       lineRemove.MinimumLineLength = 400;
       lineRemove.Wall = 15;
       lineRemove.MaximumWallPercent = 10;
       lineRemove.Variance = 3;
       lineRemove.GapLength = 3;
    }
    
  10. Add code to the Form1_FormClosing event handler to free the OCR and TWAIN resources when the form closes, as follows:

    [Visual Basic]

    
    ''' <summary>
    ''' Shuts down the OCR engine and the TWAIN session when the form is closing.
    ''' </summary>
    Private Sub Form1_FormClosing(ByVal sender As System.Object, ByVal e As System.Windows.Forms.FormClosingEventArgs) Handles MyBase.FormClosing
       'Free resources.
       'Either field is still Nothing if Form1_Load failed before assigning it,
       'so guard each call instead of throwing a NullReferenceException on exit.
       If document IsNot Nothing Then
          document.Shutdown()
       End If
       If twain IsNot Nothing Then
          twain.Shutdown()
       End If
    End Sub
    
    [C#]
    
    /// <summary>
    /// Shuts down the OCR engine and the TWAIN session when the form is closing.
    /// </summary>
    private void Form1_FormClosing(object sender, System.Windows.Forms.FormClosingEventArgs e)
    {
       //Free resources.
       //Either field is still null if Form1_Load failed before assigning it,
       //so guard each call instead of throwing a NullReferenceException on exit.
       if (document != null)
       {
          document.Shutdown();
       }
       if (twain != null)
       {
          twain.Shutdown();
       }
    }
    
  11. Add code to select the output directory using the _miOutputDir_Click method, as follows:

    [Visual Basic]

    
    ''' <summary>
    ''' Lets the user pick the folder that will receive the generated PDF and stores it
    ''' in m_strSavePath with a trailing backslash.
    ''' </summary>
    Private Sub _miOutputDir_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles buttonOutputDirectory.Click
       'The dialog is disposable; Using releases it as soon as it is no longer needed
       Using dlg As New System.Windows.Forms.FolderBrowserDialog()
          If dlg.ShowDialog() = System.Windows.Forms.DialogResult.OK Then
             Dim selectedPath As String = dlg.SelectedPath
             'A drive root such as "C:\" already ends with a separator; do not double it
             If Not selectedPath.EndsWith("\", System.StringComparison.Ordinal) Then
                selectedPath &= "\"
             End If
             m_strSavePath = selectedPath
          End If
       End Using
    End Sub
    
    [C#]
    
    /// <summary>
    /// Lets the user pick the folder that will receive the generated PDF and stores it
    /// in m_strSavePath with a trailing backslash.
    /// </summary>
    private void _miOutputDir_Click(object sender, System.EventArgs e)
    {
       //The dialog is disposable; the using block releases it as soon as it is no longer needed
       using (System.Windows.Forms.FolderBrowserDialog dlg = new System.Windows.Forms.FolderBrowserDialog())
       {
          if (dlg.ShowDialog() != System.Windows.Forms.DialogResult.OK)
          {
             return;
          }

          string selectedPath = dlg.SelectedPath;
          //A drive root such as "C:\" already ends with a separator; do not double it
          if (!selectedPath.EndsWith("\\", System.StringComparison.Ordinal))
          {
             selectedPath += "\\";
          }
          m_strSavePath = selectedPath;
       }
    }
    
  12. Add code to select the scanner device using twain.SelectSource in _miSelectScanner_Click, as follows:

    [Visual Basic]

    
    ''' <summary>
    ''' Asks the TWAIN session to select the scanning source to use.
    ''' </summary>
    Private Sub _miSelectScanner_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles buttonScanningDevice.Click
       'The button created in step 6 is named buttonScanningDevice, so that is the
       'control whose Click event this handler must be attached to.
       twain.SelectSource(String.Empty)
    End Sub
    
    [C#]
    
    /// <summary>
    /// Asks the TWAIN session to select the scanning source to use.
    /// </summary>
    private void _miSelectScanner_Click(object sender, System.EventArgs e)
    {
       // NOTE(review): an empty source name presumably makes the driver prompt the
       // user to choose a device - confirm against the TwainSession.SelectSource docs.
       twain.SelectSource(string.Empty);
    }
    
  13. Add code to initiate the scanning process. In the _miScan_Click event, any previous metafiles created by the OCR will be deleted and any previous pages added to the OCR will be removed. Once twain.Acquire is called, the scanner interface is exposed, the user makes a selection, and the scanning process starts.

    For each page acquired by the scanner, the twain_AcquirePage event handler is called. In this handler each page is cleaned up as it is scanned and then added to the OCR object. Once all pages are scanned, the twain.Acquire function returns. The program then performs OCR on all of the pages (document.Recognize) and saves the results to a searchable PDF (document.SaveResultsToFile).

    Passing TwainUserInterfaceFlags.Show as the only parameter shows the TWAIN driver's user interface. Pass TwainUserInterfaceFlags.None to hide the TWAIN driver's interface; do this if the user should not be able to adjust the scanner settings or if a customized user interface is to be displayed instead.

    [Visual Basic]

     
    ''' <summary>
    ''' Clears any previous OCR state, acquires pages from the selected TWAIN source,
    ''' recognizes them, and saves the result as Results.pdf in the output folder.
    ''' </summary>
    Private Sub _miScan_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles buttonAcquire.Click
       'Delete previous OCR meta file.
       If System.IO.File.Exists(document.RecognitionDataFileName) Then
          System.IO.File.Delete(document.RecognitionDataFileName)
       End If

       'Remove all previous pages
       While document.PageCount > 0
          document.RemovePage(0)
       End While

       'Scan images; twain_AcquirePage adds every acquired page to the OCR engine
       twain.Acquire(TwainUserInterfaceFlags.Show)

       'No page was acquired (for example, the scan was cancelled),
       'so there is nothing to recognize or save
       If document.PageCount = 0 Then
          Return
       End If

       'OCR all the images
       document.Recognize(0, document.PageCount, Nothing)

       'Save the results to PDF; Path.Combine copes with a folder that has no trailing separator
       document.SaveResultsToFile(System.IO.Path.Combine(m_strSavePath, "Results.pdf"))
    End Sub
    
    [C#]
     
    /// <summary>
    /// Clears any previous OCR state, acquires pages from the selected TWAIN source,
    /// recognizes them, and saves the result as Results.pdf in the output folder.
    /// </summary>
    private void _miScan_Click(object sender, System.EventArgs e)
    {
       //Delete previous OCR meta file.
       if (System.IO.File.Exists(document.RecognitionDataFileName))
       {
          System.IO.File.Delete(document.RecognitionDataFileName);
       }

       //Remove all previous pages
       while (document.PageCount > 0)
       {
          document.RemovePage(0);
       }

       //Scan images; twain_AcquirePage adds every acquired page to the OCR engine
       twain.Acquire(TwainUserInterfaceFlags.Show);

       //No page was acquired (for example, the scan was cancelled),
       //so there is nothing to recognize or save
       if (document.PageCount == 0)
       {
          return;
       }

       //OCR all the images
       document.Recognize(0, document.PageCount, null);

       //Save the results to PDF; Path.Combine copes with a folder that has no trailing separator
       document.SaveResultsToFile(System.IO.Path.Combine(m_strSavePath, "Results.pdf"));
    }
    
  14. Add code to acquire the document using twain_AcquirePage, as follows:

    [Visual Basic]

     
    ''' <summary>
    ''' Cleans up each page as it is acquired and then hands it to the OCR engine.
    ''' </summary>
    Private Sub twain_AcquirePage(ByVal sender As Object, ByVal e As Leadtools.Twain.TwainAcquirePageEventArgs) Handles twain.AcquirePage
       'Straighten the page, then strip speckles, dots and punch holes
       deskew.Run(e.Image)
       despeckle.Run(e.Image)
       dotRemove.Run(e.Image)
       holepunchRemove.Run(e.Image)

       'Remove lines in two passes: vertical first, then horizontal
       Dim passes() As LineRemoveCommandType = {LineRemoveCommandType.Vertical, LineRemoveCommandType.Horizontal}
       For Each lineType As LineRemoveCommandType In passes
          lineRemove.Type = lineType
          lineRemove.Run(e.Image)
       Next

       'Hand the cleaned page to the OCR engine
       document.AddPage(e.Image, 1)
    End Sub
    
    [C#]
     
    /// <summary>
    /// Cleans up each page as it is acquired and then hands it to the OCR engine.
    /// </summary>
    private void twain_AcquirePage(object sender, Leadtools.Twain.TwainAcquirePageEventArgs e)
    {
       // Straighten the page, then strip speckles, dots and punch holes.
       deskew.Run(e.Image);
       despeckle.Run(e.Image);
       dotRemove.Run(e.Image);
       holepunchRemove.Run(e.Image);

       // Remove lines in two passes: vertical first, then horizontal.
       LineRemoveCommandType[] passes = { LineRemoveCommandType.Vertical, LineRemoveCommandType.Horizontal };
       foreach (LineRemoveCommandType lineType in passes)
       {
          lineRemove.Type = lineType;
          lineRemove.Run(e.Image);
       }

       // Hand the cleaned page to the OCR engine.
       document.AddPage(e.Image, 1);

       // NOTE(review): the Visual Basic version of this handler does not make this
       // call - confirm what CleanupPages(true) does and whether both versions need it.
       document.CleanupPages(true);
    }
    
    
  15. Build and run the program to test it.