tesseract  4.1.1
baseapi.h
Go to the documentation of this file.
1 // File: baseapi.h
3 // Description: Simple API for calling tesseract.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_API_BASEAPI_H_
20 #define TESSERACT_API_BASEAPI_H_
21 
22 #include <cstdio>
23 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
24 // complexity of includes here. Use forward declarations wherever possible
25 // and hide includes of complex types in baseapi.cpp.
26 #include "apitypes.h"
27 #include "pageiterator.h"
28 #include "platform.h"
29 #include "publictypes.h"
30 #include "resultiterator.h"
31 #include "serialis.h"
32 #include "tess_version.h"
33 #include "tesscallback.h"
34 #include "thresholder.h"
35 #include "unichar.h"
36 
37 template <typename T> class GenericVector;
38 class PAGE_RES;
39 class PAGE_RES_IT;
40 class ParagraphModel;
41 struct BlamerBundle;
42 class BLOCK_LIST;
43 class DENORM;
44 class MATRIX;
45 class ROW;
46 class STRING;
47 class WERD;
48 struct Pix;
49 struct Box;
50 struct Pixa;
51 struct Boxa;
52 class ETEXT_DESC;
53 struct OSResults;
54 class TBOX;
55 class UNICHARSET;
56 class WERD_CHOICE_LIST;
57 
58 struct INT_FEATURE_STRUCT;
60 struct TBLOB;
61 
62 namespace tesseract {
63 
64 class Dawg;
65 class Dict;
66 class EquationDetect;
67 class PageIterator;
68 class LTRResultIterator;
69 class ResultIterator;
70 class MutableIterator;
71 class TessResultRenderer;
72 class Tesseract;
73 class Trie;
74 class Wordrec;
75 
76 using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const;
77 using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *, int, const char *, int);
78 using ParamsModelClassifyFunc = float (Dict::*)(const char *, void *);
79 using FillLatticeFunc = void (Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *);
81 using TruthCallback = TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>;
82 
91 class TESS_API TessBaseAPI {
92  public:
93  TessBaseAPI();
94  virtual ~TessBaseAPI();
95 
99  static const char* Version();
100 
108  static size_t getOpenCLDevice(void **device);
109 
114  static void CatchSignals();
115 
120  void SetInputName(const char* name);
128  const char* GetInputName();
129  // Takes ownership of the input pix.
130  void SetInputImage(Pix *pix);
131  Pix* GetInputImage();
132  int GetSourceYResolution();
133  const char* GetDatapath();
134 
136  void SetOutputName(const char* name);
137 
151  bool SetVariable(const char* name, const char* value);
152  bool SetDebugVariable(const char* name, const char* value);
153 
158  bool GetIntVariable(const char *name, int *value) const;
159  bool GetBoolVariable(const char *name, bool *value) const;
160  bool GetDoubleVariable(const char *name, double *value) const;
161 
166  const char *GetStringVariable(const char *name) const;
167 
171  void PrintVariables(FILE *fp) const;
172 
176  bool GetVariableAsString(const char *name, STRING *val);
177 
215  int Init(const char* datapath, const char* language, OcrEngineMode mode,
216  char **configs, int configs_size,
217  const GenericVector<STRING> *vars_vec,
218  const GenericVector<STRING> *vars_values,
219  bool set_only_non_debug_params);
220  int Init(const char* datapath, const char* language, OcrEngineMode oem) {  // Convenience overload: the caller picks only the data path, language and engine mode.
221  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);  // Forwards to the eight-argument Init with configs = nullptr, configs_size = 0, vars_vec = vars_values = nullptr, set_only_non_debug_params = false.
222  }
223  int Init(const char* datapath, const char* language) {  // Shortest overload: the engine mode is fixed to OEM_DEFAULT as well.
224  return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);  // Forwards to the eight-argument Init: no configs (nullptr, 0), no variable vectors, set_only_non_debug_params = false.
225  }
226  // In-memory version reads the traineddata file directly from the given
227  // data[data_size] array, and/or reads data via a FileReader.
228  int Init(const char* data, int data_size, const char* language,
229  OcrEngineMode mode, char** configs, int configs_size,
230  const GenericVector<STRING>* vars_vec,
231  const GenericVector<STRING>* vars_values,
232  bool set_only_non_debug_params, FileReader reader);
233 
242  const char* GetInitLanguagesAsString() const;
243 
249  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
250 
254  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
255 
262  int InitLangMod(const char* datapath, const char* language);
263 
268  void InitForAnalysePage();
269 
276  void ReadConfigFile(const char* filename);
278  void ReadDebugConfigFile(const char* filename);
279 
285  void SetPageSegMode(PageSegMode mode);
286 
288  PageSegMode GetPageSegMode() const;
289 
307  char* TesseractRect(const unsigned char* imagedata,
308  int bytes_per_pixel, int bytes_per_line,
309  int left, int top, int width, int height);
310 
315  void ClearAdaptiveClassifier();
316 
323  /* @{ */
324 
332  void SetImage(const unsigned char* imagedata, int width, int height,
333  int bytes_per_pixel, int bytes_per_line);
334 
343  void SetImage(Pix* pix);
344 
349  void SetSourceResolution(int ppi);
350 
356  void SetRectangle(int left, int top, int width, int height);
357 
365  void SetThresholder(ImageThresholder* thresholder) {  // Replaces the stored thresholder_ with the given pointer; the previously stored one is deleted here.
366  delete thresholder_;  // NOTE(review): if the caller passes the pointer that is already stored, the assignment below keeps a dangling pointer -- confirm callers never do that.
367  thresholder_ = thresholder;  // The stored pointer is what the delete above frees on the next replacement.
368  ClearResults();  // Protected member declared further down in this class; its body is not part of this listing.
369  }
370 
376  Pix* GetThresholdedImage();
377 
383  Boxa* GetRegions(Pixa** pixa);
384 
396  Boxa* GetTextlines(bool raw_image, int raw_padding,
397  Pixa** pixa, int** blockids, int** paraids);
398  /*
399  Helper method to extract from the thresholded image. (most common usage)
400  */
401  Boxa* GetTextlines(Pixa** pixa, int** blockids) {  // Two-argument short form of the five-argument GetTextlines declared just above.
402  return GetTextlines(false, 0, pixa, blockids, nullptr);  // Fixed arguments: raw_image = false, raw_padding = 0, paraids = nullptr.
403  }
404 
413  Boxa* GetStrips(Pixa** pixa, int** blockids);
414 
420  Boxa* GetWords(Pixa** pixa);
421 
430  Boxa* GetConnectedComponents(Pixa** cc);
431 
445  Boxa* GetComponentImages(PageIteratorLevel level,
446  bool text_only, bool raw_image,
447  int raw_padding,
448  Pixa** pixa, int** blockids, int** paraids);
449  // Helper function to get binary images with no padding (most common usage).
450  Boxa* GetComponentImages(const PageIteratorLevel level,
451  const bool text_only,
452  Pixa** pixa, int** blockids) {
453  return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
454  }
455 
462  int GetThresholdedImageScaleFactor() const;
463 
479  PageIterator* AnalyseLayout();
480  PageIterator* AnalyseLayout(bool merge_similar_words);
481 
488  int Recognize(ETEXT_DESC* monitor);
489 
495  #ifndef DISABLED_LEGACY_ENGINE
496 
497  int RecognizeForChopTest(ETEXT_DESC* monitor);
498  #endif
499 
522  bool ProcessPages(const char* filename, const char* retry_config,
523  int timeout_millisec, TessResultRenderer* renderer);
524  // Does the real work of ProcessPages.
525  bool ProcessPagesInternal(const char* filename, const char* retry_config,
526  int timeout_millisec, TessResultRenderer* renderer);
527 
537  bool ProcessPage(Pix* pix, int page_index, const char* filename,
538  const char* retry_config, int timeout_millisec,
539  TessResultRenderer* renderer);
540 
549  ResultIterator* GetIterator();
550 
559  MutableIterator* GetMutableIterator();
560 
565  char* GetUTF8Text();
566 
576  char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
577 
584  char* GetHOCRText(int page_number);
585 
590  char* GetAltoText(ETEXT_DESC* monitor, int page_number);
591 
592 
597  char* GetAltoText(int page_number);
598 
604  char* GetTSVText(int page_number);
605 
612  char* GetLSTMBoxText(int page_number);
613 
621  char* GetBoxText(int page_number);
622 
629  char* GetWordStrBoxText(int page_number);
630 
636  char* GetUNLVText();
637 
647  bool DetectOrientationScript(int* orient_deg, float* orient_conf,
648  const char** script_name, float* script_conf);
649 
655  char* GetOsdText(int page_number);
656 
658  int MeanTextConf();
665  int* AllWordConfidences();
666 
667 #ifndef DISABLED_LEGACY_ENGINE
668 
678  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
679 #endif // ndef DISABLED_LEGACY_ENGINE
680 
687  void Clear();
688 
695  void End();
696 
704  static void ClearPersistentCache();
705 
712  int IsValidWord(const char *word);
713  // Returns true if utf8_character is defined in the UniCharset.
714  bool IsValidCharacter(const char *utf8_character);
715 
716 
717  bool GetTextDirection(int* out_offset, float* out_slope);
718 
720  void SetDictFunc(DictFunc f);
721 
725  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
726 
731  bool DetectOS(OSResults*);
732 
737  void GetBlockTextOrientations(int** block_orientation,
738  bool** vertical_writing);
739 
740 
741  #ifndef DISABLED_LEGACY_ENGINE
742 
744  void SetFillLatticeFunc(FillLatticeFunc f);
745 
747  BLOCK_LIST* FindLinesCreateBlockList();
748 
754  static void DeleteBlockList(BLOCK_LIST* block_list);
755 
757  static ROW *MakeTessOCRRow(float baseline, float xheight,
758  float descender, float ascender);
759 
761  static TBLOB *MakeTBLOB(Pix *pix);
762 
768  static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
769 
771  void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
772  int* num_features, int* feature_outline_index);
773 
778  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
779  int right, int bottom);
780 
785  void RunAdaptiveClassifier(TBLOB* blob,
786  int num_max_matches,
787  int* unichar_ids,
788  float* ratings,
789  int* num_matches_returned);
790 #endif // ndef DISABLED_LEGACY_ENGINE
791 
793  const char* GetUnichar(int unichar_id);
794 
796  const Dawg *GetDawg(int i) const;
797 
799  int NumDawgs() const;
800 
801  Tesseract* tesseract() const { return tesseract_; }  // Accessor: hands back the stored tesseract_ pointer unchanged.
802 
803  OcrEngineMode oem() const { return last_oem_requested_; }  // Accessor: returns the value held in last_oem_requested_.
804 
805  void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }  // Stores cb in truth_cb_; nothing else is done here.
806 
807  void set_min_orientation_margin(double margin);
808  /* @} */
809 
810  protected:
811 
813  TESS_LOCAL bool InternalSetImage();
814 
819  TESS_LOCAL virtual bool Threshold(Pix** pix);
820 
825  TESS_LOCAL int FindLines();
826 
828  void ClearResults();
829 
835  TESS_LOCAL LTRResultIterator* GetLTRIterator();
836 
843  TESS_LOCAL int TextLength(int* blob_count);
844 
846  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
847 
848  #ifndef DISABLED_LEGACY_ENGINE
849 
851  /* @{ */
852 
857  TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
858  int length,
859  float baseline,
860  float xheight,
861  float descender,
862  float ascender);
863 
865  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
866 
867  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
868  PAGE_RES* pass1_result);
869 
874  TESS_LOCAL static int TesseractExtractResult(char** text,
875  int** lengths,
876  float** costs,
877  int** x0,
878  int** y0,
879  int** x1,
880  int** y1,
881  PAGE_RES* page_res);
882 
883  TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }  // Read-only accessor for page_res_; sits inside the #ifndef DISABLED_LEGACY_ENGINE section, so it exists only when that macro is not defined.
884  /* @} */
885 #endif // ndef DISABLED_LEGACY_ENGINE
886 
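887  protected:
888  Tesseract* tesseract_; ///< The underlying data object.
889  Tesseract* osd_tesseract_; ///< For orientation & script detection.
890  EquationDetect* equ_detect_; ///< The equation detector.
891  FileReader reader_; ///< Reads files from any filesystem.
892  ImageThresholder* thresholder_; ///< Image thresholding module.
893  GenericVector<ParagraphModel *>* paragraph_models_;
894  BLOCK_LIST* block_list_; ///< The page layout.
895  PAGE_RES* page_res_; ///< The page-level data.
896  STRING* input_file_; ///< Name used by training code.
897  STRING* output_file_; ///< Name used by debug code.
898  STRING* datapath_; ///< Current location of tessdata.
899  STRING* language_; ///< Last initialized language.
900  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
901  bool recognition_done_; ///< page_res_ contains recognition data.
902  TruthCallback* truth_cb_;
903 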
908  /* @{ */
915  /* @} */
916 
917  private:
918  // A list of image filenames gets special consideration
919  bool ProcessPagesFileList(FILE *fp,
920  STRING *buf,
921  const char* retry_config, int timeout_millisec,
922  TessResultRenderer* renderer,
923  int tessedit_page_number);
924  // TIFF supports multipage so gets special consideration.
925  bool ProcessPagesMultipageTiff(const unsigned char *data,
926  size_t size,
927  const char* filename,
928  const char* retry_config,
929  int timeout_millisec,
930  TessResultRenderer* renderer,
931  int tessedit_page_number);
932  // There's currently no way to pass a document title from the
933  // Tesseract command line, and we have multiple places that choose
934  // to set the title to an empty string. Using a single named
935  // variable will hopefully reduce confusion if the situation changes
936  // in the future.
937  const char *unknown_title_ = "";
938 }; // class TessBaseAPI.
939 
941 STRING HOcrEscape(const char* text);
942 } // namespace tesseract.
943 
944 #endif // TESSERACT_API_BASEAPI_H_
int UNICHAR_ID
Definition: unichar.h:34
int Init(const char *datapath, const char *language)
Definition: baseapi.h:223
Tesseract * tesseract() const
Definition: baseapi.h:801
STRING * language_
Last initialized language.
Definition: baseapi.h:899
void InitTruthCallback(TruthCallback *cb)
Definition: baseapi.h:805
TESS_LOCAL const PAGE_RES * GetPageRes() const
Definition: baseapi.h:883
void(Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *) FillLatticeFunc
Definition: baseapi.h:79
OcrEngineMode oem() const
Definition: baseapi.h:803
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:889
TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > TruthCallback
Definition: baseapi.h:81
struct TessBaseAPI TessBaseAPI
Definition: capi.h:93
Definition: matrix.h:578
Definition: blobs.h:284
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel *> *models)
struct TessResultRenderer TessResultRenderer
Definition: capi.h:87
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:450
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
#define TESS_API
Definition: platform.h:54
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:401
Definition: ocrrow.h:36
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:220
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
Definition: baseapi.h:76
bool(*)(const STRING &, GenericVector< char > *) FileReader
Definition: serialis.h:49
#define TESS_LOCAL
Definition: platform.h:55
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:898
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:893
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
Definition: strngs.h:45
float(Dict::*)(const char *, void *) ParamsModelClassifyFunc
Definition: baseapi.h:78
Definition: rect.h:34
TruthCallback * truth_cb_
Definition: baseapi.h:902
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
Definition: baseapi.h:77
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:891
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:900
void SetThresholder(ImageThresholder *thresholder)
Definition: baseapi.h:365
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:890
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2310
Definition: werd.h:56
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901