tesseract  4.1.1
dawg.h
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: dawg.h
5  * Description: Definition of a class that represents Directed Acyclic Word
6  * Graph (DAWG), functions to build and manipulate the DAWG.
7  * Author: Mark Seaman, SW Productivity
8  *
9  * (c) Copyright 1987, Hewlett-Packard Company.
10  ** Licensed under the Apache License, Version 2.0 (the "License");
11  ** you may not use this file except in compliance with the License.
12  ** You may obtain a copy of the License at
13  ** http://www.apache.org/licenses/LICENSE-2.0
14  ** Unless required by applicable law or agreed to in writing, software
15  ** distributed under the License is distributed on an "AS IS" BASIS,
16  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  ** See the License for the specific language governing permissions and
18  ** limitations under the License.
19  *
20  *********************************************************************************/
21 
22 #ifndef DICT_DAWG_H_
23 #define DICT_DAWG_H_
24 
25 /*----------------------------------------------------------------------
26  I n c l u d e s
27 ----------------------------------------------------------------------*/
28 
29 #include <cinttypes> // for PRId64
30 #include <memory>
31 #include "elst.h"
32 #include "params.h"
33 #include "ratngs.h"
34 #include "tesscallback.h"
35 
// NO_EDGE is the sentinel edge/node reference: a 64-bit pattern with every
// bit set, converted to int64_t (-1 on two's-complement targets).
// The Microsoft-specific i64 literal suffix is kept for non-GNU Windows
// compilers; every other compiler gets the standard ll suffix, so the macro
// is now always defined instead of being left undefined on compilers that
// are neither GNU-compatible nor Windows.
// The expansion is fully parenthesized so it behaves as a single operand.
#if !defined(__GNUC__) && defined(_WIN32)
#define NO_EDGE ((int64_t)0xffffffffffffffffi64)
#else
#define NO_EDGE ((int64_t)0xffffffffffffffffll)
#endif
43 
44 /*----------------------------------------------------------------------
45  T y p e s
46 ----------------------------------------------------------------------*/
47 class UNICHARSET;
48 
49 using EDGE_RECORD = uint64_t;
51 using EDGE_REF = int64_t;
52 using NODE_REF = int64_t;
53 using NODE_MAP = EDGE_REF *;
54 
55 namespace tesseract {
56 
57 struct NodeChild {
61  NodeChild(): unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {}
62 };
63 
67 
68 enum DawgType {
73 
74  DAWG_TYPE_COUNT // number of enum entries
75 };
76 
77 /*----------------------------------------------------------------------
78  C o n s t a n t s
79 ----------------------------------------------------------------------*/
80 
// Every cast expansion is wrapped in parentheses so the macro acts as one
// operand in any expression (e.g. `sizeof MARKER_FLAG`).

// Direction values compared against the result of direction_from_edge_rec().
#define FORWARD_EDGE ((int32_t)0)
#define BACKWARD_EDGE ((int32_t)1)
// NOTE(review): not used in this header; presumably caps the number of
// edges printed per node -- confirm against dawg.cpp.
#define MAX_NODE_EDGES_DISPLAY ((int64_t)100)
// Flag bits of an edge record.  Each one is shifted left by flag_start_bit_
// before it is tested against, or merged into, an EDGE_RECORD.
#define MARKER_FLAG ((int64_t)1)     // last edge in a sequence of edges
#define DIRECTION_FLAG ((int64_t)2)  // edge direction (forward/backward)
#define WERD_END_FLAG ((int64_t)4)   // edge ends a word
// Shift applied after masking an edge record with letter_mask_.
#define LETTER_START_BIT 0
// NOTE(review): presumably the count of flag bits above -- confirm.
#define NUM_FLAG_BITS 3
// printf-style conversion for EDGE_REF / NODE_REF (int64_t) values.  Left
// unparenthesized on purpose: it is pasted into adjacent string literals.
#define REFFORMAT "%" PRId64
90 
91 static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] = {
92  { false, true, true, false }, // for DAWG_TYPE_PUNCTUATION
93  { true, false, false, false }, // for DAWG_TYPE_WORD
94  { true, false, false, false }, // for DAWG_TYPE_NUMBER
95  { false, false, false, false }, // for DAWG_TYPE_PATTERN
96 };
97 
98 static const char kWildcard[] = "*";
99 
100 
101 /*----------------------------------------------------------------------
102  C l a s s e s a n d S t r u c t s
103 ----------------------------------------------------------------------*/
104 //
114 //
115 class Dawg {
116  public:
118  static const int16_t kDawgMagicNumber = 42;
122  static const UNICHAR_ID kPatternUnicharID = 0;
123 
124  inline DawgType type() const { return type_; }
125  inline const STRING &lang() const { return lang_; }
126  inline PermuterType permuter() const { return perm_; }
127 
128  virtual ~Dawg();
129 
131  bool word_in_dawg(const WERD_CHOICE &word) const;
132 
133  // Returns true if the given word prefix is not contraindicated by the dawg.
134  // If requires_complete is true, then the exact complete word must be present.
135  bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const;
136 
139  int check_for_words(const char *filename,
140  const UNICHARSET &unicharset,
141  bool enable_wildcard) const;
142 
143  // For each word in the Dawg, call the given (permanent) callback with the
144  // text (UTF-8) version of the word.
145  void iterate_words(const UNICHARSET &unicharset,
147 
148  // For each word in the Dawg, call the given (permanent) callback with the
149  // text (UTF-8) version of the word.
150  void iterate_words(const UNICHARSET &unicharset,
151  TessCallback1<const char *> *cb) const;
152 
153  // Pure virtual function that should be implemented by the derived classes.
154 
156  virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
157  bool word_end) const = 0;
158 
161  virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
162  bool word_end) const = 0;
163 
166  virtual NODE_REF next_node(EDGE_REF edge_ref) const = 0;
167 
170  virtual bool end_of_word(EDGE_REF edge_ref) const = 0;
171 
173  virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const = 0;
174 
177  virtual void print_node(NODE_REF node, int max_num_edges) const = 0;
178 
181  virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
182  const UNICHARSET &unicharset,
183  GenericVector<UNICHAR_ID> *vec) const {
184  (void)unichar_id;
185  (void)unicharset;
186  (void)vec;
187  }
188 
193  EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const {
194  (void)edge_ref;
195  (void)unichar_id;
196  (void)word_end;
197  return false;
198  }
199 
200  protected:
201  Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
202  : lang_(lang),
203  type_(type),
204  perm_(perm),
205  unicharset_size_(0),
206  debug_level_(debug_level) {}
207 
209  inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
210  return ((edge_rec & next_node_mask_) >> next_node_start_bit_);
211  }
213  inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const {
214  return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0;
215  }
217  inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const {
218  return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ?
220  }
222  inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const {
223  return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
224  }
227  const EDGE_RECORD &edge_rec) const {
228  return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
229  }
232  EDGE_RECORD *edge_rec, EDGE_REF value) {
233  *edge_rec &= (~next_node_mask_);
234  *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_);
235  }
237  inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) {
238  *edge_rec |= (MARKER_FLAG << flag_start_bit_);
239  }
248  bool word_end,
249  UNICHAR_ID unichar_id,
250  const EDGE_RECORD &edge_rec) const {
251  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
252  NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
253  bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
254  if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
255  curr_word_end, curr_unichar_id)) return 0;
256  if (unichar_id > curr_unichar_id) return 1;
257  if (unichar_id == curr_unichar_id) {
258  if (next_node > curr_next_node) return 1;
259  if (next_node == curr_next_node) {
260  if (word_end > curr_word_end) return 1;
261  }
262  }
263  return -1;
264  }
269  bool word_end,
270  UNICHAR_ID unichar_id,
271  NODE_REF other_next_node,
272  bool other_word_end,
273  UNICHAR_ID other_unichar_id) const {
274  return ((unichar_id == other_unichar_id) &&
275  (next_node == NO_EDGE || next_node == other_next_node) &&
276  (!word_end || (word_end == other_word_end)));
277  }
278 
281  void init(int unicharset_size);
282 
288  bool match_words(WERD_CHOICE *word, int32_t index,
289  NODE_REF node, UNICHAR_ID wildcard) const;
290 
291  // Recursively iterate over all words in a dawg (see public iterate_words).
292  void iterate_words_rec(const WERD_CHOICE &word_so_far,
293  NODE_REF to_explore,
295 
296  // Member Variables.
301  // Variables to construct various edge masks. Formerly:
302  // #define NEXT_EDGE_MASK (int64_t) 0xfffffff800000000i64
303  // #define FLAGS_MASK (int64_t) 0x0000000700000000i64
304  // #define LETTER_MASK (int64_t) 0x00000000ffffffffi64
305  uint64_t next_node_mask_;
306  uint64_t flags_mask_;
307  uint64_t letter_mask_;
311  // Level of debug statements to print to stdout.
313 };
314 
315 //
316 // DawgPosition keeps track of where we are in the primary dawg we're searching
317 // as well as where we may be in the "punctuation dawg" which may provide
318 // surrounding context.
319 //
320 // Example:
321 // punctuation dawg -- space is the "pattern character"
322 // " " // no punctuation
323 // "' '" // leading and trailing apostrophes
324 // " '" // trailing apostrophe
325 // word dawg:
326 // "cat"
327 // "cab"
328 // "cat's"
329 //
330 // DawgPosition(dawg_index, dawg_ref, punc_index, punc_ref, rtp)
331 //
332 // DawgPosition(-1, NO_EDGE, p, pe, false)
333 // We're in the punctuation dawg, no other dawg has been started.
334 // (1) If there's a pattern edge as a punc dawg child of us,
335 // for each punc-following dawg starting with ch, produce:
336 // Result: DawgPosition(k, w, p', false)
337 // (2) If there's a valid continuation in the punc dawg, produce:
338 // Result: DawgPosition(-k, NO_EDGE, p', false)
339 //
340 // DawgPosition(k, w, -1, NO_EDGE, false)
341 // We're in dawg k. Going back to punctuation dawg is not an option.
342 // Follow ch in dawg k.
343 //
344 // DawgPosition(k, w, p, pe, false)
345 // We're in dawg k. Continue in dawg k and/or go back to the punc dawg.
346 // If ending, check that the punctuation dawg is also ok to end here.
347 //
348 // DawgPosition(k, w, p, pe, true)
349 // We're back in the punctuation dawg. Continuing there is the only option.
350 struct DawgPosition {
351  DawgPosition() = default;
352  DawgPosition(int dawg_idx, EDGE_REF dawgref,
353  int punc_idx, EDGE_REF puncref,
354  bool backtopunc)
355  : dawg_ref(dawgref), punc_ref(puncref),
356  dawg_index(dawg_idx), punc_index(punc_idx),
357  back_to_punc(backtopunc) {
358  }
359  bool operator==(const DawgPosition &other) {
360  return dawg_index == other.dawg_index &&
361  dawg_ref == other.dawg_ref &&
362  punc_index == other.punc_index &&
363  punc_ref == other.punc_ref &&
364  back_to_punc == other.back_to_punc;
365  }
366 
367  EDGE_REF dawg_ref = NO_EDGE;
368  EDGE_REF punc_ref = NO_EDGE;
369  int8_t dawg_index = -1;
370  int8_t punc_index = -1;
371  // Have we returned to the punc dawg at the end of the word?
372  bool back_to_punc = false;
373 };
374 
375 class DawgPositionVector : public GenericVector<DawgPosition> {
376  public:
379  void clear() { size_used_ = 0; }
383  inline bool add_unique(const DawgPosition &new_pos,
384  bool debug,
385  const char *debug_msg) {
386  for (int i = 0; i < size_used_; ++i) {
387  if (data_[i] == new_pos) return false;
388  }
389  push_back(new_pos);
390  if (debug) {
391  tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n",
392  debug_msg, new_pos.dawg_index, new_pos.dawg_ref,
393  new_pos.punc_ref, new_pos.back_to_punc ? " returned" : "");
394  }
395  return true;
396  }
397 };
398 
399 //
406 //
407 class SquishedDawg : public Dawg {
408  public:
410  int debug_level)
411  : Dawg(type, lang, perm, debug_level) {}
412  SquishedDawg(const char *filename, DawgType type, const STRING &lang,
413  PermuterType perm, int debug_level)
414  : Dawg(type, lang, perm, debug_level) {
415  TFile file;
416  ASSERT_HOST(file.Open(filename, nullptr));
417  ASSERT_HOST(read_squished_dawg(&file));
418  num_forward_edges_in_node0 = num_forward_edges(0);
419  }
420  SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
421  const STRING &lang, PermuterType perm, int unicharset_size,
422  int debug_level)
423  : Dawg(type, lang, perm, debug_level),
424  edges_(edges),
425  num_edges_(num_edges) {
426  init(unicharset_size);
427  num_forward_edges_in_node0 = num_forward_edges(0);
428  if (debug_level > 3) print_all("SquishedDawg:");
429  }
430  ~SquishedDawg() override;
431 
432  // Loads using the given TFile. Returns false on failure.
433  bool Load(TFile *fp) {
434  if (!read_squished_dawg(fp)) return false;
435  num_forward_edges_in_node0 = num_forward_edges(0);
436  return true;
437  }
438 
439  int NumEdges() { return num_edges_; }
440 
442  EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
443  bool word_end) const override;
444 
448  bool word_end) const override {
449  EDGE_REF edge = node;
450  if (!edge_occupied(edge) || edge == NO_EDGE) return;
451  assert(forward_edge(edge)); // we don't expect any backward edges to
452  do { // be present when this function is called
453  if (!word_end || end_of_word_from_edge_rec(edges_[edge])) {
454  vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge));
455  }
456  } while (!last_edge(edge++));
457  }
458 
461  NODE_REF next_node(EDGE_REF edge) const override {
462  return next_node_from_edge_rec((edges_[edge]));
463  }
464 
467  bool end_of_word(EDGE_REF edge_ref) const override {
468  return end_of_word_from_edge_rec((edges_[edge_ref]));
469  }
470 
472  UNICHAR_ID edge_letter(EDGE_REF edge_ref) const override {
473  return unichar_id_from_edge_rec((edges_[edge_ref]));
474  }
475 
478  void print_node(NODE_REF node, int max_num_edges) const override;
479 
481  bool write_squished_dawg(TFile *file);
482 
485  bool write_squished_dawg(const char *filename) {
486  TFile file;
487  file.OpenWrite(nullptr);
488  if (!this->write_squished_dawg(&file)) {
489  tprintf("Error serializing %s\n", filename);
490  return false;
491  }
492  if (!file.CloseWrite(filename, nullptr)) {
493  tprintf("Error writing file %s\n", filename);
494  return false;
495  }
496  return true;
497  }
498 
499  private:
501  inline void set_next_node(EDGE_REF edge_ref, EDGE_REF value) {
502  set_next_node_in_edge_rec(&(edges_[edge_ref]), value);
503  }
505  inline void set_empty_edge(EDGE_REF edge_ref) {
506  (edges_[edge_ref] = next_node_mask_);
507  }
509  inline void clear_all_edges() {
510  for (int edge = 0; edge < num_edges_; edge++) set_empty_edge(edge);
511  }
513  inline void clear_marker_flag(EDGE_REF edge_ref) {
514  (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_));
515  }
517  inline bool forward_edge(EDGE_REF edge_ref) const {
518  return (edge_occupied(edge_ref) &&
519  (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
520  }
522  inline bool backward_edge(EDGE_REF edge_ref) const {
523  return (edge_occupied(edge_ref) &&
524  (BACKWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
525  }
527  inline bool edge_occupied(EDGE_REF edge_ref) const {
528  return (edges_[edge_ref] != next_node_mask_);
529  }
531  inline bool last_edge(EDGE_REF edge_ref) const {
532  return (edges_[edge_ref] & (MARKER_FLAG << flag_start_bit_)) != 0;
533  }
534 
536  int32_t num_forward_edges(NODE_REF node) const;
537 
539  bool read_squished_dawg(TFile *file);
540 
542  void print_edge(EDGE_REF edge) const;
543 
545  void print_all(const char* msg) {
546  tprintf("\n__________________________\n%s\n", msg);
547  for (int i = 0; i < num_edges_; ++i) print_edge(i);
548  tprintf("__________________________\n");
549  }
551  std::unique_ptr<EDGE_REF[]> build_node_map(int32_t *num_nodes) const;
552 
553  // Member variables.
554  EDGE_ARRAY edges_ = nullptr;
555  int32_t num_edges_ = 0;
556  int num_forward_edges_in_node0 = 0;
557 };
558 
559 } // namespace tesseract
560 
561 #endif // DICT_DAWG_H_
int UNICHAR_ID
Definition: unichar.h:34
SquishedDawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:409
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
Definition: dawg.h:268
#define FORWARD_EDGE
Definition: dawg.h:81
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
virtual ~Dawg()
virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const =0
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
PermuterType
Definition: ratngs.h:232
void OpenWrite(GenericVector< char > *data)
Definition: serialis.cpp:296
static const int16_t kDawgMagicNumber
Magic number to determine endianness when reading the Dawg from file.
Definition: dawg.h:118
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override
Definition: dawg.h:447
void print_node(NODE_REF node, int max_num_edges) const override
Definition: dawg.cpp:240
~SquishedDawg() override
Definition: dawg.cpp:193
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:300
const STRING & lang() const
Definition: dawg.h:125
int next_node_start_bit_
Definition: dawg.h:310
bool write_squished_dawg(const char *filename)
Definition: dawg.h:485
DawgType type_
Definition: dawg.h:298
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
uint64_t next_node_mask_
Definition: dawg.h:305
bool operator==(const DawgPosition &other)
Definition: dawg.h:359
PermuterType permuter() const
Definition: dawg.h:126
void init(int unicharset_size)
Definition: dawg.cpp:176
bool Open(const STRING &filename, FileReader reader)
Definition: serialis.cpp:197
STRING lang_
Definition: dawg.h:297
int64_t EDGE_REF
Definition: dawg.h:51
virtual void print_node(NODE_REF node, int max_num_edges) const =0
void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec)
Sets this edge record to be the last one in a sequence of edges.
Definition: dawg.h:237
virtual bool end_of_word(EDGE_REF edge_ref) const =0
bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg)
Definition: dawg.h:383
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: dawg.h:420
EDGE_REF dawg_ref
Definition: dawg.h:367
DawgType type() const
Definition: dawg.h:124
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
bool CloseWrite(const STRING &filename, FileWriter writer)
Definition: serialis.cpp:311
int unicharset_size_
Definition: dawg.h:308
uint64_t flags_mask_
Definition: dawg.h:306
EDGE_REF * NODE_MAP
Definition: dawg.h:53
bool write_squished_dawg(TFile *file)
Writes the squished/reduced Dawg to a file.
Definition: dawg.cpp:368
int check_for_words(const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
Definition: dawg.cpp:69
#define DIRECTION_FLAG
Definition: dawg.h:85
int flag_start_bit_
Definition: dawg.h:309
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const override
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Definition: dawg.h:472
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:226
uint64_t letter_mask_
Definition: dawg.h:307
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:209
#define WERD_END_FLAG
Definition: dawg.h:86
static const UNICHAR_ID kPatternUnicharID
Definition: dawg.h:122
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
Definition: dawg.h:181
EDGE_RECORD * EDGE_ARRAY
Definition: dawg.h:50
EDGE_REF punc_ref
Definition: dawg.h:368
bool Load(TFile *fp)
Definition: dawg.h:433
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
Definition: dawg.h:213
EDGE_REF edge_ref
Definition: dawg.h:59
bool word_in_dawg(const WERD_CHOICE &word) const
Returns true if the given word is in the Dawg.
Definition: dawg.cpp:65
int64_t NODE_REF
Definition: dawg.h:52
Definition: strngs.h:45
int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
Definition: dawg.h:247
Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:201
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:222
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const override
Returns the edge that corresponds to the letter out of this node.
Definition: dawg.cpp:195
NODE_REF next_node(EDGE_REF edge) const override
Definition: dawg.h:461
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
Definition: dawg.h:217
int push_back(DawgPosition object)
#define MARKER_FLAG
Definition: dawg.h:84
bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const
Definition: dawg.cpp:44
int debug_level_
Definition: dawg.h:312
UNICHAR_ID unichar_id
Definition: dawg.h:58
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
Definition: dawg.h:192
bool end_of_word(EDGE_REF edge_ref) const override
Definition: dawg.h:467
#define REFFORMAT
Definition: dawg.h:89
#define LETTER_START_BIT
Definition: dawg.h:87
bool match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const
Definition: dawg.cpp:144
SquishedDawg(const char *filename, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:412
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc)
Definition: dawg.h:352
#define ASSERT_HOST(x)
Definition: errcode.h:88
DawgType
Definition: dawg.h:68
void set_next_node_in_edge_rec(EDGE_RECORD *edge_rec, EDGE_REF value)
Sets the next node link for this edge in the Dawg.
Definition: dawg.h:231
NodeChild(UNICHAR_ID id, EDGE_REF ref)
Definition: dawg.h:60
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE *> *cb) const
Definition: dawg.cpp:126
uint64_t EDGE_RECORD
Definition: dawg.h:49
void iterate_words(const UNICHARSET &unicharset, TessCallback1< const WERD_CHOICE *> *cb) const
Definition: dawg.cpp:105
#define BACKWARD_EDGE
Definition: dawg.h:82