/* SphinxBase 5prealpha: lm_trie.h */
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
 * Copyright (c) 2015 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced
 * Research Projects Agency and the National Science Foundation of the
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */

38#ifndef __LM_TRIE_H__
39#define __LM_TRIE_H__
40
41#include <sphinxbase/pio.h>
42#include <sphinxbase/bitarr.h>
43
44#include "ngram_model_internal.h"
45#include "lm_trie_quant.h"
46
47typedef struct unigram_s {
48 float prob;
49 float bo;
50 uint32 next;
51} unigram_t;
52
53typedef struct node_range_s {
54 uint32 begin;
55 uint32 end;
57
58typedef struct base_s {
59 uint8 word_bits;
60 uint8 total_bits;
61 uint32 word_mask;
62 uint8 *base;
63 uint32 insert_index;
64 uint32 max_vocab;
65} base_t;
66
67typedef struct middle_s {
68 base_t base;
69 bitarr_mask_t next_mask;
70 uint8 quant_bits;
71 void *next_source;
72} middle_t;
73
74typedef struct longest_s {
75 base_t base;
76 uint8 quant_bits;
77} longest_t;
78
79typedef struct lm_trie_s {
80 uint8 *ngram_mem;
81 size_t ngram_mem_size;
82 unigram_t *unigrams;
83 middle_t *middle_begin;
84 middle_t *middle_end;
85 longest_t *longest;
86 lm_trie_quant_t *quant;
87
88 float backoff_cache[NGRAM_MAX_ORDER];
89 uint32 hist_cache[NGRAM_MAX_ORDER - 1];
90} lm_trie_t;
91
95lm_trie_t *lm_trie_create(uint32 unigram_count, int order);
96
97lm_trie_t *lm_trie_read_bin(uint32 * counts, int order, FILE * fp);
98
99void lm_trie_write_bin(lm_trie_t * trie, uint32 unigram_count, FILE * fp);
100
101void lm_trie_free(lm_trie_t * trie);
102
103void lm_trie_build(lm_trie_t * trie, ngram_raw_t ** raw_ngrams,
104 uint32 * counts, uint32 *out_counts, int order);
105
106void lm_trie_fill_raw_ngram(lm_trie_t * trie,
107 ngram_raw_t * raw_ngrams, uint32 * raw_ngram_idx,
108 uint32 * counts, node_range_t range, uint32 * hist,
109 int n_hist, int order, int max_order);
110
111float lm_trie_score(lm_trie_t * trie, int order, int32 wid, int32 * hist,
112 int32 n_hist, int32 * n_used);
113
114#endif /* __LM_TRIE_H__ */
/*
 * Notes carried over from the generated documentation page:
 *   - An implementation of a bit array: memory-efficient storage for
 *     integer and float data.
 *   - File I/O related operations.
 *   - Structure that specifies the bits required to efficiently store
 *     certain data (definition at bitarr.h:65).
 */