PocketSphinx 5prealpha
pocketsphinx_internal.h
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2008 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
44#ifndef __POCKETSPHINX_INTERNAL_H__
45#define __POCKETSPHINX_INTERNAL_H__
46
47/* SphinxBase headers. */
48#include <sphinxbase/cmd_ln.h>
49#include <sphinxbase/fe.h>
50#include <sphinxbase/feat.h>
51#include <sphinxbase/hash_table.h>
52#include <sphinxbase/logmath.h>
53#include <sphinxbase/profile.h>
54
55/* Local headers. */
56#include "pocketsphinx.h"
57#include "acmod.h"
58#include "dict.h"
59#include "dict2pid.h"
60
/* Typedef for struct ps_search_s, the base structure for search modules;
 * declared here so the v-table below can refer to it by pointer. */
64typedef struct ps_search_s ps_search_t;
65
66
67/* Search names. */
/* NOTE(review): the "_pl" suffix presumably stands for the phone-loop
 * lookahead search -- confirm against the decoder sources. */
68#define PS_DEFAULT_SEARCH "_default"
69#define PS_DEFAULT_PL_SEARCH "_default_pl"
70
71/* Search types. */
/* One string identifier per kind of search module.
 * NOTE(review): presumably these are the values stored in the `type`
 * member read by ps_search_type() -- confirm against callers. */
72#define PS_SEARCH_TYPE_KWS "kws"
73#define PS_SEARCH_TYPE_FSG "fsg"
74#define PS_SEARCH_TYPE_NGRAM "ngram"
75#define PS_SEARCH_TYPE_ALLPHONE "allphone"
76#define PS_SEARCH_TYPE_STATE_ALIGN "state_align"
77#define PS_SEARCH_TYPE_PHONE_LOOP "phone_loop"
78
/*
 * V-table for search algorithm.
 *
 * Each function pointer is called through the matching ps_search_*()
 * dispatch macro, which looks the pointer up via the search's `vt` member.
 *
 * NOTE(review): this listing has lost lines (see the gap marked below), so
 * as shown the v-table and the search base structure run together into a
 * single struct body. Do not treat the text below as the real layout.
 */
82typedef struct ps_searchfuncs_s {
83 int (*start)(ps_search_t *search); /* Called via ps_search_start(). */
84 int (*step)(ps_search_t *search, int frame_idx); /* Called via ps_search_step() with a frame index. */
85 int (*finish)(ps_search_t *search); /* Called via ps_search_finish(). */
86 int (*reinit)(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); /* Called via ps_search_reinit() with a dictionary and its dict2pid. */
87 void (*free)(ps_search_t *search); /* Called via ps_search_free(). */
88
89 ps_lattice_t *(*lattice)(ps_search_t *search); /* Called via ps_search_lattice(). */
90 char const *(*hyp)(ps_search_t *search, int32 *out_score); /* Called via ps_search_hyp(); also writes a score through out_score. */
91 int32 (*prob)(ps_search_t *search); /* Called via ps_search_prob(). */
92 ps_seg_t *(*seg_iter)(ps_search_t *search); /* Called via ps_search_seg_iter(). */
94
/* NOTE(review): listing lines 93 and 95-100 are absent here. The page's
 * index names the v-table typedef ps_searchfuncs_t and documents a separate
 * "Base structure for search module" with members vt, pls, acmod, dict, d2p,
 * dag, last_link, silence_wid and finish_wid. None of those are visible
 * below, although the accessor macros dereference them. The fields that
 * follow presumably belong to that base structure (struct ps_search_s)
 * rather than to the v-table -- confirm against the real header. */
101 char *type; /* Read by ps_search_type(). */
102 char *name; /* Read by ps_search_name(). */
103
105 cmd_ln_t *config; /* Configuration. */
109 char *hyp_str; /* Current hypothesis string. */
112 int32 post; /* Utterance posterior probability. */
113 int32 n_words; /* Number of words known to search (may be less than in the dictionary). */
116 /* Magical word IDs that must exist in the dictionary: */
117 int32 start_wid; /* Start word ID. */
120};
121
/*
 * Accessor and dispatch macros for the search base structure.
 *
 * ps_search_base() casts any search-module pointer to ps_search_t *.  The
 * argument is parenthesized so that an expression argument such as `p + k`
 * is evaluated in its own type before the cast, instead of having only its
 * first operand cast.  Field accessors stay usable as lvalues.
 *
 * The dispatch macros look the function up through the `vt` member and pass
 * the original argument on, so `s` is evaluated twice: do not pass an
 * expression with side effects.
 */
#define ps_search_base(s) ((ps_search_t *)(s))
#define ps_search_config(s) (ps_search_base(s)->config)
#define ps_search_acmod(s) (ps_search_base(s)->acmod)
#define ps_search_dict(s) (ps_search_base(s)->dict)
#define ps_search_dict2pid(s) (ps_search_base(s)->d2p)
#define ps_search_dag(s) (ps_search_base(s)->dag)
#define ps_search_last_link(s) (ps_search_base(s)->last_link)
#define ps_search_post(s) (ps_search_base(s)->post)
#define ps_search_lookahead(s) (ps_search_base(s)->pls)
#define ps_search_n_words(s) (ps_search_base(s)->n_words)

/* Identification fields and v-table dispatch. */
#define ps_search_type(s) (ps_search_base(s)->type)
#define ps_search_name(s) (ps_search_base(s)->name)
#define ps_search_start(s) ((*(ps_search_base(s)->vt->start))(s))
#define ps_search_step(s,i) ((*(ps_search_base(s)->vt->step))((s), (i)))
#define ps_search_finish(s) ((*(ps_search_base(s)->vt->finish))(s))
#define ps_search_reinit(s,d,d2p) ((*(ps_search_base(s)->vt->reinit))((s), (d), (d2p)))
#define ps_search_free(s) ((*(ps_search_base(s)->vt->free))(s))
#define ps_search_lattice(s) ((*(ps_search_base(s)->vt->lattice))(s))
#define ps_search_hyp(s,sc) ((*(ps_search_base(s)->vt->hyp))((s), (sc)))
#define ps_search_prob(s) ((*(ps_search_base(s)->vt->prob))(s))
#define ps_search_seg_iter(s) ((*(ps_search_base(s)->vt->seg_iter))(s))

/* For convenience... */
#define ps_search_silence_wid(s) (ps_search_base(s)->silence_wid)
#define ps_search_start_wid(s) (ps_search_base(s)->start_wid)
#define ps_search_finish_wid(s) (ps_search_base(s)->finish_wid)
149
154 const char *type, const char *name,
155 cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
156 dict2pid_t *d2p);
157
158
/**
 * Free search.
 */
162void ps_search_base_free(ps_search_t *search);
163
/**
 * Re-initialize base structure with new dictionary.
 *
 * Takes the search to update, the dictionary, and the dict2pid mapping
 * that accompanies it.
 */
167void ps_search_base_reinit(ps_search_t *search, dict_t *dict,
168 dict2pid_t *d2p);
169
/*
 * V-table of seg methods.
 *
 * NOTE(review): the closing line of this typedef is absent from the
 * listing. The page's index refers to the type as ps_segfuncs_t, so it is
 * presumably `} ps_segfuncs_t;` -- confirm against the real header.
 */
170typedef struct ps_segfuncs_s {
171 ps_seg_t *(*seg_next)(ps_seg_t *seg); /* Called via ps_search_seg_next(). */
172 void (*seg_free)(ps_seg_t *seg); /* Called via ps_search_seg_free(). */
174
/*
 * Base structure for hypothesis segmentation iterator.
 *
 * NOTE(review): listing lines 179-180 and 182-183 are absent. The page's
 * index lists four further members -- vt (v-table of seg methods), search
 * (search object this came from), sf (start frame) and ef (end frame) --
 * that presumably sit in those gaps; confirm against the real header.
 */
178struct ps_seg_s {
181 char const *word; /* Word string (pointer into dictionary hash). */
184 int32 ascr; /* Acoustic score. */
185 int32 lscr; /* Language model score. */
186 int32 prob; /* Log posterior probability. */
187 /* This doesn't need to be 32 bits, so once the scores above are
188 * reduced to 16 bits (or less!), this will be too. */
189 int32 lback; /* Language model backoff. */
190 /* Not sure if this should be here at all. */
191 float32 lwf; /* Language weight factor (for second-pass searches). */
192};
193
/*
 * Dispatch helpers for segmentation iterators: call the method stored in
 * the iterator's own v-table, passing the iterator itself.
 *
 * ps_search_seg_free() previously declared its parameter as `s` but
 * expanded to `seg`, so it silently operated on whatever variable named
 * `seg` was in scope at the call site.  It now uses its argument.  The
 * argument is evaluated twice, so it must not have side effects.
 */
#define ps_search_seg_next(seg) ((*((seg)->vt->seg_next))(seg))
#define ps_search_seg_free(seg) ((*((seg)->vt->seg_free))(seg))
196
197
/*
 * Decoder object (member list).
 *
 * NOTE(review): the struct's opening line and several members are absent
 * from this listing. The page's index additionally documents refcount,
 * acmod, dict, d2p, search (currently active search module), phone_loop
 * (phone loop search for lookahead) and pl_window (window size for phoneme
 * lookahead); presumably they sit in the numbering gaps below -- confirm
 * against the real header.
 */
202 /* Model parameters and such. */
203 cmd_ln_t *config; /* Configuration. */
206 /* Basic units of computation. */
210 logmath_t *lmath; /* Log math computation. */
212 /* Search modules. */
213 hash_table_t *searches; /* Set of search modules. */
214 /* TODO: Convert this to a stack of searches each with their own
215 * lookahead value. */
220 /* Utterance-processing related stuff. */
221 uint32 uttno; /* Utterance counter. */
222 ptmr_t perf; /* Performance counter for all of decoding. */
223 uint32 n_frame; /* Total number of frames processed. */
224 char const *mfclogdir; /* Log directory for MFCC files. */
225 char const *rawlogdir; /* Log directory for audio files. */
226 char const *senlogdir; /* Log directory for senone score files. */
227};
228
229
/* NOTE(review): the opening line of this struct is absent from the listing,
 * so its tag is not visible here. The single member is a hash-table
 * iterator, presumably walking the decoder's `searches` table -- confirm
 * against the real header. */
231 hash_iter_t itor;
232};
233
234#endif /* __POCKETSPHINX_INTERNAL_H__ */
Acoustic model structures for PocketSphinx.
Building triphones for a dictionary.
Operations on dictionary.
int32 frame_idx_t
Type for frame index values.
Definition: hmm.h:64
Main header file for the PocketSphinx decoder.
struct ps_searchfuncs_s ps_searchfuncs_t
V-table for search algorithm.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
Acoustic model structure.
Definition: acmod.h:148
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
A structure for a dictionary.
Definition: dict.h:76
Decoder object.
cmd_ln_t * config
Configuration.
ps_search_t * phone_loop
Phone loop search for lookahead.
char const * senlogdir
Log directory for senone score files.
int pl_window
Window size for phoneme lookahead.
uint32 uttno
Utterance counter.
uint32 n_frame
Total number of frames processed.
hash_table_t * searches
Set of search modules.
char const * mfclogdir
Log directory for MFCC files.
char const * rawlogdir
Log directory for audio files.
int refcount
Reference count.
ptmr_t perf
Performance counter for all of decoding.
logmath_t * lmath
Log math computation.
ps_search_t * search
Currently active search module.
dict2pid_t * d2p
Dictionary to senone mapping.
dict_t * dict
Pronunciation dictionary.
acmod_t * acmod
Acoustic model.
Word graph structure used in bestpath/nbest search.
Base structure for search module.
int32 finish_wid
Finish word ID.
acmod_t * acmod
Acoustic model.
ps_search_t * pls
Phoneme loop for lookahead.
int32 post
Utterance posterior probability.
dict2pid_t * d2p
Dictionary to senone mappings.
ps_lattice_t * dag
Current hypothesis word graph.
dict_t * dict
Pronunciation dictionary.
ps_latlink_t * last_link
Final link in best path.
char * hyp_str
Current hypothesis string.
ps_searchfuncs_t * vt
V-table of search methods.
cmd_ln_t * config
Configuration.
int32 silence_wid
Silence word ID.
int32 n_words
Number of words known to search (may be less than in the dictionary)
int32 start_wid
Start word ID.
V-table for search algorithm.
Base structure for hypothesis segmentation iterator.
ps_search_t * search
Search object from whence this came.
float32 lwf
Language weight factor (for second-pass searches)
int32 lback
Language model backoff.
ps_segfuncs_t * vt
V-table of seg methods.
int32 lscr
Language model score.
int32 ascr
Acoustic score.
frame_idx_t sf
Start frame.
char const * word
Word string (pointer into dictionary hash)
frame_idx_t ef
End frame.
int32 prob
Log posterior probability.