SphinxBase 5prealpha
jsgf.c
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2007 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38#include <string.h>
39#include <assert.h>
40
42#include "sphinxbase/strfuncs.h"
44#include "sphinxbase/filename.h"
45#include "sphinxbase/err.h"
46#include "sphinxbase/jsgf.h"
47
48#include "jsgf_internal.h"
49#include "jsgf_parser.h"
50#include "jsgf_scanner.h"
51
52extern int yyparse(void *scanner, jsgf_t * jsgf);
53
61static int expand_rule(jsgf_t * grammar, jsgf_rule_t * rule,
62 int rule_entry, int rule_exit);
63
65jsgf_atom_new(char *name, float weight)
66{
67 jsgf_atom_t *atom;
68
69 atom = ckd_calloc(1, sizeof(*atom));
70 atom->name = ckd_salloc(name);
71 atom->weight = weight;
72 return atom;
73}
74
75int
76jsgf_atom_free(jsgf_atom_t * atom)
77{
78 if (atom == NULL)
79 return 0;
80 ckd_free(atom->name);
81 ckd_free(atom);
82 return 0;
83}
84
85jsgf_t *
87{
88 jsgf_t *grammar;
89
90 grammar = ckd_calloc(1, sizeof(*grammar));
91 /* If this is an imported/subgrammar, then we will share a global
92 * namespace with the parent grammar. */
93 if (parent) {
94 grammar->rules = parent->rules;
95 grammar->imports = parent->imports;
96 grammar->searchpath = parent->searchpath;
97 grammar->parent = parent;
98 }
99 else {
100 grammar->rules = hash_table_new(64, 0);
101 grammar->imports = hash_table_new(16, 0);
102 }
103
104 return grammar;
105}
106
107void
109{
110 /* FIXME: Probably should just use refcounting instead. */
111 if (jsgf->parent == NULL) {
112 hash_iter_t *itor;
113 gnode_t *gn;
114
115 for (itor = hash_table_iter(jsgf->rules); itor;
116 itor = hash_table_iter_next(itor)) {
117 ckd_free((char *) itor->ent->key);
118 jsgf_rule_free((jsgf_rule_t *) itor->ent->val);
119 }
120 hash_table_free(jsgf->rules);
121 for (itor = hash_table_iter(jsgf->imports); itor;
122 itor = hash_table_iter_next(itor)) {
123 ckd_free((char *) itor->ent->key);
124 jsgf_grammar_free((jsgf_t *) itor->ent->val);
125 }
127 for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
128 ckd_free(gnode_ptr(gn));
129 glist_free(jsgf->searchpath);
130 for (gn = jsgf->links; gn; gn = gnode_next(gn))
131 ckd_free(gnode_ptr(gn));
132 glist_free(jsgf->links);
133 }
134 ckd_free(jsgf->name);
135 ckd_free(jsgf->version);
136 ckd_free(jsgf->charset);
137 ckd_free(jsgf->locale);
138 ckd_free(jsgf);
139}
140
141static void
142jsgf_rhs_free(jsgf_rhs_t * rhs)
143{
144 gnode_t *gn;
145
146 if (rhs == NULL)
147 return;
148
149 jsgf_rhs_free(rhs->alt);
150 for (gn = rhs->atoms; gn; gn = gnode_next(gn))
151 jsgf_atom_free(gnode_ptr(gn));
152 glist_free(rhs->atoms);
153 ckd_free(rhs);
154}
155
157jsgf_kleene_new(jsgf_t * jsgf, jsgf_atom_t * atom, int plus)
158{
159 jsgf_rule_t *rule;
160 jsgf_atom_t *rule_atom;
161 jsgf_rhs_t *rhs;
162
163 /* Generate an "internal" rule of the form (<NULL> | <name> <g0006>) */
164 /* Or if plus is true, (<name> | <name> <g0006>) */
165 rhs = ckd_calloc(1, sizeof(*rhs));
166 if (plus)
167 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0));
168 else
169 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0));
170 rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
171 rule_atom = jsgf_atom_new(rule->name, 1.0);
172 rhs = ckd_calloc(1, sizeof(*rhs));
173 rhs->atoms = glist_add_ptr(NULL, rule_atom);
174 rhs->atoms = glist_add_ptr(rhs->atoms, atom);
175 rule->rhs->alt = rhs;
176
177 return jsgf_atom_new(rule->name, 1.0);
178}
179
181jsgf_optional_new(jsgf_t * jsgf, jsgf_rhs_t * exp)
182{
183 jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs));
184 jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0);
185 rhs->alt = exp;
186 rhs->atoms = glist_add_ptr(NULL, atom);
187 return jsgf_define_rule(jsgf, NULL, rhs, 0);
188}
189
190void
191jsgf_add_link(jsgf_t * grammar, jsgf_atom_t * atom, int from, int to)
192{
193 jsgf_link_t *link;
194
195 link = ckd_calloc(1, sizeof(*link));
196 link->from = from;
197 link->to = to;
198 link->atom = atom;
199 grammar->links = glist_add_ptr(grammar->links, link);
200}
201
202static char *
203extract_grammar_name(char *rule_name)
204{
205 char *dot_pos;
206 char *grammar_name = ckd_salloc(rule_name + 1);
207 if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) {
208 ckd_free(grammar_name);
209 return NULL;
210 }
211 *dot_pos = '\0';
212 return grammar_name;
213}
214
215char const *
217{
218 return jsgf->name;
219}
220
221static char *
222jsgf_fullname(jsgf_t * jsgf, const char *name)
223{
224 char *fullname;
225
226 /* Check if it is already qualified */
227 if (strchr(name + 1, '.'))
228 return ckd_salloc(name);
229
230 /* Skip leading < in name */
231 fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4);
232 sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
233 return fullname;
234}
235
236static char *
237jsgf_fullname_from_rule(jsgf_rule_t * rule, const char *name)
238{
239 char *fullname, *grammar_name;
240
241 /* Check if it is already qualified */
242 if (strchr(name + 1, '.'))
243 return ckd_salloc(name);
244
245 /* Skip leading < in name */
246 if ((grammar_name = extract_grammar_name(rule->name)) == NULL)
247 return ckd_salloc(name);
248 fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
249 sprintf(fullname, "<%s.%s", grammar_name, name + 1);
250 ckd_free(grammar_name);
251
252 return fullname;
253}
254
255/* Extract as rulename everything after the secondlast dot, if existent.
256 * Because everything before the secondlast dot is the path-specification. */
257static char *
258importname2rulename(char *importname)
259{
260 char *rulename = ckd_salloc(importname);
261 char *last_dotpos;
262 char *secondlast_dotpos;
263
264 if ((last_dotpos = strrchr(rulename + 1, '.')) != NULL) {
265 *last_dotpos = '\0';
266 if ((secondlast_dotpos = strrchr(rulename + 1, '.')) != NULL) {
267 *last_dotpos = '.';
268 *secondlast_dotpos = '<';
269 secondlast_dotpos = ckd_salloc(secondlast_dotpos);
270 ckd_free(rulename);
271 return secondlast_dotpos;
272 }
273 else {
274 *last_dotpos = '.';
275 return rulename;
276 }
277 }
278 else {
279 return rulename;
280 }
281}
282
283#define NO_NODE -1
284#define RECURSIVE_NODE -2
285
294static int
295expand_rhs(jsgf_t * grammar, jsgf_rule_t * rule, jsgf_rhs_t * rhs,
296 int rule_entry, int rule_exit)
297{
298 gnode_t *gn;
299 int lastnode;
300
301 /* Last node expanded in this sequence. */
302 lastnode = rule_entry;
303
304 /* Iterate over atoms in rhs and generate links/nodes */
305 for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
306 jsgf_atom_t *atom = gnode_ptr(gn);
307
308 if (jsgf_atom_is_rule(atom)) {
309 jsgf_rule_t *subrule;
310 char *fullname;
311 gnode_t *subnode;
312 jsgf_rule_stack_t *rule_stack_entry = NULL;
313
314 /* Special case for <NULL> and <VOID> pseudo-rules
315 If this is the only atom in the rhs, and it's the
316 first rhs in the rule, then emit a null transition,
317 creating an exit state if needed. */
318 if (0 == strcmp(atom->name, "<NULL>")) {
319 if (gn == rhs->atoms && gnode_next(gn) == NULL) {
320 if (rule_exit == NO_NODE) {
321 jsgf_add_link(grammar, atom,
322 lastnode, grammar->nstate);
323 rule_exit = lastnode = grammar->nstate;
324 ++grammar->nstate;
325 }
326 else {
327 jsgf_add_link(grammar, atom, lastnode, rule_exit);
328 }
329 }
330 continue;
331 }
332 else if (0 == strcmp(atom->name, "<VOID>")) {
333 /* Make this entire RHS unspeakable */
334 return NO_NODE;
335 }
336
337 fullname = jsgf_fullname_from_rule(rule, atom->name);
339 (grammar->rules, fullname, (void **) &subrule) == -1) {
340 E_ERROR("Undefined rule in RHS: %s\n", fullname);
341 ckd_free(fullname);
342 return NO_NODE;
343 }
344 ckd_free(fullname);
345
346 /* Look for this subrule in the stack of expanded rules */
347 for (subnode = grammar->rulestack; subnode;
348 subnode = gnode_next(subnode)) {
349 rule_stack_entry =
350 (jsgf_rule_stack_t *) gnode_ptr(subnode);
351 if (rule_stack_entry->rule == subrule)
352 break;
353 }
354
355 if (subnode != NULL) {
356 /* Allow right-recursion only. */
357 if (gnode_next(gn) != NULL) {
358 E_ERROR
359 ("Only right-recursion is permitted (in %s.%s)\n",
360 grammar->name, rule->name);
361 return NO_NODE;
362 }
363 /* Add a link back to the beginning of this rule instance */
364 E_INFO("Right recursion %s %d => %d\n", atom->name,
365 lastnode, rule_stack_entry->entry);
366 jsgf_add_link(grammar, atom, lastnode,
367 rule_stack_entry->entry);
368
369 /* Let our caller know that this rhs didn't reach an
370 end state. */
371 lastnode = RECURSIVE_NODE;
372 }
373 else {
374 /* If this is the last atom in this rhs, link its
375 expansion to the parent rule's exit state.
376 Otherwise, create a new exit state for it. */
377 int subruleexit = NO_NODE;
378 if (gnode_next(gn) == NULL && rule_exit >= 0)
379 subruleexit = rule_exit;
380
381 /* Expand the subrule */
382 lastnode =
383 expand_rule(grammar, subrule, lastnode, subruleexit);
384
385 if (lastnode == NO_NODE)
386 return NO_NODE;
387 }
388 }
389 else {
390 /* An exit-state is created if this isn't the last atom
391 in the rhs, or if the containing rule doesn't have an
392 exit state yet.
393 Otherwise, the rhs's exit state becomes the containing
394 rule's exit state. */
395 int exitstate;
396 if (gnode_next(gn) == NULL && rule_exit >= 0) {
397 exitstate = rule_exit;
398 }
399 else {
400 exitstate = grammar->nstate;
401 ++grammar->nstate;
402 }
403
404 /* Add a link for this token */
405 jsgf_add_link(grammar, atom, lastnode, exitstate);
406 lastnode = exitstate;
407 }
408 }
409
410 return lastnode;
411}
412
413static int
414expand_rule(jsgf_t * grammar, jsgf_rule_t * rule, int rule_entry,
415 int rule_exit)
416{
417 jsgf_rule_stack_t *rule_stack_entry;
418 jsgf_rhs_t *rhs;
419
420 /* Push this rule onto the stack */
421 rule_stack_entry =
423 rule_stack_entry->rule = rule;
424 rule_stack_entry->entry = rule_entry;
425 grammar->rulestack = glist_add_ptr(grammar->rulestack,
426 rule_stack_entry);
427
428 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
429 int lastnode;
430
431 lastnode = expand_rhs(grammar, rule, rhs, rule_entry, rule_exit);
432
433 if (lastnode == NO_NODE) {
434 return NO_NODE;
435 }
436 else if (lastnode == RECURSIVE_NODE) {
437 /* The rhs ended with right-recursion, i.e. a transition to
438 an earlier state. Nothing needs to happen at this level. */
439 ;
440 }
441 else if (rule_exit == NO_NODE) {
442 /* If this rule doesn't have an exit state yet, use the exit
443 state of its first right-hand-side.
444 All other right-hand-sides will use this exit state. */
445 assert(lastnode >= 0);
446 rule_exit = lastnode;
447 }
448 }
449
450 /* If no exit-state was created, use the entry-state. */
451 if (rule_exit == NO_NODE) {
452 rule_exit = rule_entry;
453 }
454
455 /* Pop this rule from the rule stack */
456 ckd_free(gnode_ptr(grammar->rulestack));
457 grammar->rulestack = gnode_free(grammar->rulestack, NULL);
458
459 return rule_exit;
460}
461
464{
465 return hash_table_iter(grammar->rules);
466}
467
469jsgf_get_rule(jsgf_t * grammar, char const *name)
470{
471 void *val;
472 char *fullname;
473
474 fullname = string_join("<", name, ">", NULL);
475 if (hash_table_lookup(grammar->rules, fullname, &val) < 0) {
476 ckd_free(fullname);
477 return NULL;
478 }
479 ckd_free(fullname);
480 return (jsgf_rule_t *) val;
481}
482
485{
486 jsgf_rule_iter_t *itor;
487 jsgf_rule_t *public_rule = NULL;
488
489 for (itor = jsgf_rule_iter(grammar); itor;
490 itor = jsgf_rule_iter_next(itor)) {
491 jsgf_rule_t *rule = jsgf_rule_iter_rule(itor);
492 if (jsgf_rule_public(rule)) {
493 const char *rule_name = jsgf_rule_name(rule);
494 char *dot_pos;
495 if ((dot_pos = strrchr(rule_name + 1, '.')) == NULL) {
496 public_rule = rule;
498 break;
499 }
500 if (0 ==
501 strncmp(rule_name + 1, jsgf_grammar_name(grammar),
502 dot_pos - rule_name - 1)) {
503 public_rule = rule;
505 break;
506 }
507 }
508 }
509 return public_rule;
510}
511
512char const *
514{
515 return rule->name;
516}
517
518int
520{
521 return rule->is_public;
522}
523
524static fsg_model_t *
525jsgf_build_fsg_internal(jsgf_t * grammar, jsgf_rule_t * rule,
526 logmath_t * lmath, float32 lw, int do_closure)
527{
528 fsg_model_t *fsg;
529 glist_t nulls;
530 gnode_t *gn;
531 int rule_entry, rule_exit;
532
533 /* Clear previous links */
534 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
535 ckd_free(gnode_ptr(gn));
536 }
537 glist_free(grammar->links);
538 grammar->links = NULL;
539 grammar->nstate = 0;
540
541 /* Create the top-level entry state, and expand the
542 top-level rule. */
543 rule_entry = grammar->nstate++;
544 rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE);
545
546 /* If no exit-state was created, create one. */
547 if (rule_exit == NO_NODE) {
548 rule_exit = grammar->nstate++;
549 jsgf_add_link(grammar, NULL, rule_entry, rule_exit);
550 }
551
552 fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
553 fsg->start_state = rule_entry;
554 fsg->final_state = rule_exit;
555 grammar->links = glist_reverse(grammar->links);
556 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
557 jsgf_link_t *link = gnode_ptr(gn);
558
559 if (link->atom) {
560 if (jsgf_atom_is_rule(link->atom)) {
561 fsg_model_null_trans_add(fsg, link->from, link->to,
562 logmath_log(lmath,
563 link->atom->weight));
564 }
565 else {
566 int wid = fsg_model_word_add(fsg, link->atom->name);
567 fsg_model_trans_add(fsg, link->from, link->to,
568 logmath_log(lmath, link->atom->weight),
569 wid);
570 }
571 }
572 else {
573 fsg_model_null_trans_add(fsg, link->from, link->to, 0);
574 }
575 }
576 if (do_closure) {
577 nulls = fsg_model_null_trans_closure(fsg, NULL);
578 glist_free(nulls);
579 }
580
581 return fsg;
582}
583
586 logmath_t * lmath, float32 lw)
587{
588 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
589}
590
593 logmath_t * lmath, float32 lw)
594{
595 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
596}
597
599jsgf_read_file(const char *file, logmath_t * lmath, float32 lw)
600{
601 fsg_model_t *fsg;
602 jsgf_rule_t *rule;
603 jsgf_t *jsgf;
604 jsgf_rule_iter_t *itor;
605
606 if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) {
607 E_ERROR("Error parsing file: %s\n", file);
608 return NULL;
609 }
610
611 rule = NULL;
612 for (itor = jsgf_rule_iter(jsgf); itor;
613 itor = jsgf_rule_iter_next(itor)) {
614 rule = jsgf_rule_iter_rule(itor);
615 if (jsgf_rule_public(rule)) {
617 break;
618 }
619 }
620 if (rule == NULL) {
621 E_ERROR("No public rules found in %s\n", file);
622 return NULL;
623 }
624 fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
625 jsgf_grammar_free(jsgf);
626 return fsg;
627}
628
630jsgf_read_string(const char *string, logmath_t * lmath, float32 lw)
631{
632 fsg_model_t *fsg;
633 jsgf_rule_t *rule;
634 jsgf_t *jsgf;
635 jsgf_rule_iter_t *itor;
636
637 if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) {
638 E_ERROR("Error parsing input string\n");
639 return NULL;
640 }
641
642 rule = NULL;
643 for (itor = jsgf_rule_iter(jsgf); itor;
644 itor = jsgf_rule_iter_next(itor)) {
645 rule = jsgf_rule_iter_rule(itor);
646 if (jsgf_rule_public(rule)) {
648 break;
649 }
650 }
651 if (rule == NULL) {
652 jsgf_grammar_free(jsgf);
653 E_ERROR("No public rules found in input string\n");
654 return NULL;
655 }
656 fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
657 jsgf_grammar_free(jsgf);
658 return fsg;
659}
660
661
662int
663jsgf_write_fsg(jsgf_t * grammar, jsgf_rule_t * rule, FILE * outfh)
664{
665 fsg_model_t *fsg;
666 logmath_t *lmath = logmath_init(1.0001, 0, 0);
667
668 if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL)
669 goto error_out;
670
671 fsg_model_write(fsg, outfh);
672 logmath_free(lmath);
673 return 0;
674
675 error_out:
676 logmath_free(lmath);
677 return -1;
678}
679
681jsgf_define_rule(jsgf_t * jsgf, char *name, jsgf_rhs_t * rhs,
682 int is_public)
683{
684 jsgf_rule_t *rule;
685 void *val;
686
687 if (name == NULL) {
688 name = ckd_malloc(strlen(jsgf->name) + 16);
689 sprintf(name, "<%s.g%05d>", jsgf->name,
690 hash_table_inuse(jsgf->rules));
691 }
692 else {
693 char *newname;
694
695 newname = jsgf_fullname(jsgf, name);
696 name = newname;
697 }
698
699 rule = ckd_calloc(1, sizeof(*rule));
700 rule->refcnt = 1;
701 rule->name = ckd_salloc(name);
702 rule->rhs = rhs;
703 rule->is_public = is_public;
704
705 E_INFO("Defined rule: %s%s\n",
706 rule->is_public ? "PUBLIC " : "", rule->name);
707 val = hash_table_enter(jsgf->rules, name, rule);
708 if (val != (void *) rule) {
709 E_WARN("Multiply defined symbol: %s\n", name);
710 }
711 return rule;
712}
713
715jsgf_rule_retain(jsgf_rule_t * rule)
716{
717 ++rule->refcnt;
718 return rule;
719}
720
721int
722jsgf_rule_free(jsgf_rule_t * rule)
723{
724 if (rule == NULL)
725 return 0;
726 if (--rule->refcnt > 0)
727 return rule->refcnt;
728 jsgf_rhs_free(rule->rhs);
729 ckd_free(rule->name);
730 ckd_free(rule);
731 return 0;
732}
733
734
735/* FIXME: This should go in libsphinxutil */
736static char *
737path_list_search(glist_t paths, char *path)
738{
739 gnode_t *gn;
740
741 for (gn = paths; gn; gn = gnode_next(gn)) {
742 char *fullpath;
743 FILE *tmp;
744
745 fullpath = string_join(gnode_ptr(gn), "/", path, NULL);
746 tmp = fopen(fullpath, "r");
747 if (tmp != NULL) {
748 fclose(tmp);
749 return fullpath;
750 }
751 else {
752 ckd_free(fullpath);
753 }
754 }
755 return NULL;
756}
757
759jsgf_import_rule(jsgf_t * jsgf, char *name)
760{
761 char *c, *path, *newpath;
762 size_t namelen, packlen;
763 void *val;
764 jsgf_t *imp;
765 int import_all;
766
767 /* Trim the leading and trailing <> */
768 namelen = strlen(name);
769 path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */
770 strcpy(path, name + 1);
771 /* Split off the first part of the name */
772 c = strrchr(path, '.');
773 if (c == NULL) {
774 E_ERROR("Imported rule is not qualified: %s\n", name);
775 ckd_free(path);
776 return NULL;
777 }
778 packlen = c - path;
779 *c = '\0';
780
781 /* Look for import foo.* */
782 import_all = (strlen(name) > 2
783 && 0 == strcmp(name + namelen - 3, ".*>"));
784
785 /* Construct a filename. */
786 for (c = path; *c; ++c)
787 if (*c == '.')
788 *c = '/';
789 strcat(path, ".gram");
790 newpath = path_list_search(jsgf->searchpath, path);
791 if (newpath == NULL) {
792 E_ERROR("Failed to find grammar %s\n", path);
793 ckd_free(path);
794 return NULL;
795 }
796 ckd_free(path);
797
798 path = newpath;
799 E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
800
801 /* FIXME: Also, we need to make sure that path is fully qualified
802 * here, by adding any prefixes from jsgf->name to it. */
803 /* See if we have parsed it already */
804 if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
805 E_INFO("Already imported %s\n", path);
806 imp = val;
807 ckd_free(path);
808 }
809 else {
810 /* If not, parse it. */
811 imp = jsgf_parse_file(path, jsgf);
812 val = hash_table_enter(jsgf->imports, path, imp);
813 if (val != (void *) imp) {
814 E_WARN("Multiply imported file: %s\n", path);
815 }
816 }
817 if (imp != NULL) {
818 hash_iter_t *itor;
819 /* Look for public rules matching rulename. */
820 for (itor = hash_table_iter(imp->rules); itor;
821 itor = hash_table_iter_next(itor)) {
822 hash_entry_t *he = itor->ent;
823 jsgf_rule_t *rule = hash_entry_val(he);
824 int rule_matches;
825 char *rule_name = importname2rulename(name);
826
827 if (import_all) {
828 /* Match package name (symbol table is shared) */
829 rule_matches =
830 !strncmp(rule_name, rule->name, packlen + 1);
831 }
832 else {
833 /* Exact match */
834 rule_matches = !strcmp(rule_name, rule->name);
835 }
836 ckd_free(rule_name);
837 if (rule->is_public && rule_matches) {
838 void *val;
839 char *newname;
840
841 /* Link this rule into the current namespace. */
842 c = strrchr(rule->name, '.');
843 assert(c != NULL);
844 newname = jsgf_fullname(jsgf, c);
845
846 E_INFO("Imported %s\n", newname);
847 val = hash_table_enter(jsgf->rules, newname,
848 jsgf_rule_retain(rule));
849 if (val != (void *) rule) {
850 E_WARN("Multiply defined symbol: %s\n", newname);
851 }
852 if (!import_all) {
854 return rule;
855 }
856 }
857 }
858 }
859
860 return NULL;
861}
862
863static void
864jsgf_set_search_path(jsgf_t * jsgf, const char *filename)
865{
866 char *jsgf_path;
867
868#if !defined(_WIN32_WCE)
869 if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
870 char *word, *c;
871 /* FIXME: This should be a function in libsphinxbase. */
872 word = jsgf_path = ckd_salloc(jsgf_path);
873 while ((c = strchr(word, ':'))) {
874 *c = '\0';
875 jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word);
876 word = c + 1;
877 }
878 jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word);
879 jsgf->searchpath = glist_reverse(jsgf->searchpath);
880 return;
881 }
882#endif
883
884 if (!filename) {
885 jsgf->searchpath =
887 return;
888 }
889
890 jsgf_path = ckd_salloc(filename);
891 path2dirname(filename, jsgf_path);
892 jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path);
893}
894
895jsgf_t *
896jsgf_parse_file(const char *filename, jsgf_t * parent)
897{
898 yyscan_t yyscanner;
899 jsgf_t *jsgf;
900 int yyrv;
901 FILE *in = NULL;
902
903 yylex_init(&yyscanner);
904 if (filename == NULL) {
905 yyset_in(stdin, yyscanner);
906 }
907 else {
908 in = fopen(filename, "r");
909 if (in == NULL) {
910 E_ERROR_SYSTEM("Failed to open %s for parsing", filename);
911 return NULL;
912 }
913 yyset_in(in, yyscanner);
914 }
915
916 jsgf = jsgf_grammar_new(parent);
917
918 if (!parent)
919 jsgf_set_search_path(jsgf, filename);
920
921 yyrv = yyparse(yyscanner, jsgf);
922 if (yyrv != 0) {
923 E_ERROR("Failed to parse JSGF grammar from '%s'\n",
924 filename ? filename : "(stdin)");
925 jsgf_grammar_free(jsgf);
926 yylex_destroy(yyscanner);
927 return NULL;
928 }
929 if (in)
930 fclose(in);
931 yylex_destroy(yyscanner);
932
933 return jsgf;
934}
935
936jsgf_t *
937jsgf_parse_string(const char *string, jsgf_t * parent)
938{
939 yyscan_t yyscanner;
940 jsgf_t *jsgf;
941 int yyrv;
942 YY_BUFFER_STATE buf;
943
944 yylex_init(&yyscanner);
945 buf = yy_scan_string(string, yyscanner);
946
947 jsgf = jsgf_grammar_new(parent);
948 if (!parent)
949 jsgf_set_search_path(jsgf, NULL);
950
951 yyrv = yyparse(yyscanner, jsgf);
952 if (yyrv != 0) {
953 E_ERROR("Failed to parse JSGF grammar from input string\n");
954 jsgf_grammar_free(jsgf);
955 yy_delete_buffer(buf, yyscanner);
956 yylex_destroy(yyscanner);
957 return NULL;
958 }
959 yy_delete_buffer(buf, yyscanner);
960 yylex_destroy(yyscanner);
961
962 return jsgf;
963}
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition ckd_alloc.c:244
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition ckd_alloc.h:253
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition ckd_alloc.h:248
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition ckd_alloc.h:264
Implementation of logging routines.
#define E_ERROR(...)
Print error message to error log.
Definition err.h:104
#define E_INFO(...)
Print logging information to standard error stream.
Definition err.h:114
#define E_ERROR_SYSTEM(...)
Print error text; Call perror("");.
Definition err.h:99
#define E_WARN(...)
Print warning message to error log.
Definition err.h:109
File names related operation.
SPHINXBASE_EXPORT void path2dirname(const char *path, char *dir)
Strip off filename from the given path and copy the directory name into dir Caller must have allocate...
Definition filename.c:68
SPHINXBASE_EXPORT glist_t glist_reverse(glist_t g)
Reverse the order of the given glist.
Definition glist.c:169
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed.
Definition glist.c:133
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition glist.c:74
SPHINXBASE_EXPORT gnode_t * gnode_free(gnode_t *gn, gnode_t *pred)
Free the given node, gn, of a glist, pred being its predecessor in the list.
Definition glist.c:257
#define gnode_ptr(g)
Head of a list of gnodes.
Definition glist.h:109
Hash table implementation.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition hash_table.c:688
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition hash_table.c:656
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition hash_table.c:682
SPHINXBASE_EXPORT int32 hash_table_lookup(hash_table_t *h, const char *key, void **val)
Look up a key in a hash table and optionally return the associated value.
Definition hash_table.c:302
#define hash_entry_val(e)
Access macros.
Definition hash_table.h:175
SPHINXBASE_EXPORT void * hash_table_enter(hash_table_t *h, const char *key, void *val)
Try to add a new entry with given key and associated value to hash table h.
Definition hash_table.c:501
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition hash_table.c:646
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition hash_table.c:158
jsgf_rule_iter_t * jsgf_rule_iter(jsgf_t *grammar)
Get an iterator over all rules in a grammar.
Definition jsgf.c:463
void jsgf_grammar_free(jsgf_t *jsgf)
Free a JSGF grammar.
Definition jsgf.c:108
int jsgf_rule_public(jsgf_rule_t *rule)
Test if a rule is public or not.
Definition jsgf.c:519
char const * jsgf_rule_name(jsgf_rule_t *rule)
Get the rule name from a rule.
Definition jsgf.c:513
fsg_model_t * jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
Definition jsgf.c:592
jsgf_t * jsgf_grammar_new(jsgf_t *parent)
Create a new JSGF grammar.
Definition jsgf.c:86
fsg_model_t * jsgf_read_string(const char *string, logmath_t *lmath, float32 lw)
Read JSGF from string and return FSG object from it.
Definition jsgf.c:630
int jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
Convert a JSGF rule to Sphinx FSG text form.
Definition jsgf.c:663
fsg_model_t * jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
Definition jsgf.c:585
fsg_model_t * jsgf_read_file(const char *file, logmath_t *lmath, float32 lw)
Read JSGF from file and return FSG object from it.
Definition jsgf.c:599
jsgf_rule_t * jsgf_get_public_rule(jsgf_t *grammar)
Returns the first public rule of the grammar.
Definition jsgf.c:484
char const * jsgf_grammar_name(jsgf_t *jsgf)
Get the grammar name from the file.
Definition jsgf.c:216
jsgf_t * jsgf_parse_string(const char *string, jsgf_t *parent)
Parse a JSGF grammar from a string.
Definition jsgf.c:937
jsgf_rule_t * jsgf_get_rule(jsgf_t *grammar, char const *name)
Get a rule by name from a grammar.
Definition jsgf.c:469
jsgf_t * jsgf_parse_file(const char *filename, jsgf_t *parent)
Parse a JSGF grammar from a file.
Definition jsgf.c:896
JSGF grammar compiler.
#define jsgf_rule_iter_rule(itor)
Get the current rule in a rule iterator.
Definition jsgf.h:127
#define jsgf_rule_iter_free(itor)
Free a rule iterator (if the end hasn't been reached).
Definition jsgf.h:132
#define jsgf_rule_iter_next(itor)
Advance an iterator to the next rule in the grammar.
Definition jsgf.h:122
Internal definitions for JSGF grammar compiler.
SPHINXBASE_EXPORT int logmath_free(logmath_t *lmath)
Free a log table.
Definition logmath.c:342
SPHINXBASE_EXPORT logmath_t * logmath_init(float64 base, int shift, int use_table)
Initialize a log math computation table.
Definition logmath.c:62
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition logmath.c:447
Miscellaneous useful string functions.
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string.
Definition strfuncs.c:70
Word level FSG definition.
Definition fsg_model.h:99
int32 start_state
Must be in the range [0..n_state-1].
Definition fsg_model.h:109
int32 final_state
Must be in the range [0..n_state-1].
Definition fsg_model.h:110
A node in a generic list.
Definition glist.h:100
A note by ARCHAN at 20050510: Technically what we use is so-called "hash table with buckets" which is...
Definition hash_table.h:149
void * val
Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...
Definition hash_table.h:155
hash_entry_t * ent
Current entry in that table.
Definition hash_table.h:170
float weight
Weight (default 1)
char * name
Rule or token name.
glist_t atoms
Sequence of items.
jsgf_rhs_t * alt
Linked list of alternates.
char * name
Rule name (NULL for an alternation/grouping)
int is_public
Is this rule marked 'public'?
jsgf_rhs_t * rhs
Expansion.
int refcnt
Reference count.
int entry
The entry-state for this expansion.
jsgf_rule_t * rule
The rule being expanded.
char * locale
JSGF locale (default C)
glist_t rulestack
Stack of currently expanded rules.
int nstate
Number of generated states.
glist_t links
Generated FSG links.
hash_table_t * imports
Pointers to imported grammars.
glist_t searchpath
List of directories to search for grammars.
char * name
Grammar name.
char * charset
JSGF charset (default UTF-8)
char * version
JSGF version (from header)
jsgf_t * parent
Parent grammar (if this is an imported one)
hash_table_t * rules
Defined or imported rules in this grammar.