Module nltk_lite.corpora.ycoe
Reads tokens from the York-Toronto-Helsinki Parsed Corpus of
Old English Prose (YCOE), a 1.5 million word syntactically-
annotated corpus of Old English prose texts. The corpus is
distributed by the Oxford Text Archive: http://www.ota.ahds.ac.uk/
The YCOE corpus is divided into 100 files, each representing
an Old English prose text. Tags used within each text comply
with the YCOE standard: http://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm
Output of the reader is as follows:
Raw:
['+D+atte',
'on',
'o+dre',
'wisan',
'sint',
'to',
'manianne',
'+da',
'unge+dyldegan',
',',
'&',
'on',
'o+dre',
'+da',
'ge+dyldegan',
'.']
Tagged:
[('+D+atte', 'C'),
('on', 'P'),
('o+dre', 'ADJ'),
('wisan', 'N'),
('sint', 'BEPI'),
('to', 'TO'),
('manianne', 'VB^D'),
('+da', 'D^N'),
('unge+dyldegan', 'ADJ^N'),
(',', ','),
('&', 'CONJ'),
('on', 'P'),
('o+dre', 'ADJ'),
('+da', 'D^N'),
('ge+dyldegan', 'ADJ^N'),
('.', '.')]
Bracket Parse:
(CP-THT: (C: '+D+atte') (IP-SUB: (IP-SUB-0: (PP: (P: 'on') (NP: (ADJ: 'o+dre') (N: 'wisan')))
(BEPI: 'sint') (IP-INF: (TO: 'to') (VB^D: 'manianne') (NP: '*-1')) (NP-NOM-1: (D^N: '+da')
(ADJ^N: 'unge+dyldegan'))) (,: ',') (CONJP: (CONJ: '&') (IPX-SUB-CON=0: (PP: (P: 'on')
(NP: (ADJ: 'o+dre'))) (NP-NOM: (D^N: '+da') (ADJ^N: 'ge+dyldegan'))))) (.: '.')),
Chunk Parse:
[(S:
('C', '+D+atte')
(PP: ('P', 'on') ('ADJ', 'o+dre') ('N', 'wisan'))
('BEPI', 'sint') ('TO', 'to') ('VB^D', 'manianne')
(NP: ('NP', '*-1')) ('D^N', '+da') ('ADJ^N', 'unge+dyldegan') (',', ',') ('CONJ', '&')
(PP: ('P', 'on') ('ADJ', 'o+dre')) ('D^N', '+da') ('ADJ^N', 'ge+dyldegan') ('.', '.'))]
Function Summary |
|
bracket_parse(files)
|
|
chunked(files,
chunk_types,
top_node,
partial_match,
collapse_partials,
cascade)
|
|
demo()
|
|
raw(files)
|
|
tagged(files)
|
|
_chunk_parse(files,
chunk_types,
top_node,
partial_match,
collapse_partials,
cascade)
|
|
_parse(s)
|
|
_read(files,
conversion_function)
|
|
_strip_spaces(s)
|
Variable Summary |
dict |
item_name = {'coprefcura.o2': 'Preface to the Cura Pasto...
|
list |
items = ['coprefcura.o2', 'cosolsat2', 'coprefsolilo', '...
|
item_name
-
- Type:
-
dict
- Value:
{'coalex.o23': 'Alexanders Letter to Aristotle',
'cocathom1.o3': '\xc6lfrics Catholic Homilies I',
'cochdrul': 'Chrodegang of Metz, Rule',
'colawnorthu.o3': 'Northumbra Preosta Lagu',
'colawwllad.o4': 'Laws, William I, Lad',
'comarvel.o23': 'Marvels of the East',
'coprefcura.o2': 'Preface to the Cura Pastoralis',
'coprefsolilo': 'Preface to Augustines Soliloquies',
...
|
|
items
-
- Type:
-
list
- Value:
['coprefcura.o2',
'cosolsat2',
'coprefsolilo',
'comarvel.o23',
'cochdrul',
'coalex.o23',
'colawwllad.o4',
'cocathom1.o3',
...
|
|