• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • kdecore
  • localization
  • probers
nsMBCSSM.cpp
Go to the documentation of this file.
1/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* -*- C++ -*-
3* Copyright (C) 1998 <developer@mozilla.org>
4*
5*
6* Permission is hereby granted, free of charge, to any person obtaining
7* a copy of this software and associated documentation files (the
8* "Software"), to deal in the Software without restriction, including
9* without limitation the rights to use, copy, modify, merge, publish,
10* distribute, sublicense, and/or sell copies of the Software, and to
11* permit persons to whom the Software is furnished to do so, subject to
12* the following conditions:
13*
14* The above copyright notice and this permission notice shall be included
15* in all copies or substantial portions of the Software.
16*
17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24*/
25
26#include "nsCodingStateMachine.h"
27
28/*
29Modification from frank tang's original work:
30. 0x00 is allowed as a legal character, since some web pages contain this char in
31 the text stream.
32*/
33
34// BIG5
35
36namespace kencodingprober {
// Byte-class table for the Big5 state machine: 256 / 8 = 32 packed words,
// each built by PCK4BITS from eight class numbers, one per byte value (the
// trailing comment on every row names the byte range that row covers).
// Classes 0-4 are used; bytes 0x0e, 0x0f, 0x1b and 0xff are assigned class 0.
37static unsigned int BIG5_cls [ 256 / 8 ] = {
38//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
39PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as legal value
40PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
41PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
42PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
43PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
44PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
45PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
46PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
47PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
48PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
49PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
50PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
51PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
52PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
53PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
54PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f
55PCK4BITS(4,4,4,4,4,4,4,4), // 80 - 87
56PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f
57PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97
58PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f
59PCK4BITS(4,3,3,3,3,3,3,3), // a0 - a7
60PCK4BITS(3,3,3,3,3,3,3,3), // a8 - af
61PCK4BITS(3,3,3,3,3,3,3,3), // b0 - b7
62PCK4BITS(3,3,3,3,3,3,3,3), // b8 - bf
63PCK4BITS(3,3,3,3,3,3,3,3), // c0 - c7
64PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf
65PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7
66PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df
67PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
68PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef
69PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7
70PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff
71};
72
73
// State-transition table for Big5: 3 packed words = 24 entries, eight per
// PCK4BITS (the trailing comments give the entry indices in hex). Entries are
// eStart / eError / eItsMe or the numeric intermediate state 3.
// NOTE(review): presumably indexed by (state * class count + byte class) by
// the state-machine code declared in nsCodingStateMachine.h -- confirm there.
74static unsigned int BIG5_st [ 3] = {
75PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07
76PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError),//08-0f
77PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart) //10-17
78};
79
// One entry per Big5 byte class (0-4). NOTE(review): presumably the length in
// bytes of a character that starts with a byte of that class -- confirm.
80static const unsigned int Big5CharLenTable[] = {0, 1, 1, 2, 0};
81
// State-machine model for Big5. In initializer order: the packed class table
// (BIG5_cls), the value 5 (equal to the number of classes used in BIG5_cls and
// to the entry count of Big5CharLenTable), the packed state table (BIG5_st),
// the length table, and the encoding name.
// NOTE(review): SMModel's member names live in nsCodingStateMachine.h.
82SMModel Big5SMModel = {
83 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls },
84 5,
85 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st },
86 Big5CharLenTable,
87 "Big5",
88};
89
// Byte-class table for the EUC-JP state machine: 32 packed words, eight class
// numbers per PCK4BITS, one per byte value (row comments give the byte range).
// Classes 0-5 are used: 4 for most bytes below 0x80, 5 for 0x0e/0x0f/0x1b,
// 0x80-0x8d, 0x90-0xa0 and 0xff, 1 for 0x8e, 3 for 0x8f, 2 for 0xa1-0xdf and
// 0 for 0xe0-0xfe.
90static unsigned int EUCJP_cls [ 256 / 8 ] = {
91//PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
92PCK4BITS(4,4,4,4,4,4,4,4), // 00 - 07
93PCK4BITS(4,4,4,4,4,4,5,5), // 08 - 0f
94PCK4BITS(4,4,4,4,4,4,4,4), // 10 - 17
95PCK4BITS(4,4,4,5,4,4,4,4), // 18 - 1f
96PCK4BITS(4,4,4,4,4,4,4,4), // 20 - 27
97PCK4BITS(4,4,4,4,4,4,4,4), // 28 - 2f
98PCK4BITS(4,4,4,4,4,4,4,4), // 30 - 37
99PCK4BITS(4,4,4,4,4,4,4,4), // 38 - 3f
100PCK4BITS(4,4,4,4,4,4,4,4), // 40 - 47
101PCK4BITS(4,4,4,4,4,4,4,4), // 48 - 4f
102PCK4BITS(4,4,4,4,4,4,4,4), // 50 - 57
103PCK4BITS(4,4,4,4,4,4,4,4), // 58 - 5f
104PCK4BITS(4,4,4,4,4,4,4,4), // 60 - 67
105PCK4BITS(4,4,4,4,4,4,4,4), // 68 - 6f
106PCK4BITS(4,4,4,4,4,4,4,4), // 70 - 77
107PCK4BITS(4,4,4,4,4,4,4,4), // 78 - 7f
108PCK4BITS(5,5,5,5,5,5,5,5), // 80 - 87
109PCK4BITS(5,5,5,5,5,5,1,3), // 88 - 8f
110PCK4BITS(5,5,5,5,5,5,5,5), // 90 - 97
111PCK4BITS(5,5,5,5,5,5,5,5), // 98 - 9f
112PCK4BITS(5,2,2,2,2,2,2,2), // a0 - a7
113PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
114PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
115PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
116PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
117PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
118PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
119PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
120PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7
121PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef
122PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7
123PCK4BITS(0,0,0,0,0,0,0,5) // f8 - ff
124};
125
126
// State-transition table for EUC-JP: 5 packed words = 40 entries, eight per
// PCK4BITS (trailing comments give entry indices in hex). Entries are
// eStart / eError / eItsMe or one of the numeric intermediate states 3, 4, 5.
// NOTE(review): presumably indexed by (state * class count + byte class) --
// confirm against nsCodingStateMachine.h.
127static unsigned int EUCJP_st [ 5] = {
128PCK4BITS( 3, 4, 3, 5,eStart,eError,eError,eError),//00-07
129PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
130PCK4BITS(eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError),//10-17
131PCK4BITS(eError,eError,eStart,eError,eError,eError, 3,eError),//18-1f
132PCK4BITS( 3,eError,eError,eError,eStart,eStart,eStart,eStart) //20-27
133};
134
// One entry per EUC-JP byte class (0-5). NOTE(review): presumably the length
// in bytes of a character that starts with a byte of that class -- confirm.
135static const unsigned int EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
136
// State-machine model for EUC-JP. In initializer order: the packed class table
// (EUCJP_cls), the value 6 (equal to the number of classes used in EUCJP_cls
// and to the entry count of EUCJPCharLenTable), the packed state table
// (EUCJP_st), the length table, and the encoding name.
// NOTE(review): SMModel's member names live in nsCodingStateMachine.h.
137SMModel EUCJPSMModel = {
138 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls },
139 6,
140 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st },
141 EUCJPCharLenTable,
142 "EUC-JP",
143};
144
// Byte-class table for the EUC-KR state machine: 32 packed words, eight class
// numbers per PCK4BITS, one per byte value (row comments give the byte range).
// Classes 0-3 are used: 1 for bytes below 0x80 (except 0x0e/0x0f/0x1b, which
// are 0), 0 for 0x80-0xa0 and 0xff, 3 for 0xad-0xaf and 0xc9, and 2 for the
// remaining bytes in 0xa1-0xfe.
145static unsigned int EUCKR_cls [ 256 / 8 ] = {
146//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
147PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
148PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
149PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
150PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
151PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
152PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
153PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
154PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
155PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
156PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
157PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
158PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
159PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
160PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
161PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
162PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
163PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87
164PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
165PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
166PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
167PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7
168PCK4BITS(2,2,2,2,2,3,3,3), // a8 - af
169PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
170PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
171PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
172PCK4BITS(2,3,2,2,2,2,2,2), // c8 - cf
173PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
174PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
175PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
176PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
177PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
178PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
179};
180
181
// State-transition table for EUC-KR: 2 packed words = 16 entries, eight per
// PCK4BITS (trailing comments give entry indices in hex). Entries are
// eStart / eError / eItsMe or the numeric intermediate state 3.
// NOTE(review): presumably indexed by (state * class count + byte class) --
// confirm against nsCodingStateMachine.h.
182static unsigned int EUCKR_st [ 2] = {
183PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07
184PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
185};
186
// One entry per EUC-KR byte class (0-3). NOTE(review): presumably the length
// in bytes of a character that starts with a byte of that class -- confirm.
187static const unsigned int EUCKRCharLenTable[] = {0, 1, 2, 0};
188
// State-machine model for EUC-KR. In initializer order: the packed class table
// (EUCKR_cls), the value 4 (equal to the number of classes used in EUCKR_cls
// and to the entry count of EUCKRCharLenTable), the packed state table
// (EUCKR_st), the length table, and the encoding name.
// NOTE(review): SMModel's member names live in nsCodingStateMachine.h.
189SMModel EUCKRSMModel = {
190 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls },
191 4,
192 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st },
193 EUCKRCharLenTable,
194 "EUC-KR",
195};
196
197/* obsolete GB2312 by gb18030
198static unsigned int GB2312_cls [ 256 / 8 ] = {
199//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
200PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
201PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
202PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
203PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
204PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
205PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
206PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
207PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
208PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
209PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
210PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
211PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
212PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
213PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
214PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
215PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
216PCK4BITS(1,0,0,0,0,0,0,0), // 80 - 87
217PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
218PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
219PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
220PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7
221PCK4BITS(2,2,3,3,3,3,3,3), // a8 - af
222PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
223PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
224PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
225PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
226PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
227PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
228PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
229PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
230PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
231PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
232};
233
234
235static unsigned int GB2312_st [ 2] = {
236PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07
237PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
238};
239
240static const unsigned int GB2312CharLenTable[] = {0, 1, 2, 0};
241
242SMModel GB2312SMModel = {
243 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_cls },
244 4,
245 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_st },
246 GB2312CharLenTable,
247 "GB2312",
248};
249*/
250
251// the following state machine data was created by perl script in
252// intl/chardet/tools. It should be the same as in PSM detector.
// Byte-class table for the GB18030 state machine: 32 packed words, eight class
// numbers per PCK4BITS, one per byte value (row comments give the byte range).
// Classes 0-6 are used: 0 for 0x0e/0x0f/0x1b/0xff, 3 for 0x30-0x39, 2 for
// 0x40-0x7e, 4 for 0x7f, 5 for 0x80, 6 for 0x81-0xfe and 1 for the remaining
// bytes below 0x40.
253static unsigned int GB18030_cls [ 256 / 8 ] = {
254PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
255PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
256PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
257PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
258PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
259PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
260PCK4BITS(3,3,3,3,3,3,3,3), // 30 - 37
261PCK4BITS(3,3,1,1,1,1,1,1), // 38 - 3f
262PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
263PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
264PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
265PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
266PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
267PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
268PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
269PCK4BITS(2,2,2,2,2,2,2,4), // 78 - 7f
270PCK4BITS(5,6,6,6,6,6,6,6), // 80 - 87
271PCK4BITS(6,6,6,6,6,6,6,6), // 88 - 8f
272PCK4BITS(6,6,6,6,6,6,6,6), // 90 - 97
273PCK4BITS(6,6,6,6,6,6,6,6), // 98 - 9f
274PCK4BITS(6,6,6,6,6,6,6,6), // a0 - a7
275PCK4BITS(6,6,6,6,6,6,6,6), // a8 - af
276PCK4BITS(6,6,6,6,6,6,6,6), // b0 - b7
277PCK4BITS(6,6,6,6,6,6,6,6), // b8 - bf
278PCK4BITS(6,6,6,6,6,6,6,6), // c0 - c7
279PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf
280PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7
281PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df
282PCK4BITS(6,6,6,6,6,6,6,6), // e0 - e7
283PCK4BITS(6,6,6,6,6,6,6,6), // e8 - ef
284PCK4BITS(6,6,6,6,6,6,6,6), // f0 - f7
285PCK4BITS(6,6,6,6,6,6,6,0) // f8 - ff
286};
287
288
// State-transition table for GB18030: 6 packed words = 48 entries, eight per
// PCK4BITS (trailing comments give entry indices in hex). Entries are
// eStart / eError / eItsMe or one of the numeric intermediate states 3, 4, 5.
// NOTE(review): presumably indexed by (state * class count + byte class) --
// confirm against nsCodingStateMachine.h.
289static unsigned int GB18030_st [ 6] = {
290PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart, 3,eError),//00-07
291PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f
292PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart),//10-17
293PCK4BITS( 4,eError,eStart,eStart,eError,eError,eError,eError),//18-1f
294PCK4BITS(eError,eError, 5,eError,eError,eError,eItsMe,eError),//20-27
295PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
296};
297
// Character length per GB18030 byte class (one entry for each class 0-6).
298// To be accurate, the length of class 6 can be either 2 or 4.
299// But it is not necessary to discriminate between the two since
300// it is used for frequency analysis only, and we are validating
301// each code range there as well. So it is safe to set it to be
302// 2 here.
303static const unsigned int GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2};
304
// State-machine model for GB18030. In initializer order: the packed class
// table (GB18030_cls), the value 7 (equal to the number of classes used in
// GB18030_cls and to the entry count of GB18030CharLenTable), the packed state
// table (GB18030_st), the length table, and the encoding name.
// NOTE(review): SMModel's member names live in nsCodingStateMachine.h.
305SMModel GB18030SMModel = {
306 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls },
307 7,
308 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st },
309 GB18030CharLenTable,
310 "GB18030",
311};
312
313// sjis
314
// Byte-class table for the Shift_JIS state machine: 32 packed words, eight
// class numbers per PCK4BITS, one per byte value (row comments give the byte
// range). Classes 0-4 are used: 1 for 0x00-0x3f and 0x7f (except
// 0x0e/0x0f/0x1b, which are 0), 2 for 0x40-0x7e and 0xa0-0xdf, 3 for 0x80-0x9f
// and 0xe0-0xec, 4 for 0xed-0xfc and 0 for 0xfd-0xff.
315static unsigned int SJIS_cls [ 256 / 8 ] = {
316//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
317PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
318PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
319PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
320PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
321PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
322PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
323PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
324PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
325PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
326PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
327PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
328PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
329PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
330PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
331PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
332PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f
333PCK4BITS(3,3,3,3,3,3,3,3), // 80 - 87
334PCK4BITS(3,3,3,3,3,3,3,3), // 88 - 8f
335PCK4BITS(3,3,3,3,3,3,3,3), // 90 - 97
336PCK4BITS(3,3,3,3,3,3,3,3), // 98 - 9f
337//0xa0 is illegal in the sjis encoding, but some pages do
338//contain such a byte. We need to be more error-forgiving.
339PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
340PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
341PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
342PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
343PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
344PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
345PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
346PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
347PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7
348PCK4BITS(3,3,3,3,3,4,4,4), // e8 - ef
349PCK4BITS(4,4,4,4,4,4,4,4), // f0 - f7
350PCK4BITS(4,4,4,4,4,0,0,0) // f8 - ff
351};
352
353
// State-transition table for Shift_JIS: 3 packed words = 24 entries, eight per
// PCK4BITS (trailing comments give entry indices in hex). Entries are
// eStart / eError / eItsMe or the numeric intermediate state 3.
// NOTE(review): presumably indexed by (state * class count + byte class) --
// confirm against nsCodingStateMachine.h.
354static unsigned int SJIS_st [ 3] = {
355PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07
356PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
357PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17
358};
359
// Six entries, although SJIS_cls only assigns classes 0-4; the count matches
// the value 6 stored in SJISSMModel. NOTE(review): presumably the length in
// bytes of a character that starts with a byte of that class -- confirm.
360static const unsigned int SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
361
// State-machine model for Shift_JIS. In initializer order: the packed class
// table (SJIS_cls), the value 6 (equal to the entry count of
// SJISCharLenTable), the packed state table (SJIS_st), the length table, and
// the encoding name.
// NOTE(review): SMModel's member names live in nsCodingStateMachine.h.
362SMModel SJISSMModel = {
363 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls },
364 6,
365 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st },
366 SJISCharLenTable,
367 "Shift_JIS",
368};
369
370
// Byte-class table for the UTF-16BE state machine: 32 packed words, eight
// class numbers per PCK4BITS, one per byte value (row comments give the byte
// range). Almost every byte is class 0; the exceptions are 0x0a (1), 0x0d (2),
// 0x1b and 0x29-0x2d (3), 0xfe (4) and 0xff (5).
371static unsigned int UCS2BE_cls [ 256 / 8 ] = {
372PCK4BITS(0,0,0,0,0,0,0,0), // 00 - 07
373PCK4BITS(0,0,1,0,0,2,0,0), // 08 - 0f
374PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
375PCK4BITS(0,0,0,3,0,0,0,0), // 18 - 1f
376PCK4BITS(0,0,0,0,0,0,0,0), // 20 - 27
377PCK4BITS(0,3,3,3,3,3,0,0), // 28 - 2f
378PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
379PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
380PCK4BITS(0,0,0,0,0,0,0,0), // 40 - 47
381PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f
382PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
383PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
384PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
385PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
386PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
387PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
388PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87
389PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
390PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
391PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
392PCK4BITS(0,0,0,0,0,0,0,0), // a0 - a7
393PCK4BITS(0,0,0,0,0,0,0,0), // a8 - af
394PCK4BITS(0,0,0,0,0,0,0,0), // b0 - b7
395PCK4BITS(0,0,0,0,0,0,0,0), // b8 - bf
396PCK4BITS(0,0,0,0,0,0,0,0), // c0 - c7
397PCK4BITS(0,0,0,0,0,0,0,0), // c8 - cf
398PCK4BITS(0,0,0,0,0,0,0,0), // d0 - d7
399PCK4BITS(0,0,0,0,0,0,0,0), // d8 - df
400PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7
401PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef
402PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7
403PCK4BITS(0,0,0,0,0,0,4,5) // f8 - ff
404};
405
406
// State-transition table for UTF-16BE: 7 packed words = 56 entries, eight per
// PCK4BITS (trailing comments give entry indices in hex). Entries are
// eStart / eError / eItsMe or one of the numeric intermediate states 3-8.
// NOTE(review): presumably indexed by (state * class count + byte class) --
// confirm against nsCodingStateMachine.h.
407static unsigned int UCS2BE_st [ 7] = {
408PCK4BITS( 5, 7, 7,eError, 4, 3,eError,eError),//00-07
409PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
410PCK4BITS(eItsMe,eItsMe, 6, 6, 6, 6,eError,eError),//10-17
411PCK4BITS( 6, 6, 6, 6, 6,eItsMe, 6, 6),//18-1f
412PCK4BITS( 6, 6, 6, 6, 5, 7, 7,eError),//20-27
413PCK4BITS( 5, 8, 6, 6,eError, 6, 6, 6),//28-2f
414PCK4BITS( 6, 6, 6, 6,eError,eError,eStart,eStart) //30-37
415};
416
// One entry per UTF-16BE byte class (0-5); every class except 3 maps to 2.
// NOTE(review): presumably the character length in bytes per class -- confirm.
417static const unsigned int UCS2BECharLenTable[] = {2, 2, 2, 0, 2, 2};
418
// State-machine model for UTF-16BE. In initializer order: the packed class
// table (UCS2BE_cls), the value 6 (equal to the number of classes used in
// UCS2BE_cls and to the entry count of UCS2BECharLenTable), the packed state
// table (UCS2BE_st), the length table, and the encoding name.
// NOTE(review): SMModel's member names live in nsCodingStateMachine.h.
419SMModel UCS2BESMModel = {
420 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_cls },
421 6,
422 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_st },
423 UCS2BECharLenTable,
424 "UTF-16BE",
425};
426
// Byte-class table for the UTF-16LE state machine: 32 packed words, eight
// class numbers per PCK4BITS, one per byte value (row comments give the byte
// range). The rows are identical to those of UCS2BE_cls: class 0 everywhere
// except 0x0a (1), 0x0d (2), 0x1b and 0x29-0x2d (3), 0xfe (4) and 0xff (5).
427static unsigned int UCS2LE_cls [ 256 / 8 ] = {
428PCK4BITS(0,0,0,0,0,0,0,0), // 00 - 07
429PCK4BITS(0,0,1,0,0,2,0,0), // 08 - 0f
430PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
431PCK4BITS(0,0,0,3,0,0,0,0), // 18 - 1f
432PCK4BITS(0,0,0,0,0,0,0,0), // 20 - 27
433PCK4BITS(0,3,3,3,3,3,0,0), // 28 - 2f
434PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
435PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
436PCK4BITS(0,0,0,0,0,0,0,0), // 40 - 47
437PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f
438PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
439PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
440PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
441PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
442PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
443PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
444PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87
445PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
446PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
447PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
448PCK4BITS(0,0,0,0,0,0,0,0), // a0 - a7
449PCK4BITS(0,0,0,0,0,0,0,0), // a8 - af
450PCK4BITS(0,0,0,0,0,0,0,0), // b0 - b7
451PCK4BITS(0,0,0,0,0,0,0,0), // b8 - bf
452PCK4BITS(0,0,0,0,0,0,0,0), // c0 - c7
453PCK4BITS(0,0,0,0,0,0,0,0), // c8 - cf
454PCK4BITS(0,0,0,0,0,0,0,0), // d0 - d7
455PCK4BITS(0,0,0,0,0,0,0,0), // d8 - df
456PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7
457PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef
458PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7
459PCK4BITS(0,0,0,0,0,0,4,5) // f8 - ff
460};
461
462
// State-transition table for UTF-16LE: 7 packed words = 56 entries, eight per
// PCK4BITS (trailing comments give entry indices in hex). Entries are
// eStart / eError / eItsMe or one of the numeric intermediate states 3-8.
// NOTE(review): presumably indexed by (state * class count + byte class) --
// confirm against nsCodingStateMachine.h.
463static unsigned int UCS2LE_st [ 7] = {
464PCK4BITS( 6, 6, 7, 6, 4, 3,eError,eError),//00-07
465PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
466PCK4BITS(eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError),//10-17
467PCK4BITS( 5, 5, 5,eError, 5,eError, 6, 6),//18-1f
468PCK4BITS( 7, 6, 8, 8, 5, 5, 5,eError),//20-27
469PCK4BITS( 5, 5, 5,eError,eError,eError, 5, 5),//28-2f
470PCK4BITS( 5, 5, 5,eError, 5,eError,eStart,eStart) //30-37
471};
472
// One entry per UTF-16LE byte class (0-5); every class maps to 2.
// NOTE(review): presumably the character length in bytes per class -- confirm.
473static const unsigned int UCS2LECharLenTable[] = {2, 2, 2, 2, 2, 2};
474
// State-machine model for UTF-16LE. In initializer order: the packed class
// table (UCS2LE_cls), the value 6 (equal to the number of classes used in
// UCS2LE_cls and to the entry count of UCS2LECharLenTable), the packed state
// table (UCS2LE_st), the length table, and the encoding name.
// NOTE(review): SMModel's member names live in nsCodingStateMachine.h.
475SMModel UCS2LESMModel = {
476 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_cls },
477 6,
478 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_st },
479 UCS2LECharLenTable,
480 "UTF-16LE",
481};
482
483
// Byte-class table for the UTF-8 state machine: 32 packed words, eight class
// numbers per PCK4BITS, one per byte value (row comments give the byte range).
// Classes 0-15 are used. Bytes below 0x80 are class 1 (except 0x0e/0x0f/0x1b,
// which are 0); 0x80-0xbf are split into classes 2-5; 0xc0, 0xc1, 0xfe and
// 0xff are class 0; 0xc2-0xdf are class 6; 0xe0-0xfd use classes 7-15.
484static unsigned int UTF8_cls [ 256 / 8 ] = {
485//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
486PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as a legal value
487PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
488PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
489PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
490PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
491PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
492PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
493PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
494PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
495PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
496PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
497PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
498PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
499PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
500PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
501PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
502PCK4BITS(2,2,2,2,3,3,3,3), // 80 - 87
503PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f
504PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97
505PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f
506PCK4BITS(5,5,5,5,5,5,5,5), // a0 - a7
507PCK4BITS(5,5,5,5,5,5,5,5), // a8 - af
508PCK4BITS(5,5,5,5,5,5,5,5), // b0 - b7
509PCK4BITS(5,5,5,5,5,5,5,5), // b8 - bf
510PCK4BITS(0,0,6,6,6,6,6,6), // c0 - c7
511PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf
512PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7
513PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df
514PCK4BITS(7,8,8,8,8,8,8,8), // e0 - e7
515PCK4BITS(8,8,8,8,8,9,8,8), // e8 - ef
516PCK4BITS(10,11,11,11,11,11,11,11), // f0 - f7
517PCK4BITS(12,13,13,13,14,15,0,0) // f8 - ff
518};
519
520
// State-transition table for UTF-8: 26 packed words = 208 entries, eight per
// PCK4BITS (trailing comments give entry indices in hex). Entries are
// eStart / eError / eItsMe or one of the numeric intermediate states 3-12.
// NOTE(review): presumably indexed by (state * class count + byte class) --
// confirm against nsCodingStateMachine.h.
521static unsigned int UTF8_st [ 26] = {
522PCK4BITS(eError,eStart,eError,eError,eError,eError, 12, 10),//00-07
523PCK4BITS( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f
524PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//10-17
525PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//18-1f
526PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//20-27
527PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//28-2f
528PCK4BITS(eError,eError, 5, 5, 5, 5,eError,eError),//30-37
529PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//38-3f
530PCK4BITS(eError,eError,eError, 5, 5, 5,eError,eError),//40-47
531PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//48-4f
532PCK4BITS(eError,eError, 7, 7, 7, 7,eError,eError),//50-57
533PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//58-5f
534PCK4BITS(eError,eError,eError,eError, 7, 7,eError,eError),//60-67
535PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//68-6f
536PCK4BITS(eError,eError, 9, 9, 9, 9,eError,eError),//70-77
537PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//78-7f
538PCK4BITS(eError,eError,eError,eError,eError, 9,eError,eError),//80-87
539PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//88-8f
540PCK4BITS(eError,eError, 12, 12, 12, 12,eError,eError),//90-97
541PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//98-9f
542PCK4BITS(eError,eError,eError,eError,eError, 12,eError,eError),//a0-a7
543PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//a8-af
544PCK4BITS(eError,eError, 12, 12, 12,eError,eError,eError),//b0-b7
545PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//b8-bf
546PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eError,eError),//c0-c7
547PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) //c8-cf
548};
549
// One entry per UTF-8 byte class (0-15); values range from 0 to 6.
// NOTE(review): presumably the length in bytes of a sequence whose first byte
// has that class -- confirm against the state-machine code.
550static const unsigned int UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3,
551 3, 3, 4, 4, 5, 5, 6, 6 };
552
// State-machine model for UTF-8. In initializer order: the packed class table
// (UTF8_cls), the value 16 (equal to the number of classes used in UTF8_cls
// and to the entry count of UTF8CharLenTable), the packed state table
// (UTF8_st), the length table, and the encoding name.
// NOTE(review): SMModel's member names live in nsCodingStateMachine.h.
553SMModel UTF8SMModel = {
554 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls },
555 16,
556 {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st },
557 UTF8CharLenTable,
558 "UTF-8",
559};
560}
561
562
kencodingprober
Definition: CharDistribution.cpp:37
kencodingprober::UCS2LESMModel
KDE_NO_EXPORT SMModel UCS2LESMModel
Definition: nsMBCSSM.cpp:475
kencodingprober::EUCJPSMModel
KDE_NO_EXPORT SMModel EUCJPSMModel
Definition: nsMBCSSM.cpp:137
kencodingprober::BIG5_st
static unsigned int BIG5_st[3]
Definition: nsMBCSSM.cpp:74
kencodingprober::UTF8_cls
static unsigned int UTF8_cls[256/8]
Definition: nsMBCSSM.cpp:484
kencodingprober::eBitSft4bits
@ eBitSft4bits
Definition: nsPkgInt.h:43
kencodingprober::BIG5_cls
static unsigned int BIG5_cls[256/8]
Definition: nsMBCSSM.cpp:37
kencodingprober::EUCKRCharLenTable
static const unsigned int EUCKRCharLenTable[]
Definition: nsMBCSSM.cpp:187
kencodingprober::eIdxSft4bits
@ eIdxSft4bits
Definition: nsPkgInt.h:31
kencodingprober::UCS2LECharLenTable
static const unsigned int UCS2LECharLenTable[]
Definition: nsMBCSSM.cpp:473
kencodingprober::SJISSMModel
KDE_NO_EXPORT SMModel SJISSMModel
Definition: nsMBCSSM.cpp:362
kencodingprober::GB18030SMModel
KDE_NO_EXPORT SMModel GB18030SMModel
Definition: nsMBCSSM.cpp:305
kencodingprober::eUnitMsk4bits
@ eUnitMsk4bits
Definition: nsPkgInt.h:49
kencodingprober::EUCKR_st
static unsigned int EUCKR_st[2]
Definition: nsMBCSSM.cpp:182
kencodingprober::UCS2LE_cls
static unsigned int UCS2LE_cls[256/8]
Definition: nsMBCSSM.cpp:427
kencodingprober::UCS2BECharLenTable
static const unsigned int UCS2BECharLenTable[]
Definition: nsMBCSSM.cpp:417
kencodingprober::SJIS_st
static unsigned int SJIS_st[3]
Definition: nsMBCSSM.cpp:354
kencodingprober::UTF8SMModel
KDE_NO_EXPORT SMModel UTF8SMModel
Definition: nsMBCSSM.cpp:553
kencodingprober::GB18030_cls
static unsigned int GB18030_cls[256/8]
Definition: nsMBCSSM.cpp:253
kencodingprober::GB18030CharLenTable
static const unsigned int GB18030CharLenTable[]
Definition: nsMBCSSM.cpp:303
kencodingprober::UCS2LE_st
static unsigned int UCS2LE_st[7]
Definition: nsMBCSSM.cpp:463
kencodingprober::UCS2BE_cls
static unsigned int UCS2BE_cls[256/8]
Definition: nsMBCSSM.cpp:371
kencodingprober::SJISCharLenTable
static const unsigned int SJISCharLenTable[]
Definition: nsMBCSSM.cpp:360
kencodingprober::EUCKRSMModel
KDE_NO_EXPORT SMModel EUCKRSMModel
Definition: nsMBCSSM.cpp:189
kencodingprober::EUCJP_cls
static unsigned int EUCJP_cls[256/8]
Definition: nsMBCSSM.cpp:90
kencodingprober::EUCKR_cls
static unsigned int EUCKR_cls[256/8]
Definition: nsMBCSSM.cpp:145
kencodingprober::eSftMsk4bits
@ eSftMsk4bits
Definition: nsPkgInt.h:37
kencodingprober::UTF8_st
static unsigned int UTF8_st[26]
Definition: nsMBCSSM.cpp:521
kencodingprober::UCS2BESMModel
KDE_NO_EXPORT SMModel UCS2BESMModel
Definition: nsMBCSSM.cpp:419
kencodingprober::UCS2BE_st
static unsigned int UCS2BE_st[7]
Definition: nsMBCSSM.cpp:407
kencodingprober::GB18030_st
static unsigned int GB18030_st[6]
Definition: nsMBCSSM.cpp:289
kencodingprober::EUCJP_st
static unsigned int EUCJP_st[5]
Definition: nsMBCSSM.cpp:127
kencodingprober::Big5CharLenTable
static const unsigned int Big5CharLenTable[]
Definition: nsMBCSSM.cpp:80
kencodingprober::EUCJPCharLenTable
static const unsigned int EUCJPCharLenTable[]
Definition: nsMBCSSM.cpp:135
kencodingprober::UTF8CharLenTable
static const unsigned int UTF8CharLenTable[]
Definition: nsMBCSSM.cpp:550
kencodingprober::eItsMe
@ eItsMe
Definition: nsCodingStateMachine.h:38
kencodingprober::eError
@ eError
Definition: nsCodingStateMachine.h:37
kencodingprober::eStart
@ eStart
Definition: nsCodingStateMachine.h:36
kencodingprober::SJIS_cls
static unsigned int SJIS_cls[256/8]
Definition: nsMBCSSM.cpp:315
kencodingprober::Big5SMModel
KDE_NO_EXPORT SMModel Big5SMModel
Definition: nsMBCSSM.cpp:82
nsCodingStateMachine.h
PCK4BITS
#define PCK4BITS(a, b, c, d, e, f, g, h)
Definition: nsPkgInt.h:68
kencodingprober::SMModel
Definition: nsCodingStateMachine.h:45
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal