• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • kdecore
  • localization
  • probers
ChineseGroupProber.cpp
Go to the documentation of this file.
1/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* -*- C++ -*-
3* Copyright (C) 1998 <developer@mozilla.org>
4*
5*
6* Permission is hereby granted, free of charge, to any person obtaining
7* a copy of this software and associated documentation files (the
8* "Software"), to deal in the Software without restriction, including
9* without limitation the rights to use, copy, modify, merge, publish,
10* distribute, sublicense, and/or sell copies of the Software, and to
11* permit persons to whom the Software is furnished to do so, subject to
12* the following conditions:
13*
14* The above copyright notice and this permission notice shall be included
15* in all copies or substantial portions of the Software.
16*
17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24*/
25
26#include "ChineseGroupProber.h"
27
28#include "UnicodeGroupProber.h"
29#include "nsGB2312Prober.h"
30#include "nsBig5Prober.h"
31
32#include <stdio.h>
33#include <stdlib.h>
34
35namespace kencodingprober {
36#ifdef DEBUG_PROBE
// Debug-only display labels for the sub-probers, indexed by the same slot
// number used for mProbers[] (0 = Unicode, 1 = GB18030, 2 = Big5).
static const char* const ProberName[] = { "Unicode", "GB18030", "Big5" };
43
44#endif
45
46ChineseGroupProber::ChineseGroupProber()
47{
48 mProbers[0] = new UnicodeGroupProber();
49 mProbers[1] = new nsGB18030Prober();
50 mProbers[2] = new nsBig5Prober();
51 Reset();
52}
53
54ChineseGroupProber::~ChineseGroupProber()
55{
56 for (unsigned int i = 0; i < CN_NUM_OF_PROBERS; i++)
57 {
58 delete mProbers[i];
59 }
60}
61
62const char* ChineseGroupProber::GetCharSetName()
63{
64 if (mBestGuess == -1)
65 {
66 GetConfidence();
67 if (mBestGuess == -1)
68 mBestGuess = 1; // assume it's GB18030
69 }
70 return mProbers[mBestGuess]->GetCharSetName();
71}
72
73void ChineseGroupProber::Reset(void)
74{
75 mActiveNum = 0;
76 for (unsigned int i = 0; i < CN_NUM_OF_PROBERS; i++)
77 {
78 if (mProbers[i])
79 {
80 mProbers[i]->Reset();
81 mIsActive[i] = true;
82 ++mActiveNum;
83 }
84 else
85 mIsActive[i] = false;
86 }
87 mBestGuess = -1;
88 mState = eDetecting;
89}
90
91nsProbingState ChineseGroupProber::HandleData(const char* aBuf, unsigned int aLen)
92{
93 nsProbingState st;
94 unsigned int i;
95
96 //do filtering to reduce load to probers
97 char *highbyteBuf;
98 char *hptr;
99 bool keepNext = true; //assume previous is not ascii, it will do no harm except add some noise
100 hptr = highbyteBuf = (char*)malloc(aLen);
101 if (!hptr)
102 return mState;
103 for (i = 0; i < aLen; ++i)
104 {
105 if (aBuf[i] & 0x80)
106 {
107 *hptr++ = aBuf[i];
108 keepNext = true;
109 }
110 else
111 {
112 //if previous is highbyte, keep this even it is a ASCII
113 if (keepNext)
114 {
115 *hptr++ = aBuf[i];
116 keepNext = false;
117 }
118 }
119 }
120
121 for (i = 0; i < CN_NUM_OF_PROBERS; ++i)
122 {
123 if (!mIsActive[i])
124 continue;
125 st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf);
126 if (st == eFoundIt)
127 {
128 mBestGuess = i;
129 mState = eFoundIt;
130 break;
131 }
132 else if (st == eNotMe)
133 {
134 mIsActive[i] = false;
135 mActiveNum--;
136 if (mActiveNum <= 0)
137 {
138 mState = eNotMe;
139 break;
140 }
141 }
142 }
143
144 free(highbyteBuf);
145
146 return mState;
147}
148
149float ChineseGroupProber::GetConfidence(void)
150{
151 unsigned int i;
152 float bestConf = 0.0, cf;
153
154 switch (mState)
155 {
156 case eFoundIt:
157 return (float)0.99;
158 case eNotMe:
159 return (float)0.01;
160 default:
161 for (i = 0; i < CN_NUM_OF_PROBERS; ++i)
162 {
163 if (!mIsActive[i])
164 continue;
165 cf = mProbers[i]->GetConfidence();
166 if (bestConf < cf)
167 {
168 bestConf = cf;
169 mBestGuess = i;
170 }
171 }
172 }
173 return bestConf;
174}
175
176#ifdef DEBUG_PROBE
177void ChineseGroupProber::DumpStatus()
178{
179 unsigned int i;
180 float cf;
181
182 GetConfidence();
183 for (i = 0; i < CN_NUM_OF_PROBERS; i++)
184 {
185 if (!mIsActive[i])
186 printf(" Chinese group inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
187 else
188 {
189 cf = mProbers[i]->GetConfidence();
190 printf(" Chinese group %1.3f: [%s]\r\n", cf, ProberName[i]);
191 }
192 }
193}
194#endif
195}
ChineseGroupProber.h
CN_NUM_OF_PROBERS
#define CN_NUM_OF_PROBERS
Definition: ChineseGroupProber.h:32
UnicodeGroupProber.h
kencodingprober::ChineseGroupProber::mActiveNum
unsigned int mActiveNum
Definition: ChineseGroupProber.h:54
kencodingprober::ChineseGroupProber::mState
nsProbingState mState
Definition: ChineseGroupProber.h:50
kencodingprober::ChineseGroupProber::GetConfidence
float GetConfidence(void)
Definition: ChineseGroupProber.cpp:149
kencodingprober::ChineseGroupProber::~ChineseGroupProber
virtual ~ChineseGroupProber()
Definition: ChineseGroupProber.cpp:54
kencodingprober::ChineseGroupProber::mProbers
nsCharSetProber * mProbers[CN_NUM_OF_PROBERS]
Definition: ChineseGroupProber.h:51
kencodingprober::ChineseGroupProber::mBestGuess
int mBestGuess
Definition: ChineseGroupProber.h:53
kencodingprober::ChineseGroupProber::HandleData
nsProbingState HandleData(const char *aBuf, unsigned int aLen)
Definition: ChineseGroupProber.cpp:91
kencodingprober::ChineseGroupProber::ChineseGroupProber
ChineseGroupProber()
Definition: ChineseGroupProber.cpp:46
kencodingprober::ChineseGroupProber::mIsActive
bool mIsActive[CN_NUM_OF_PROBERS]
Definition: ChineseGroupProber.h:52
kencodingprober::ChineseGroupProber::GetCharSetName
const char * GetCharSetName()
Definition: ChineseGroupProber.cpp:62
kencodingprober::ChineseGroupProber::Reset
void Reset(void)
Definition: ChineseGroupProber.cpp:73
kencodingprober::UnicodeGroupProber
Definition: UnicodeGroupProber.h:34
kencodingprober::nsBig5Prober
Definition: nsBig5Prober.h:33
kencodingprober::nsCharSetProber::Reset
virtual void Reset(void)=0
kencodingprober::nsCharSetProber::HandleData
virtual nsProbingState HandleData(const char *aBuf, unsigned int aLen)=0
kencodingprober::nsCharSetProber::GetConfidence
virtual float GetConfidence(void)=0
kencodingprober::nsCharSetProber::GetCharSetName
virtual const char * GetCharSetName()=0
kencodingprober::nsGB18030Prober
Definition: nsGB2312Prober.h:35
kencodingprober
Definition: CharDistribution.cpp:37
kencodingprober::nsProbingState
nsProbingState
Definition: nsCharSetProber.h:34
kencodingprober::eNotMe
@ eNotMe
Definition: nsCharSetProber.h:37
kencodingprober::eFoundIt
@ eFoundIt
Definition: nsCharSetProber.h:36
kencodingprober::eDetecting
@ eDetecting
Definition: nsCharSetProber.h:35
nsBig5Prober.h
nsGB2312Prober.h
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal