• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • kdecore
  • localization
  • probers
nsLatin1Prober.cpp
Go to the documentation of this file.
1/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* -*- C++ -*-
3* Copyright (C) 1998 <developer@mozilla.org>
4*
5*
6* Permission is hereby granted, free of charge, to any person obtaining
7* a copy of this software and associated documentation files (the
8* "Software"), to deal in the Software without restriction, including
9* without limitation the rights to use, copy, modify, merge, publish,
10* distribute, sublicense, and/or sell copies of the Software, and to
11* permit persons to whom the Software is furnished to do so, subject to
12* the following conditions:
13*
14* The above copyright notice and this permission notice shall be included
15* in all copies or substantial portions of the Software.
16*
17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24*/
25
26#include "nsLatin1Prober.h"
27#include <stdio.h>
28#include <stdlib.h>
29
// Character classes produced by Latin1_CharToClass for each input byte.
// The numeric values are also used as row/column indices into
// Latin1ClassModel, so they must stay dense and start at 0.
30#define UDF 0 // undefined: every transition involving it is rated 0 (illegal) in Latin1ClassModel
31#define OTH 1 // other: any byte that is not classified as a letter
32#define ASC 2 // ascii capital letter
33#define ASS 3 // ascii small letter
34#define ACV 4 // accent capital vowel
35#define ACO 5 // accent capital other
36#define ASV 6 // accent small vowel
37#define ASO 7 // accent small other
38#define CLASS_NUM 8 // total classes; also the row width of Latin1ClassModel
39
40namespace kencodingprober {
41static unsigned char Latin1_CharToClass[] =
42{
43 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
44 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
45 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 10 - 17
46 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 18 - 1F
47 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 20 - 27
48 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 28 - 2F
49 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 30 - 37
50 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 38 - 3F
51 OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 40 - 47
52 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 48 - 4F
53 ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 50 - 57
54 ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 58 - 5F
55 OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 60 - 67
56 ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 68 - 6F
57 ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 70 - 77
58 ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 78 - 7F
59 OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 80 - 87
60 OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 88 - 8F
61 UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 90 - 97
62 OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 98 - 9F
63 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A0 - A7
64 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A8 - AF
65 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B0 - B7
66 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B8 - BF
67 ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // C0 - C7
68 ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // C8 - CF
69 ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // D0 - D7
70 ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // D8 - DF
71 ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // E0 - E7
72 ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // E8 - EF
73 ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // F0 - F7
74 ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF
75};
76
77
/* Likelihood of seeing one character class directly after another.
 *
 * The table is a flattened 8x8 matrix: the row is the class of the previous
 * byte, the column is the class of the current byte, i.e. the entry for a
 * transition is at [previousClass * 8 + currentClass].
 *
 * Entry values:
 *   0 : illegal
 *   1 : very unlikely
 *   2 : normal
 *   3 : very likely
 *
 * The table is only ever read, hence it is declared const.
 */
static const unsigned char Latin1ClassModel[] =
{
/*        UDF OTH ASC ASS ACV ACO ASV ASO */
/*UDF*/    0,  0,  0,  0,  0,  0,  0,  0,
/*OTH*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASC*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASS*/    0,  3,  3,  3,  1,  1,  3,  3,
/*ACV*/    0,  3,  3,  3,  1,  2,  1,  2,
/*ACO*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASV*/    0,  3,  1,  3,  1,  1,  1,  3,
/*ASO*/    0,  3,  1,  3,  1,  1,  3,  3,
};
95
96void nsLatin1Prober::Reset(void)
97{
98 mState = eDetecting;
99 mLastCharClass = OTH;
100 for (int i = 0; i < FREQ_CAT_NUM; i++)
101 mFreqCounter[i] = 0;
102}
103
104
105nsProbingState nsLatin1Prober::HandleData(const char* aBuf, unsigned int aLen)
106{
107 char *newBuf1 = 0;
108 unsigned int newLen1 = 0;
109
110 if (!FilterWithEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) {
111 newBuf1 = (char*)aBuf;
112 newLen1 = aLen;
113 }
114
115 unsigned char charClass;
116 unsigned char freq;
117 for (unsigned int i = 0; i < newLen1; i++)
118 {
119 charClass = Latin1_CharToClass[(unsigned char)newBuf1[i]];
120 freq = Latin1ClassModel[mLastCharClass*CLASS_NUM + charClass];
121 if (freq == 0) {
122 mState = eNotMe;
123 break;
124 }
125 mFreqCounter[freq]++;
126 mLastCharClass = charClass;
127 }
128
129 if (newBuf1 != aBuf)
130 free(newBuf1);
131
132 return mState;
133}
134
135float nsLatin1Prober::GetConfidence(void)
136{
137 if (mState == eNotMe)
138 return 0.01f;
139
140 float confidence;
141 unsigned int total = 0;
142 for (int i = 0; i < FREQ_CAT_NUM; i++)
143 total += mFreqCounter[i];
144
145 if(!total)
146 confidence = 0.0f;
147 else
148 {
149 confidence = mFreqCounter[3]*1.0f / total;
150 confidence -= mFreqCounter[1]*20.0f/total;
151 }
152
153 if (confidence < 0.0f)
154 confidence = 0.0f;
155
156 // lower the confidence of latin1 so that other more accurate detector
157 // can take priority.
158 confidence *= 0.50f;
159
160 return confidence;
161}
162
#ifdef DEBUG_PROBE
// Debug helper (compiled only with DEBUG_PROBE): prints the current
// confidence and the charset name of this prober to stdout.
void nsLatin1Prober::DumpStatus()
{
  const float confidence = GetConfidence();
  const char *charsetName = GetCharSetName();
  printf(" Latin1Prober: %1.3f [%s]\r\n", confidence, charsetName);
}
#endif
169}
170
171
kencodingprober::nsCharSetProber::FilterWithEnglishLetters
static bool FilterWithEnglishLetters(const char *aBuf, unsigned int aLen, char **newBuf, unsigned int &newLen)
Definition: nsCharSetProber.cpp:72
kencodingprober::nsLatin1Prober::HandleData
nsProbingState HandleData(const char *aBuf, unsigned int aLen)
Definition: nsLatin1Prober.cpp:105
kencodingprober::nsLatin1Prober::mFreqCounter
unsigned int mFreqCounter[FREQ_CAT_NUM]
Definition: nsLatin1Prober.h:52
kencodingprober::nsLatin1Prober::Reset
void Reset(void)
Definition: nsLatin1Prober.cpp:96
kencodingprober::nsLatin1Prober::GetConfidence
float GetConfidence(void)
Definition: nsLatin1Prober.cpp:135
kencodingprober::nsLatin1Prober::mState
nsProbingState mState
Definition: nsLatin1Prober.h:50
kencodingprober::nsLatin1Prober::GetCharSetName
const char * GetCharSetName()
Definition: nsLatin1Prober.h:38
kencodingprober::nsLatin1Prober::mLastCharClass
char mLastCharClass
Definition: nsLatin1Prober.h:51
kencodingprober
Definition: CharDistribution.cpp:37
kencodingprober::nsProbingState
nsProbingState
Definition: nsCharSetProber.h:34
kencodingprober::eNotMe
@ eNotMe
Definition: nsCharSetProber.h:37
kencodingprober::eDetecting
@ eDetecting
Definition: nsCharSetProber.h:35
kencodingprober::Latin1ClassModel
static unsigned char Latin1ClassModel[]
Definition: nsLatin1Prober.cpp:83
kencodingprober::Latin1_CharToClass
static unsigned char Latin1_CharToClass[]
Definition: nsLatin1Prober.cpp:41
ACO
#define ACO
Definition: nsLatin1Prober.cpp:35
ASV
#define ASV
Definition: nsLatin1Prober.cpp:36
UDF
#define UDF
Definition: nsLatin1Prober.cpp:30
ASS
#define ASS
Definition: nsLatin1Prober.cpp:33
ASC
#define ASC
Definition: nsLatin1Prober.cpp:32
OTH
#define OTH
Definition: nsLatin1Prober.cpp:31
ASO
#define ASO
Definition: nsLatin1Prober.cpp:37
ACV
#define ACV
Definition: nsLatin1Prober.cpp:34
CLASS_NUM
#define CLASS_NUM
Definition: nsLatin1Prober.cpp:38
nsLatin1Prober.h
FREQ_CAT_NUM
#define FREQ_CAT_NUM
Definition: nsLatin1Prober.h:31
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal