27#include "xml/dom_stringimpl.h"
28#include "xml/dom3_xpathimpl.h"
69 "processing-instruction",
83Tokenizer::XMLCat Tokenizer::charCat(QChar aChar)
87 if (aChar.unicode() ==
'_')
90 if (aChar.unicode() ==
'.' || aChar.unicode() ==
'-')
93 switch (aChar.category()) {
94 case QChar::Letter_Lowercase:
95 case QChar::Letter_Uppercase:
96 case QChar::Letter_Other:
97 case QChar::Letter_Titlecase:
98 case QChar::Number_Letter:
101 case QChar::Mark_SpacingCombining:
102 case QChar::Mark_Enclosing:
103 case QChar::Mark_NonSpacing:
104 case QChar::Letter_Modifier:
105 case QChar::Number_DecimalDigit:
109 return NotPartOfName;
115 if (!s_axisNamesDict) {
118 s_axisNamesDict->insert(QLatin1String(
axisNames[p].name),
123 if ( it != s_axisNamesDict->constEnd() ) {
126 return it != s_axisNamesDict->constEnd();
129bool Tokenizer::isNodeTypeName(QString name)
131 if (!s_nodeTypeNamesDict) {
134 s_nodeTypeNamesDict->insert(QLatin1String(
nodeTypeNames[p]));
136 return s_nodeTypeNamesDict->contains(name);
143bool Tokenizer::isOperatorContext()
145 if ( m_nextPos == 0 ) {
149 switch ( m_lastTokenType ) {
153 case '@':
case AXISNAME:
case '(':
case '[':
160void Tokenizer::skipWS()
162 while (m_nextPos < m_data.length() && m_data[m_nextPos].isSpace())
166Token Tokenizer::makeTokenAndAdvance(
int code,
int advance)
168 m_nextPos += advance;
172Token Tokenizer::makeIntTokenAndAdvance(
int code,
int val,
int advance)
174 m_nextPos += advance;
175 return Token(code, val);
179char Tokenizer::peekAheadHelper()
181 if (m_nextPos + 1 >= m_data.length())
183 QChar
next = m_data[m_nextPos + 1];
190char Tokenizer::peekCurHelper()
192 if (m_nextPos >= m_data.length())
194 QChar
next = m_data[m_nextPos];
201Token Tokenizer::lexString()
203 QChar delimiter = m_data[m_nextPos];
204 int startPos = m_nextPos + 1;
206 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
207 if (m_data[m_nextPos] == delimiter) {
208 QString value = m_data.mid(startPos, m_nextPos - startPos);
218Token Tokenizer::lexNumber()
220 int startPos = m_nextPos;
221 bool seenDot =
false;
224 for (; m_nextPos < m_data.length(); ++m_nextPos) {
225 QChar aChar = m_data[m_nextPos];
226 if (aChar.row() != 0)
break;
228 if (aChar.cell() <
'0' || aChar.cell() >
'9') {
229 if (aChar.cell() ==
'.' && !seenDot)
236 QString value = m_data.mid(startPos, m_nextPos - startPos);
240Token Tokenizer::lexNCName()
242 int startPos = m_nextPos;
243 if (m_nextPos < m_data.length() && charCat(m_data[m_nextPos]) == NameStart)
246 for (; m_nextPos < m_data.length(); ++m_nextPos) {
247 if (charCat(m_data[m_nextPos]) == NotPartOfName)
251 QString value = m_data.mid(startPos, m_nextPos - startPos);
255 return makeTokenAndAdvance(
ERROR);
258Token Tokenizer::lexQName()
260 Token t1 = lexNCName();
265 if (peekAheadHelper() !=
':')
268 Token t2 = lexNCName();
274Token Tokenizer::nextTokenInternal()
278 if (m_nextPos >= m_data.length()) {
282 char code = peekCurHelper();
284 case '(':
case ')':
case '[':
case ']':
285 case '@':
case ',':
case '|':
286 return makeTokenAndAdvance(code);
290 case '0':
case '1':
case '2':
case '3':
case '4':
291 case '5':
case '6':
case '7':
case '8':
case '9':
294 char next = peekAheadHelper();
296 return makeTokenAndAdvance(
DOTDOT, 2);
297 else if (next >=
'0' && next <=
'9')
300 return makeTokenAndAdvance(
'.');
303 if (peekAheadHelper() ==
'/')
306 return makeTokenAndAdvance(
'/');
308 return makeTokenAndAdvance(
PLUS);
310 return makeTokenAndAdvance(
MINUS);
314 if (peekAheadHelper() ==
'=')
320 if (peekAheadHelper() ==
'=')
325 if (peekAheadHelper() ==
'=')
330 if (isOperatorContext())
338 Token par = lexQName();
346 Token t1 = lexNCName();
352 if (isOperatorContext()) {
353 if (t1.
value == QLatin1String(
"and"))
355 if (t1.
value == QLatin1String(
"or"))
357 if (t1.
value == QLatin1String(
"mod"))
359 if (t1.
value == QLatin1String(
"div"))
364 if (peekCurHelper() ==
':') {
367 if (peekCurHelper() ==
':') {
372 if (isAxisName(t1.
value, &axisType))
380 if (peekCurHelper() ==
'*') {
386 Token t2 = lexNCName();
393 if (peekCurHelper() ==
'(') {
397 if (isNodeTypeName(t1.
value)) {
398 if (t1.
value ==
"processing-instruction")
413 Token toRet = nextTokenInternal();
414 m_lastTokenType = toRet.
type;
418Tokenizer::Tokenizer()
423Tokenizer::~Tokenizer()
425 delete s_axisNamesDict;
426 delete s_nodeTypeNamesDict;
This class implements the basic string we use in the DOM.
static Tokenizer & self()
This library provides a full-featured HTML parser and widget.
KAction * next(const QObject *recvr, const char *slot, QObject *parent)
const char * name(StandardAction id)
static unsigned int axisNamesCount
static AxisNameMapping axisNames[]
void initTokenizer(const DOM::DOMString &string)
static const char *const nodeTypeNames[]