001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.vfs2.provider;
018
019import org.apache.commons.vfs2.FileName;
020import org.apache.commons.vfs2.FileSystemException;
021import org.apache.commons.vfs2.FileType;
022import org.apache.commons.vfs2.VFS;
023import org.apache.commons.vfs2.util.Os;
024
025/**
026 * Utilities for dealing with URIs. See RFC 2396 for details.
027 *
028 * 2005) $
029 */
030public final class UriParser {
031    /**
032     * The set of valid separators. These are all converted to the normalized one. Does <i>not</i> contain the
033     * normalized separator
034     */
035    // public static final char[] separators = {'\\'};
036    public static final char TRANS_SEPARATOR = '\\';
037
038    /**
039     * The normalised separator to use.
040     */
041    private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;
042
043    private static final int HEX_BASE = 16;
044
045    private static final int BITS_IN_HALF_BYTE = 4;
046
047    private static final char LOW_MASK = 0x0F;
048
049    private UriParser() {
050    }
051
052    /**
053     * Extracts the first element of a path.
054     *
055     * @param name StringBuilder containing the path.
056     * @return The first element of the path.
057     */
058    public static String extractFirstElement(final StringBuilder name) {
059        final int len = name.length();
060        if (len < 1) {
061            return null;
062        }
063        int startPos = 0;
064        if (name.charAt(0) == SEPARATOR_CHAR) {
065            startPos = 1;
066        }
067        for (int pos = startPos; pos < len; pos++) {
068            if (name.charAt(pos) == SEPARATOR_CHAR) {
069                // Found a separator
070                final String elem = name.substring(startPos, pos);
071                name.delete(startPos, pos + 1);
072                return elem;
073            }
074        }
075
076        // No separator
077        final String elem = name.substring(startPos);
078        name.setLength(0);
079        return elem;
080    }
081
082    /**
083     * Normalises a path. Does the following:
084     * <ul>
085     * <li>Removes empty path elements.
086     * <li>Handles '.' and '..' elements.
087     * <li>Removes trailing separator.
088     * </ul>
089     *
090     * Its assumed that the separators are already fixed.
091     *
092     * @param path The path to normalize.
093     * @return The FileType.
094     * @throws FileSystemException if an error occurs.
095     *
096     * @see #fixSeparators
097     */
098    public static FileType normalisePath(final StringBuilder path) throws FileSystemException {
099        FileType fileType = FileType.FOLDER;
100        if (path.length() == 0) {
101            return fileType;
102        }
103
104        if (path.charAt(path.length() - 1) != '/') {
105            fileType = FileType.FILE;
106        }
107
108        // Adjust separators
109        // fixSeparators(path);
110
111        // Determine the start of the first element
112        int startFirstElem = 0;
113        if (path.charAt(0) == SEPARATOR_CHAR) {
114            if (path.length() == 1) {
115                return fileType;
116            }
117            startFirstElem = 1;
118        }
119
120        // Iterate over each element
121        int startElem = startFirstElem;
122        int maxlen = path.length();
123        while (startElem < maxlen) {
124            // Find the end of the element
125            int endElem = startElem;
126            for (; endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR; endElem++) {
127            }
128
129            final int elemLen = endElem - startElem;
130            if (elemLen == 0) {
131                // An empty element - axe it
132                path.delete(endElem, endElem + 1);
133                maxlen = path.length();
134                continue;
135            }
136            if (elemLen == 1 && path.charAt(startElem) == '.') {
137                // A '.' element - axe it
138                path.delete(startElem, endElem + 1);
139                maxlen = path.length();
140                continue;
141            }
142            if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') {
143                // A '..' element - remove the previous element
144                if (startElem == startFirstElem) {
145                    // Previous element is missing
146                    throw new FileSystemException("vfs.provider/invalid-relative-path.error");
147                }
148
149                // Find start of previous element
150                int pos = startElem - 2;
151                for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--) {
152                }
153                startElem = pos + 1;
154
155                path.delete(startElem, endElem + 1);
156                maxlen = path.length();
157                continue;
158            }
159
160            // A regular element
161            startElem = endElem + 1;
162        }
163
164        // Remove trailing separator
165        if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) {
166            path.delete(maxlen - 1, maxlen);
167        }
168
169        return fileType;
170    }
171
172    /**
173     * Normalises the separators in a name.
174     *
175     * @param name The StringBuilder containing the name
176     * @return true if the StringBuilder was modified.
177     */
178    public static boolean fixSeparators(final StringBuilder name) {
179        boolean changed = false;
180        final int maxlen = name.length();
181        for (int i = 0; i < maxlen; i++) {
182            final char ch = name.charAt(i);
183            if (ch == TRANS_SEPARATOR) {
184                name.setCharAt(i, SEPARATOR_CHAR);
185                changed = true;
186            }
187        }
188        return changed;
189    }
190
191    /**
192     * Extracts the scheme from a URI.
193     *
194     * @param uri The URI.
195     * @return The scheme name. Returns null if there is no scheme.
196     */
197    public static String extractScheme(final String uri) {
198        return extractScheme(uri, null);
199    }
200
201    /**
202     * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
203     *
204     * @param uri The URI.
205     * @param buffer Returns the remainder of the URI.
206     * @return The scheme name. Returns null if there is no scheme.
207     */
208    public static String extractScheme(final String uri, final StringBuilder buffer) {
209        if (buffer != null) {
210            buffer.setLength(0);
211            buffer.append(uri);
212        }
213
214        final int maxPos = uri.length();
215        for (int pos = 0; pos < maxPos; pos++) {
216            final char ch = uri.charAt(pos);
217
218            if (ch == ':') {
219                // Found the end of the scheme
220                final String scheme = uri.substring(0, pos);
221                if (scheme.length() <= 1 && Os.isFamily(Os.OS_FAMILY_WINDOWS)) {
222                    // This is not a scheme, but a Windows drive letter
223                    return null;
224                }
225                if (buffer != null) {
226                    buffer.delete(0, pos + 1);
227                }
228                return scheme.intern();
229            }
230
231            if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
232                // A scheme character
233                continue;
234            }
235            if (pos > 0 && ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.')) {
236                // A scheme character (these are not allowed as the first
237                // character of the scheme, but can be used as subsequent
238                // characters.
239                continue;
240            }
241
242            // Not a scheme character
243            break;
244        }
245
246        // No scheme in URI
247        return null;
248    }
249
250    /**
251     * Removes %nn encodings from a string.
252     *
253     * @param encodedStr The encoded String.
254     * @return The decoded String.
255     * @throws FileSystemException if an error occurs.
256     */
257    public static String decode(final String encodedStr) throws FileSystemException {
258        if (encodedStr == null) {
259            return null;
260        }
261        if (encodedStr.indexOf('%') < 0) {
262            return encodedStr;
263        }
264        final StringBuilder buffer = new StringBuilder(encodedStr);
265        decode(buffer, 0, buffer.length());
266        return buffer.toString();
267    }
268
269    /**
270     * Removes %nn encodings from a string.
271     *
272     * @param buffer StringBuilder containing the string to decode.
273     * @param offset The position in the string to start decoding.
274     * @param length The number of characters to decode.
275     * @throws FileSystemException if an error occurs.
276     */
277    public static void decode(final StringBuilder buffer, final int offset, final int length)
278            throws FileSystemException {
279        int index = offset;
280        int count = length;
281        for (; count > 0; count--, index++) {
282            final char ch = buffer.charAt(index);
283            if (ch != '%') {
284                continue;
285            }
286            if (count < 3) {
287                throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
288                        buffer.substring(index, index + count));
289            }
290
291            // Decode
292            final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
293            final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
294            if (dig1 == -1 || dig2 == -1) {
295                throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
296                        buffer.substring(index, index + 3));
297            }
298            final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
299
300            // Replace
301            buffer.setCharAt(index, value);
302            buffer.delete(index + 1, index + 3);
303            count -= 2;
304        }
305    }
306
307    /**
308     * Encodes and appends a string to a StringBuilder.
309     *
310     * @param buffer The StringBuilder to append to.
311     * @param unencodedValue The String to encode and append.
312     * @param reserved characters to encode.
313     */
314    public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) {
315        final int offset = buffer.length();
316        buffer.append(unencodedValue);
317        encode(buffer, offset, unencodedValue.length(), reserved);
318    }
319
320    /**
321     * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters.
322     *
323     * @param buffer The StringBuilder to append to.
324     * @param offset The position in the buffer to start encoding at.
325     * @param length The number of characters to encode.
326     * @param reserved characters to encode.
327     */
328    public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) {
329        int index = offset;
330        int count = length;
331        for (; count > 0; index++, count--) {
332            final char ch = buffer.charAt(index);
333            boolean match = ch == '%';
334            if (reserved != null) {
335                for (int i = 0; !match && i < reserved.length; i++) {
336                    if (ch == reserved[i]) {
337                        match = true;
338                    }
339                }
340            }
341            if (match) {
342                // Encode
343                final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE),
344                        Character.forDigit(ch & LOW_MASK, HEX_BASE) };
345                buffer.setCharAt(index, '%');
346                buffer.insert(index + 1, digits);
347                index += 2;
348            }
349        }
350    }
351
352    /**
353     * Removes %nn encodings from a string.
354     *
355     * @param decodedStr The decoded String.
356     * @return The encoded String.
357     */
358    public static String encode(final String decodedStr) {
359        return encode(decodedStr, null);
360    }
361
362    /**
363     * Converts "special" characters to their %nn value.
364     *
365     * @param decodedStr The decoded String.
366     * @param reserved Characters to encode.
367     * @return The encoded String
368     */
369    public static String encode(final String decodedStr, final char[] reserved) {
370        if (decodedStr == null) {
371            return null;
372        }
373        final StringBuilder buffer = new StringBuilder(decodedStr);
374        encode(buffer, 0, buffer.length(), reserved);
375        return buffer.toString();
376    }
377
378    /**
379     * Encode an array of Strings.
380     *
381     * @param strings The array of Strings to encode.
382     * @return An array of encoded Strings.
383     */
384    public static String[] encode(final String[] strings) {
385        if (strings == null) {
386            return null;
387        }
388        for (int i = 0; i < strings.length; i++) {
389            strings[i] = encode(strings[i]);
390        }
391        return strings;
392    }
393
394    /**
395     * Decodes the String.
396     *
397     * @param uri The String to decode.
398     * @throws FileSystemException if an error occurs.
399     */
400    public static void checkUriEncoding(final String uri) throws FileSystemException {
401        decode(uri);
402    }
403
404    public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length,
405            final FileNameParser fileNameParser) throws FileSystemException {
406        int index = offset;
407        int count = length;
408        for (; count > 0; count--, index++) {
409            final char ch = buffer.charAt(index);
410            if (ch == '%') {
411                if (count < 3) {
412                    throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
413                            buffer.substring(index, index + count));
414                }
415
416                // Decode
417                final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
418                final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
419                if (dig1 == -1 || dig2 == -1) {
420                    throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
421                            buffer.substring(index, index + 3));
422                }
423                final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
424
425                final boolean match = value == '%' || fileNameParser.encodeCharacter(value);
426
427                if (match) {
428                    // this is a reserved character, not allowed to decode
429                    index += 2;
430                    count -= 2;
431                    continue;
432                }
433
434                // Replace
435                buffer.setCharAt(index, value);
436                buffer.delete(index + 1, index + 3);
437                count -= 2;
438            } else if (fileNameParser.encodeCharacter(ch)) {
439                // Encode
440                final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE),
441                        Character.forDigit(ch & LOW_MASK, HEX_BASE) };
442                buffer.setCharAt(index, '%');
443                buffer.insert(index + 1, digits);
444                index += 2;
445            }
446        }
447    }
448
449    /**
450     * Extract the query String from the URI.
451     *
452     * @param name StringBuilder containing the URI.
453     * @return The query string, if any. null otherwise.
454     */
455    public static String extractQueryString(final StringBuilder name) {
456        for (int pos = 0; pos < name.length(); pos++) {
457            if (name.charAt(pos) == '?') {
458                final String queryString = name.substring(pos + 1);
459                name.delete(pos, name.length());
460                return queryString;
461            }
462        }
463
464        return null;
465    }
466}