001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.List;
025import java.util.Stack;
026import java.util.regex.Matcher;
027import java.util.regex.Pattern;
028
029/**
030 * General filename and filepath manipulation utilities.
031 * <p>
032 * When dealing with filenames you can hit problems when moving from a Windows
033 * based development machine to a Unix based production machine.
034 * This class aims to help avoid those problems.
035 * <p>
036 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
037 * using JDK {@link java.io.File File} objects and the two argument constructor
038 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
039 * <p>
040 * Most methods on this class are designed to work the same on both Unix and Windows.
041 * Those that don't include 'System', 'Unix' or 'Windows' in their name.
042 * <p>
043 * Most methods recognise both separators (forward and back), and both
044 * sets of prefixes. See the javadoc of each method for details.
045 * <p>
046 * This class defines six components within a filename
047 * (example C:\dev\project\file.txt):
048 * <ul>
049 * <li>the prefix - C:\</li>
050 * <li>the path - dev\project\</li>
051 * <li>the full path - C:\dev\project\</li>
052 * <li>the name - file.txt</li>
053 * <li>the base name - file</li>
054 * <li>the extension - txt</li>
055 * </ul>
056 * Note that this class works best if directory filenames end with a separator.
057 * If you omit the last separator, it is impossible to determine if the filename
058 * corresponds to a file or a directory. As a result, we have chosen to say
059 * it corresponds to a file.
060 * <p>
061 * This class only supports Unix and Windows style names.
062 * Prefixes are matched as follows:
063 * <pre>
064 * Windows:
065 * a\b\c.txt           --&gt; ""          --&gt; relative
066 * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
067 * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
068 * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
069 * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
070 *
071 * Unix:
072 * a/b/c.txt           --&gt; ""          --&gt; relative
073 * /a/b/c.txt          --&gt; "/"         --&gt; absolute
074 * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
075 * ~                   --&gt; "~/"        --&gt; current user (slash added)
076 * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
077 * ~user               --&gt; "~user/"    --&gt; named user (slash added)
078 * </pre>
079 * Both prefix styles are matched always, irrespective of the machine that you are
080 * currently running on.
081 * <p>
082 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
083 *
084 * @since 1.1
085 */
086public class FilenameUtils {
087
088    private static final int NOT_FOUND = -1;
089
090    /**
091     * The extension separator character.
092     * @since 1.4
093     */
094    public static final char EXTENSION_SEPARATOR = '.';
095
096    /**
097     * The extension separator String.
098     * @since 1.4
099     */
100    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
101
102    /**
103     * The Unix separator character.
104     */
105    private static final char UNIX_SEPARATOR = '/';
106
107    /**
108     * The Windows separator character.
109     */
110    private static final char WINDOWS_SEPARATOR = '\\';
111
112    /**
113     * The system separator character.
114     */
115    private static final char SYSTEM_SEPARATOR = File.separatorChar;
116
117    /**
118     * The separator character that is the opposite of the system separator.
119     */
120    private static final char OTHER_SEPARATOR;
121    static {
122        if (isSystemWindows()) {
123            OTHER_SEPARATOR = UNIX_SEPARATOR;
124        } else {
125            OTHER_SEPARATOR = WINDOWS_SEPARATOR;
126        }
127    }
128
129    /**
130     * Instances should NOT be constructed in standard programming.
131     */
132    public FilenameUtils() {
133        super();
134    }
135
136    //-----------------------------------------------------------------------
137    /**
138     * Determines if Windows file system is in use.
139     *
140     * @return true if the system is Windows
141     */
142    static boolean isSystemWindows() {
143        return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
144    }
145
146    //-----------------------------------------------------------------------
147    /**
148     * Checks if the character is a separator.
149     *
150     * @param ch  the character to check
151     * @return true if it is a separator character
152     */
153    private static boolean isSeparator(final char ch) {
154        return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR;
155    }
156
157    //-----------------------------------------------------------------------
158    /**
159     * Normalizes a path, removing double and single dot path steps.
160     * <p>
161     * This method normalizes a path to a standard format.
162     * The input may contain separators in either Unix or Windows format.
163     * The output will contain separators in the format of the system.
164     * <p>
165     * A trailing slash will be retained.
166     * A double slash will be merged to a single slash (but UNC names are handled).
167     * A single dot path segment will be removed.
168     * A double dot will cause that path segment and the one before to be removed.
169     * If the double dot has no parent path segment to work with, {@code null}
170     * is returned.
171     * <p>
172     * The output will be the same on both Unix and Windows except
173     * for the separator character.
174     * <pre>
175     * /foo//               --&gt;   /foo/
176     * /foo/./              --&gt;   /foo/
177     * /foo/../bar          --&gt;   /bar
178     * /foo/../bar/         --&gt;   /bar/
179     * /foo/../bar/../baz   --&gt;   /baz
180     * //foo//./bar         --&gt;   /foo/bar
181     * /../                 --&gt;   null
182     * ../foo               --&gt;   null
183     * foo/bar/..           --&gt;   foo/
184     * foo/../../bar        --&gt;   null
185     * foo/../bar           --&gt;   bar
186     * //server/foo/../bar  --&gt;   //server/bar
187     * //server/../bar      --&gt;   null
188     * C:\foo\..\bar        --&gt;   C:\bar
189     * C:\..\bar            --&gt;   null
190     * ~/foo/../bar/        --&gt;   ~/bar/
191     * ~/../bar             --&gt;   null
192     * </pre>
193     * (Note the file separator returned will be correct for Windows/Unix)
194     *
195     * @param filename  the filename to normalize, null returns null
196     * @return the normalized filename, or null if invalid. Null bytes inside string will be removed
197     */
198    public static String normalize(final String filename) {
199        return doNormalize(filename, SYSTEM_SEPARATOR, true);
200    }
201    /**
202     * Normalizes a path, removing double and single dot path steps.
203     * <p>
204     * This method normalizes a path to a standard format.
205     * The input may contain separators in either Unix or Windows format.
206     * The output will contain separators in the format specified.
207     * <p>
208     * A trailing slash will be retained.
209     * A double slash will be merged to a single slash (but UNC names are handled).
210     * A single dot path segment will be removed.
211     * A double dot will cause that path segment and the one before to be removed.
212     * If the double dot has no parent path segment to work with, {@code null}
213     * is returned.
214     * <p>
215     * The output will be the same on both Unix and Windows except
216     * for the separator character.
217     * <pre>
218     * /foo//               --&gt;   /foo/
219     * /foo/./              --&gt;   /foo/
220     * /foo/../bar          --&gt;   /bar
221     * /foo/../bar/         --&gt;   /bar/
222     * /foo/../bar/../baz   --&gt;   /baz
223     * //foo//./bar         --&gt;   /foo/bar
224     * /../                 --&gt;   null
225     * ../foo               --&gt;   null
226     * foo/bar/..           --&gt;   foo/
227     * foo/../../bar        --&gt;   null
228     * foo/../bar           --&gt;   bar
229     * //server/foo/../bar  --&gt;   //server/bar
230     * //server/../bar      --&gt;   null
231     * C:\foo\..\bar        --&gt;   C:\bar
232     * C:\..\bar            --&gt;   null
233     * ~/foo/../bar/        --&gt;   ~/bar/
234     * ~/../bar             --&gt;   null
235     * </pre>
236     * The output will be the same on both Unix and Windows including
237     * the separator character.
238     *
239     * @param filename  the filename to normalize, null returns null
240     * @param unixSeparator {@code true} if a unix separator should
241     * be used or {@code false} if a windows separator should be used.
242     * @return the normalized filename, or null if invalid. Null bytes inside string will be removed
243     * @since 2.0
244     */
245    public static String normalize(final String filename, final boolean unixSeparator) {
246        final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
247        return doNormalize(filename, separator, true);
248    }
249
250    //-----------------------------------------------------------------------
251    /**
252     * Normalizes a path, removing double and single dot path steps,
253     * and removing any final directory separator.
254     * <p>
255     * This method normalizes a path to a standard format.
256     * The input may contain separators in either Unix or Windows format.
257     * The output will contain separators in the format of the system.
258     * <p>
259     * A trailing slash will be removed.
260     * A double slash will be merged to a single slash (but UNC names are handled).
261     * A single dot path segment will be removed.
262     * A double dot will cause that path segment and the one before to be removed.
263     * If the double dot has no parent path segment to work with, {@code null}
264     * is returned.
265     * <p>
266     * The output will be the same on both Unix and Windows except
267     * for the separator character.
268     * <pre>
269     * /foo//               --&gt;   /foo
270     * /foo/./              --&gt;   /foo
271     * /foo/../bar          --&gt;   /bar
272     * /foo/../bar/         --&gt;   /bar
273     * /foo/../bar/../baz   --&gt;   /baz
274     * //foo//./bar         --&gt;   /foo/bar
275     * /../                 --&gt;   null
276     * ../foo               --&gt;   null
277     * foo/bar/..           --&gt;   foo
278     * foo/../../bar        --&gt;   null
279     * foo/../bar           --&gt;   bar
280     * //server/foo/../bar  --&gt;   //server/bar
281     * //server/../bar      --&gt;   null
282     * C:\foo\..\bar        --&gt;   C:\bar
283     * C:\..\bar            --&gt;   null
284     * ~/foo/../bar/        --&gt;   ~/bar
285     * ~/../bar             --&gt;   null
286     * </pre>
287     * (Note the file separator returned will be correct for Windows/Unix)
288     *
289     * @param filename  the filename to normalize, null returns null
290     * @return the normalized filename, or null if invalid. Null bytes inside string will be removed
291     */
292    public static String normalizeNoEndSeparator(final String filename) {
293        return doNormalize(filename, SYSTEM_SEPARATOR, false);
294    }
295
296    /**
297     * Normalizes a path, removing double and single dot path steps,
298     * and removing any final directory separator.
299     * <p>
300     * This method normalizes a path to a standard format.
301     * The input may contain separators in either Unix or Windows format.
302     * The output will contain separators in the format specified.
303     * <p>
304     * A trailing slash will be removed.
305     * A double slash will be merged to a single slash (but UNC names are handled).
306     * A single dot path segment will be removed.
307     * A double dot will cause that path segment and the one before to be removed.
308     * If the double dot has no parent path segment to work with, {@code null}
309     * is returned.
310     * <p>
311     * The output will be the same on both Unix and Windows including
312     * the separator character.
313     * <pre>
314     * /foo//               --&gt;   /foo
315     * /foo/./              --&gt;   /foo
316     * /foo/../bar          --&gt;   /bar
317     * /foo/../bar/         --&gt;   /bar
318     * /foo/../bar/../baz   --&gt;   /baz
319     * //foo//./bar         --&gt;   /foo/bar
320     * /../                 --&gt;   null
321     * ../foo               --&gt;   null
322     * foo/bar/..           --&gt;   foo
323     * foo/../../bar        --&gt;   null
324     * foo/../bar           --&gt;   bar
325     * //server/foo/../bar  --&gt;   //server/bar
326     * //server/../bar      --&gt;   null
327     * C:\foo\..\bar        --&gt;   C:\bar
328     * C:\..\bar            --&gt;   null
329     * ~/foo/../bar/        --&gt;   ~/bar
330     * ~/../bar             --&gt;   null
331     * </pre>
332     *
333     * @param filename  the filename to normalize, null returns null
334     * @param unixSeparator {@code true} if a unix separator should
335     * be used or {@code false} if a windows separator should be used.
336     * @return the normalized filename, or null if invalid. Null bytes inside string will be removed
337     * @since 2.0
338     */
339    public static String normalizeNoEndSeparator(final String filename, final boolean unixSeparator) {
340         final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
341        return doNormalize(filename, separator, false);
342    }
343
344    /**
345     * Internal method to perform the normalization.
346     *
347     * @param filename  the filename
348     * @param separator The separator character to use
349     * @param keepSeparator  true to keep the final separator
350     * @return the normalized filename. Null bytes inside string will be removed.
351     */
352    private static String doNormalize(final String filename, final char separator, final boolean keepSeparator) {
353        if (filename == null) {
354            return null;
355        }
356
357        failIfNullBytePresent(filename);
358
359        int size = filename.length();
360        if (size == 0) {
361            return filename;
362        }
363        final int prefix = getPrefixLength(filename);
364        if (prefix < 0) {
365            return null;
366        }
367
368        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
369        filename.getChars(0, filename.length(), array, 0);
370
371        // fix separators throughout
372        final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
373        for (int i = 0; i < array.length; i++) {
374            if (array[i] == otherSeparator) {
375                array[i] = separator;
376            }
377        }
378
379        // add extra separator on the end to simplify code below
380        boolean lastIsDirectory = true;
381        if (array[size - 1] != separator) {
382            array[size++] = separator;
383            lastIsDirectory = false;
384        }
385
386        // adjoining slashes
387        for (int i = prefix + 1; i < size; i++) {
388            if (array[i] == separator && array[i - 1] == separator) {
389                System.arraycopy(array, i, array, i - 1, size - i);
390                size--;
391                i--;
392            }
393        }
394
395        // dot slash
396        for (int i = prefix + 1; i < size; i++) {
397            if (array[i] == separator && array[i - 1] == '.' &&
398                    (i == prefix + 1 || array[i - 2] == separator)) {
399                if (i == size - 1) {
400                    lastIsDirectory = true;
401                }
402                System.arraycopy(array, i + 1, array, i - 1, size - i);
403                size -=2;
404                i--;
405            }
406        }
407
408        // double dot slash
409        outer:
410        for (int i = prefix + 2; i < size; i++) {
411            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
412                    (i == prefix + 2 || array[i - 3] == separator)) {
413                if (i == prefix + 2) {
414                    return null;
415                }
416                if (i == size - 1) {
417                    lastIsDirectory = true;
418                }
419                int j;
420                for (j = i - 4 ; j >= prefix; j--) {
421                    if (array[j] == separator) {
422                        // remove b/../ from a/b/../c
423                        System.arraycopy(array, i + 1, array, j + 1, size - i);
424                        size -= i - j;
425                        i = j + 1;
426                        continue outer;
427                    }
428                }
429                // remove a/../ from a/../c
430                System.arraycopy(array, i + 1, array, prefix, size - i);
431                size -= i + 1 - prefix;
432                i = prefix + 1;
433            }
434        }
435
436        if (size <= 0) {  // should never be less than 0
437            return "";
438        }
439        if (size <= prefix) {  // should never be less than prefix
440            return new String(array, 0, size);
441        }
442        if (lastIsDirectory && keepSeparator) {
443            return new String(array, 0, size);  // keep trailing separator
444        }
445        return new String(array, 0, size - 1);  // lose trailing separator
446    }
447
448    //-----------------------------------------------------------------------
449    /**
450     * Concatenates a filename to a base path using normal command line style rules.
451     * <p>
452     * The effect is equivalent to resultant directory after changing
453     * directory to the first argument, followed by changing directory to
454     * the second argument.
455     * <p>
456     * The first argument is the base path, the second is the path to concatenate.
457     * The returned path is always normalized via {@link #normalize(String)},
458     * thus <code>..</code> is handled.
459     * <p>
460     * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
461     * it will be normalized and returned.
462     * Otherwise, the paths will be joined, normalized and returned.
463     * <p>
464     * The output will be the same on both Unix and Windows except
465     * for the separator character.
466     * <pre>
467     * /foo/ + bar          --&gt;   /foo/bar
468     * /foo + bar           --&gt;   /foo/bar
469     * /foo + /bar          --&gt;   /bar
470     * /foo + C:/bar        --&gt;   C:/bar
471     * /foo + C:bar         --&gt;   C:bar (*)
472     * /foo/a/ + ../bar     --&gt;   foo/bar
473     * /foo/ + ../../bar    --&gt;   null
474     * /foo/ + /bar         --&gt;   /bar
475     * /foo/.. + /bar       --&gt;   /bar
476     * /foo + bar/c.txt     --&gt;   /foo/bar/c.txt
477     * /foo/c.txt + bar     --&gt;   /foo/c.txt/bar (!)
478     * </pre>
479     * (*) Note that the Windows relative drive prefix is unreliable when
480     * used with this method.
481     * (!) Note that the first parameter must be a path. If it ends with a name, then
482     * the name will be built into the concatenated path. If this might be a problem,
483     * use {@link #getFullPath(String)} on the base path argument.
484     *
485     * @param basePath  the base path to attach to, always treated as a path
486     * @param fullFilenameToAdd  the filename (or path) to attach to the base
487     * @return the concatenated path, or null if invalid.  Null bytes inside string will be removed
488     */
489    public static String concat(final String basePath, final String fullFilenameToAdd) {
490        final int prefix = getPrefixLength(fullFilenameToAdd);
491        if (prefix < 0) {
492            return null;
493        }
494        if (prefix > 0) {
495            return normalize(fullFilenameToAdd);
496        }
497        if (basePath == null) {
498            return null;
499        }
500        final int len = basePath.length();
501        if (len == 0) {
502            return normalize(fullFilenameToAdd);
503        }
504        final char ch = basePath.charAt(len - 1);
505        if (isSeparator(ch)) {
506            return normalize(basePath + fullFilenameToAdd);
507        } else {
508            return normalize(basePath + '/' + fullFilenameToAdd);
509        }
510    }
511
512    /**
513     * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory).
514     * <p>
515     * The files names are expected to be normalized.
516     * </p>
517     *
518     * Edge cases:
519     * <ul>
520     * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
521     * <li>A directory does not contain itself: return false</li>
522     * <li>A null child file is not contained in any parent: return false</li>
523     * </ul>
524     *
525     * @param canonicalParent
526     *            the file to consider as the parent.
527     * @param canonicalChild
528     *            the file to consider as the child.
529     * @return true is the candidate leaf is under by the specified composite. False otherwise.
530     * @throws IOException
531     *             if an IO error occurs while checking the files.
532     * @since 2.2
533     * @see FileUtils#directoryContains(File, File)
534     */
535    public static boolean directoryContains(final String canonicalParent, final String canonicalChild)
536            throws IOException {
537
538        // Fail fast against NullPointerException
539        if (canonicalParent == null) {
540            throw new IllegalArgumentException("Directory must not be null");
541        }
542
543        if (canonicalChild == null) {
544            return false;
545        }
546
547        if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
548            return false;
549        }
550
551        return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent);
552    }
553
554    //-----------------------------------------------------------------------
555    /**
556     * Converts all separators to the Unix separator of forward slash.
557     *
558     * @param path  the path to be changed, null ignored
559     * @return the updated path
560     */
561    public static String separatorsToUnix(final String path) {
562        if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) {
563            return path;
564        }
565        return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
566    }
567
568    /**
569     * Converts all separators to the Windows separator of backslash.
570     *
571     * @param path  the path to be changed, null ignored
572     * @return the updated path
573     */
574    public static String separatorsToWindows(final String path) {
575        if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) {
576            return path;
577        }
578        return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
579    }
580
581    /**
582     * Converts all separators to the system separator.
583     *
584     * @param path  the path to be changed, null ignored
585     * @return the updated path
586     */
587    public static String separatorsToSystem(final String path) {
588        if (path == null) {
589            return null;
590        }
591        if (isSystemWindows()) {
592            return separatorsToWindows(path);
593        } else {
594            return separatorsToUnix(path);
595        }
596    }
597
598    //-----------------------------------------------------------------------
599    /**
600     * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
601     * <p>
602     * This method will handle a file in either Unix or Windows format.
603     * <p>
604     * The prefix length includes the first slash in the full filename
605     * if applicable. Thus, it is possible that the length returned is greater
606     * than the length of the input string.
607     * <pre>
608     * Windows:
609     * a\b\c.txt           --&gt; ""          --&gt; relative
610     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
611     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
612     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
613     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
614     * \\\a\b\c.txt        --&gt;  error, length = -1
615     *
616     * Unix:
617     * a/b/c.txt           --&gt; ""          --&gt; relative
618     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
619     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
620     * ~                   --&gt; "~/"        --&gt; current user (slash added)
621     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
622     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
623     * //server/a/b/c.txt  --&gt; "//server/"
624     * ///a/b/c.txt        --&gt; error, length = -1
625     * </pre>
626     * <p>
627     * The output will be the same irrespective of the machine that the code is running on.
628     * ie. both Unix and Windows prefixes are matched regardless.
629     *
630     * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
631     * These must be followed by a server name, so double-slashes are not collapsed
632     * to a single slash at the start of the filename.
633     *
634     * @param filename  the filename to find the prefix in, null returns -1
635     * @return the length of the prefix, -1 if invalid or null
636     */
637    public static int getPrefixLength(final String filename) {
638        if (filename == null) {
639            return NOT_FOUND;
640        }
641        final int len = filename.length();
642        if (len == 0) {
643            return 0;
644        }
645        char ch0 = filename.charAt(0);
646        if (ch0 == ':') {
647            return NOT_FOUND;
648        }
649        if (len == 1) {
650            if (ch0 == '~') {
651                return 2;  // return a length greater than the input
652            }
653            return isSeparator(ch0) ? 1 : 0;
654        } else {
655            if (ch0 == '~') {
656                int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
657                int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
658                if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
659                    return len + 1;  // return a length greater than the input
660                }
661                posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
662                posWin = posWin == NOT_FOUND ? posUnix : posWin;
663                return Math.min(posUnix, posWin) + 1;
664            }
665            final char ch1 = filename.charAt(1);
666            if (ch1 == ':') {
667                ch0 = Character.toUpperCase(ch0);
668                if (ch0 >= 'A' && ch0 <= 'Z') {
669                    if (len == 2 || isSeparator(filename.charAt(2)) == false) {
670                        return 2;
671                    }
672                    return 3;
673                } else if (ch0 == UNIX_SEPARATOR) {
674                    return 1;
675                }
676                return NOT_FOUND;
677
678            } else if (isSeparator(ch0) && isSeparator(ch1)) {
679                int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
680                int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
681                if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
682                    return NOT_FOUND;
683                }
684                posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
685                posWin = posWin == NOT_FOUND ? posUnix : posWin;
686                int pos = Math.min(posUnix, posWin) + 1;
687                String hostnamePart = filename.substring(2, pos - 1);
688                return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
689            } else {
690                return isSeparator(ch0) ? 1 : 0;
691            }
692        }
693    }
694
695    /**
696     * Returns the index of the last directory separator character.
697     * <p>
698     * This method will handle a file in either Unix or Windows format.
699     * The position of the last forward or backslash is returned.
700     * <p>
701     * The output will be the same irrespective of the machine that the code is running on.
702     *
703     * @param filename  the filename to find the last path separator in, null returns -1
704     * @return the index of the last separator character, or -1 if there
705     * is no such character
706     */
707    public static int indexOfLastSeparator(final String filename) {
708        if (filename == null) {
709            return NOT_FOUND;
710        }
711        final int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
712        final int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
713        return Math.max(lastUnixPos, lastWindowsPos);
714    }
715
716    /**
717     * Returns the index of the last extension separator character, which is a dot.
718     * <p>
719     * This method also checks that there is no directory separator after the last dot. To do this it uses
720     * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
721     * </p>
722     * <p>
723     * The output will be the same irrespective of the machine that the code is running on.
724     * </p>
725     *
726     * @param filename
727     *            the filename to find the last extension separator in, null returns -1
728     * @return the index of the last extension separator character, or -1 if there is no such character
729     */
730    public static int indexOfExtension(final String filename) {
731        if (filename == null) {
732            return NOT_FOUND;
733        }
734        final int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
735        final int lastSeparator = indexOfLastSeparator(filename);
736        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
737    }
738
739    //-----------------------------------------------------------------------
740    /**
741     * Gets the prefix from a full filename, such as <code>C:/</code>
742     * or <code>~/</code>.
743     * <p>
744     * This method will handle a file in either Unix or Windows format.
745     * The prefix includes the first slash in the full filename where applicable.
746     * <pre>
747     * Windows:
748     * a\b\c.txt           --&gt; ""          --&gt; relative
749     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
750     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
751     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
752     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
753     *
754     * Unix:
755     * a/b/c.txt           --&gt; ""          --&gt; relative
756     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
757     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
758     * ~                   --&gt; "~/"        --&gt; current user (slash added)
759     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
760     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
761     * </pre>
762     * <p>
763     * The output will be the same irrespective of the machine that the code is running on.
764     * ie. both Unix and Windows prefixes are matched regardless.
765     *
766     * @param filename  the filename to query, null returns null
767     * @return the prefix of the file, null if invalid. Null bytes inside string will be removed
768     */
769    public static String getPrefix(final String filename) {
770        if (filename == null) {
771            return null;
772        }
773        final int len = getPrefixLength(filename);
774        if (len < 0) {
775            return null;
776        }
777        if (len > filename.length()) {
778            failIfNullBytePresent(filename + UNIX_SEPARATOR);
779            return filename + UNIX_SEPARATOR;
780        }
781        final String path = filename.substring(0, len);
782        failIfNullBytePresent(path);
783        return path;
784    }
785
786    /**
787     * Gets the path from a full filename, which excludes the prefix.
788     * <p>
789     * This method will handle a file in either Unix or Windows format.
790     * The method is entirely text based, and returns the text before and
791     * including the last forward or backslash.
792     * <pre>
793     * C:\a\b\c.txt --&gt; a\b\
794     * ~/a/b/c.txt  --&gt; a/b/
795     * a.txt        --&gt; ""
796     * a/b/c        --&gt; a/b/
797     * a/b/c/       --&gt; a/b/c/
798     * </pre>
799     * <p>
800     * The output will be the same irrespective of the machine that the code is running on.
801     * <p>
802     * This method drops the prefix from the result.
803     * See {@link #getFullPath(String)} for the method that retains the prefix.
804     *
805     * @param filename  the filename to query, null returns null
806     * @return the path of the file, an empty string if none exists, null if invalid.
807     * Null bytes inside string will be removed
808     */
809    public static String getPath(final String filename) {
810        return doGetPath(filename, 1);
811    }
812
813    /**
814     * Gets the path from a full filename, which excludes the prefix, and
815     * also excluding the final directory separator.
816     * <p>
817     * This method will handle a file in either Unix or Windows format.
818     * The method is entirely text based, and returns the text before the
819     * last forward or backslash.
820     * <pre>
821     * C:\a\b\c.txt --&gt; a\b
822     * ~/a/b/c.txt  --&gt; a/b
823     * a.txt        --&gt; ""
824     * a/b/c        --&gt; a/b
825     * a/b/c/       --&gt; a/b/c
826     * </pre>
827     * <p>
828     * The output will be the same irrespective of the machine that the code is running on.
829     * <p>
830     * This method drops the prefix from the result.
831     * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
832     *
833     * @param filename  the filename to query, null returns null
834     * @return the path of the file, an empty string if none exists, null if invalid.
835     * Null bytes inside string will be removed
836     */
837    public static String getPathNoEndSeparator(final String filename) {
838        return doGetPath(filename, 0);
839    }
840
841    /**
842     * Does the work of getting the path.
843     *
844     * @param filename  the filename
845     * @param separatorAdd  0 to omit the end separator, 1 to return it
846     * @return the path. Null bytes inside string will be removed
847     */
848    private static String doGetPath(final String filename, final int separatorAdd) {
849        if (filename == null) {
850            return null;
851        }
852        final int prefix = getPrefixLength(filename);
853        if (prefix < 0) {
854            return null;
855        }
856        final int index = indexOfLastSeparator(filename);
857        final int endIndex = index+separatorAdd;
858        if (prefix >= filename.length() || index < 0 || prefix >= endIndex) {
859            return "";
860        }
861        final String path = filename.substring(prefix, endIndex);
862        failIfNullBytePresent(path);
863        return path;
864    }
865
866    /**
867     * Gets the full path from a full filename, which is the prefix + path.
868     * <p>
869     * This method will handle a file in either Unix or Windows format.
870     * The method is entirely text based, and returns the text before and
871     * including the last forward or backslash.
872     * <pre>
873     * C:\a\b\c.txt --&gt; C:\a\b\
874     * ~/a/b/c.txt  --&gt; ~/a/b/
875     * a.txt        --&gt; ""
876     * a/b/c        --&gt; a/b/
877     * a/b/c/       --&gt; a/b/c/
878     * C:           --&gt; C:
879     * C:\          --&gt; C:\
880     * ~            --&gt; ~/
881     * ~/           --&gt; ~/
882     * ~user        --&gt; ~user/
883     * ~user/       --&gt; ~user/
884     * </pre>
885     * <p>
886     * The output will be the same irrespective of the machine that the code is running on.
887     *
888     * @param filename  the filename to query, null returns null
889     * @return the path of the file, an empty string if none exists, null if invalid
890     */
891    public static String getFullPath(final String filename) {
892        return doGetFullPath(filename, true);
893    }
894
895    /**
896     * Gets the full path from a full filename, which is the prefix + path,
897     * and also excluding the final directory separator.
898     * <p>
899     * This method will handle a file in either Unix or Windows format.
900     * The method is entirely text based, and returns the text before the
901     * last forward or backslash.
902     * <pre>
903     * C:\a\b\c.txt --&gt; C:\a\b
904     * ~/a/b/c.txt  --&gt; ~/a/b
905     * a.txt        --&gt; ""
906     * a/b/c        --&gt; a/b
907     * a/b/c/       --&gt; a/b/c
908     * C:           --&gt; C:
909     * C:\          --&gt; C:\
910     * ~            --&gt; ~
911     * ~/           --&gt; ~
912     * ~user        --&gt; ~user
913     * ~user/       --&gt; ~user
914     * </pre>
915     * <p>
916     * The output will be the same irrespective of the machine that the code is running on.
917     *
918     * @param filename  the filename to query, null returns null
919     * @return the path of the file, an empty string if none exists, null if invalid
920     */
921    public static String getFullPathNoEndSeparator(final String filename) {
922        return doGetFullPath(filename, false);
923    }
924
925    /**
926     * Does the work of getting the path.
927     *
928     * @param filename  the filename
929     * @param includeSeparator  true to include the end separator
930     * @return the path
931     */
932    private static String doGetFullPath(final String filename, final boolean includeSeparator) {
933        if (filename == null) {
934            return null;
935        }
936        final int prefix = getPrefixLength(filename);
937        if (prefix < 0) {
938            return null;
939        }
940        if (prefix >= filename.length()) {
941            if (includeSeparator) {
942                return getPrefix(filename);  // add end slash if necessary
943            } else {
944                return filename;
945            }
946        }
947        final int index = indexOfLastSeparator(filename);
948        if (index < 0) {
949            return filename.substring(0, prefix);
950        }
951        int end = index + (includeSeparator ?  1 : 0);
952        if (end == 0) {
953            end++;
954        }
955        return filename.substring(0, end);
956    }
957
958    /**
959     * Gets the name minus the path from a full filename.
960     * <p>
961     * This method will handle a file in either Unix or Windows format.
962     * The text after the last forward or backslash is returned.
963     * <pre>
964     * a/b/c.txt --&gt; c.txt
965     * a.txt     --&gt; a.txt
966     * a/b/c     --&gt; c
967     * a/b/c/    --&gt; ""
968     * </pre>
969     * <p>
970     * The output will be the same irrespective of the machine that the code is running on.
971     *
972     * @param filename  the filename to query, null returns null
973     * @return the name of the file without the path, or an empty string if none exists.
974     * Null bytes inside string will be removed
975     */
976    public static String getName(final String filename) {
977        if (filename == null) {
978            return null;
979        }
980        failIfNullBytePresent(filename);
981        final int index = indexOfLastSeparator(filename);
982        return filename.substring(index + 1);
983    }
984
985    /**
986     * Check the input for null bytes, a sign of unsanitized data being passed to to file level functions.
987     *
988     * This may be used for poison byte attacks.
989     * @param path the path to check
990     */
991    private static void failIfNullBytePresent(final String path) {
992        final int len = path.length();
993        for (int i = 0; i < len; i++) {
994            if (path.charAt(i) == 0) {
995                throw new IllegalArgumentException("Null byte present in file/path name. There are no " +
996                        "known legitimate use cases for such data, but several injection attacks may use it");
997            }
998        }
999    }
1000
1001    /**
1002     * Gets the base name, minus the full path and extension, from a full filename.
1003     * <p>
1004     * This method will handle a file in either Unix or Windows format.
1005     * The text after the last forward or backslash and before the last dot is returned.
1006     * <pre>
1007     * a/b/c.txt --&gt; c
1008     * a.txt     --&gt; a
1009     * a/b/c     --&gt; c
1010     * a/b/c/    --&gt; ""
1011     * </pre>
1012     * <p>
1013     * The output will be the same irrespective of the machine that the code is running on.
1014     *
1015     * @param filename  the filename to query, null returns null
1016     * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string
1017     * will be removed
1018     */
1019    public static String getBaseName(final String filename) {
1020        return removeExtension(getName(filename));
1021    }
1022
1023    /**
1024     * Gets the extension of a filename.
1025     * <p>
1026     * This method returns the textual part of the filename after the last dot.
1027     * There must be no directory separator after the dot.
1028     * <pre>
1029     * foo.txt      --&gt; "txt"
1030     * a/b/c.jpg    --&gt; "jpg"
1031     * a/b.txt/c    --&gt; ""
1032     * a/b/c        --&gt; ""
1033     * </pre>
1034     * <p>
1035     * The output will be the same irrespective of the machine that the code is running on.
1036     *
1037     * @param filename the filename to retrieve the extension of.
1038     * @return the extension of the file or an empty string if none exists or {@code null}
1039     * if the filename is {@code null}.
1040     */
1041    public static String getExtension(final String filename) {
1042        if (filename == null) {
1043            return null;
1044        }
1045        final int index = indexOfExtension(filename);
1046        if (index == NOT_FOUND) {
1047            return "";
1048        } else {
1049            return filename.substring(index + 1);
1050        }
1051    }
1052
1053    //-----------------------------------------------------------------------
1054    /**
1055     * Removes the extension from a filename.
1056     * <p>
1057     * This method returns the textual part of the filename before the last dot.
1058     * There must be no directory separator after the dot.
1059     * <pre>
1060     * foo.txt    --&gt; foo
1061     * a\b\c.jpg  --&gt; a\b\c
1062     * a\b\c      --&gt; a\b\c
1063     * a.b\c      --&gt; a.b\c
1064     * </pre>
1065     * <p>
1066     * The output will be the same irrespective of the machine that the code is running on.
1067     *
1068     * @param filename  the filename to query, null returns null
1069     * @return the filename minus the extension
1070     */
1071    public static String removeExtension(final String filename) {
1072        if (filename == null) {
1073            return null;
1074        }
1075        failIfNullBytePresent(filename);
1076
1077        final int index = indexOfExtension(filename);
1078        if (index == NOT_FOUND) {
1079            return filename;
1080        } else {
1081            return filename.substring(0, index);
1082        }
1083    }
1084
1085    //-----------------------------------------------------------------------
1086    /**
1087     * Checks whether two filenames are equal exactly.
1088     * <p>
1089     * No processing is performed on the filenames other than comparison,
1090     * thus this is merely a null-safe case-sensitive equals.
1091     *
1092     * @param filename1  the first filename to query, may be null
1093     * @param filename2  the second filename to query, may be null
1094     * @return true if the filenames are equal, null equals null
1095     * @see IOCase#SENSITIVE
1096     */
1097    public static boolean equals(final String filename1, final String filename2) {
1098        return equals(filename1, filename2, false, IOCase.SENSITIVE);
1099    }
1100
1101    /**
1102     * Checks whether two filenames are equal using the case rules of the system.
1103     * <p>
1104     * No processing is performed on the filenames other than comparison.
1105     * The check is case-sensitive on Unix and case-insensitive on Windows.
1106     *
1107     * @param filename1  the first filename to query, may be null
1108     * @param filename2  the second filename to query, may be null
1109     * @return true if the filenames are equal, null equals null
1110     * @see IOCase#SYSTEM
1111     */
1112    public static boolean equalsOnSystem(final String filename1, final String filename2) {
1113        return equals(filename1, filename2, false, IOCase.SYSTEM);
1114    }
1115
1116    //-----------------------------------------------------------------------
1117    /**
1118     * Checks whether two filenames are equal after both have been normalized.
1119     * <p>
1120     * Both filenames are first passed to {@link #normalize(String)}.
1121     * The check is then performed in a case-sensitive manner.
1122     *
1123     * @param filename1  the first filename to query, may be null
1124     * @param filename2  the second filename to query, may be null
1125     * @return true if the filenames are equal, null equals null
1126     * @see IOCase#SENSITIVE
1127     */
1128    public static boolean equalsNormalized(final String filename1, final String filename2) {
1129        return equals(filename1, filename2, true, IOCase.SENSITIVE);
1130    }
1131
1132    /**
1133     * Checks whether two filenames are equal after both have been normalized
1134     * and using the case rules of the system.
1135     * <p>
1136     * Both filenames are first passed to {@link #normalize(String)}.
1137     * The check is then performed case-sensitive on Unix and
1138     * case-insensitive on Windows.
1139     *
1140     * @param filename1  the first filename to query, may be null
1141     * @param filename2  the second filename to query, may be null
1142     * @return true if the filenames are equal, null equals null
1143     * @see IOCase#SYSTEM
1144     */
1145    public static boolean equalsNormalizedOnSystem(final String filename1, final String filename2) {
1146        return equals(filename1, filename2, true, IOCase.SYSTEM);
1147    }
1148
1149    /**
1150     * Checks whether two filenames are equal, optionally normalizing and providing
1151     * control over the case-sensitivity.
1152     *
1153     * @param filename1  the first filename to query, may be null
1154     * @param filename2  the second filename to query, may be null
1155     * @param normalized  whether to normalize the filenames
1156     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1157     * @return true if the filenames are equal, null equals null
1158     * @since 1.3
1159     */
1160    public static boolean equals(
1161            String filename1, String filename2,
1162            final boolean normalized, IOCase caseSensitivity) {
1163
1164        if (filename1 == null || filename2 == null) {
1165            return filename1 == null && filename2 == null;
1166        }
1167        if (normalized) {
1168            filename1 = normalize(filename1);
1169            filename2 = normalize(filename2);
1170            if (filename1 == null || filename2 == null) {
1171                throw new NullPointerException(
1172                    "Error normalizing one or both of the file names");
1173            }
1174        }
1175        if (caseSensitivity == null) {
1176            caseSensitivity = IOCase.SENSITIVE;
1177        }
1178        return caseSensitivity.checkEquals(filename1, filename2);
1179    }
1180
1181    //-----------------------------------------------------------------------
1182    /**
1183     * Checks whether the extension of the filename is that specified.
1184     * <p>
1185     * This method obtains the extension as the textual part of the filename
1186     * after the last dot. There must be no directory separator after the dot.
1187     * The extension check is case-sensitive on all platforms.
1188     *
1189     * @param filename  the filename to query, null returns false
1190     * @param extension  the extension to check for, null or empty checks for no extension
1191     * @return true if the filename has the specified extension
1192     * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes
1193     */
1194    public static boolean isExtension(final String filename, final String extension) {
1195        if (filename == null) {
1196            return false;
1197        }
1198        failIfNullBytePresent(filename);
1199
1200        if (extension == null || extension.isEmpty()) {
1201            return indexOfExtension(filename) == NOT_FOUND;
1202        }
1203        final String fileExt = getExtension(filename);
1204        return fileExt.equals(extension);
1205    }
1206
1207    /**
1208     * Checks whether the extension of the filename is one of those specified.
1209     * <p>
1210     * This method obtains the extension as the textual part of the filename
1211     * after the last dot. There must be no directory separator after the dot.
1212     * The extension check is case-sensitive on all platforms.
1213     *
1214     * @param filename  the filename to query, null returns false
1215     * @param extensions  the extensions to check for, null checks for no extension
1216     * @return true if the filename is one of the extensions
1217     * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes
1218     */
1219    public static boolean isExtension(final String filename, final String[] extensions) {
1220        if (filename == null) {
1221            return false;
1222        }
1223        failIfNullBytePresent(filename);
1224
1225        if (extensions == null || extensions.length == 0) {
1226            return indexOfExtension(filename) == NOT_FOUND;
1227        }
1228        final String fileExt = getExtension(filename);
1229        for (final String extension : extensions) {
1230            if (fileExt.equals(extension)) {
1231                return true;
1232            }
1233        }
1234        return false;
1235    }
1236
1237    /**
1238     * Checks whether the extension of the filename is one of those specified.
1239     * <p>
1240     * This method obtains the extension as the textual part of the filename
1241     * after the last dot. There must be no directory separator after the dot.
1242     * The extension check is case-sensitive on all platforms.
1243     *
1244     * @param filename  the filename to query, null returns false
1245     * @param extensions  the extensions to check for, null checks for no extension
1246     * @return true if the filename is one of the extensions
1247     * @throws java.lang.IllegalArgumentException if the supplied filename contains null bytes
1248     */
1249    public static boolean isExtension(final String filename, final Collection<String> extensions) {
1250        if (filename == null) {
1251            return false;
1252        }
1253        failIfNullBytePresent(filename);
1254
1255        if (extensions == null || extensions.isEmpty()) {
1256            return indexOfExtension(filename) == NOT_FOUND;
1257        }
1258        final String fileExt = getExtension(filename);
1259        for (final String extension : extensions) {
1260            if (fileExt.equals(extension)) {
1261                return true;
1262            }
1263        }
1264        return false;
1265    }
1266
1267    //-----------------------------------------------------------------------
1268    /**
1269     * Checks a filename to see if it matches the specified wildcard matcher,
1270     * always testing case-sensitive.
1271     * <p>
1272     * The wildcard matcher uses the characters '?' and '*' to represent a
1273     * single or multiple (zero or more) wildcard characters.
1274     * This is the same as often found on Dos/Unix command lines.
1275     * The check is case-sensitive always.
1276     * <pre>
1277     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1278     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1279     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1280     * wildcardMatch("c.txt", "*.???")      --&gt; true
1281     * wildcardMatch("c.txt", "*.????")     --&gt; false
1282     * </pre>
1283     * N.B. the sequence "*?" does not work properly at present in match strings.
1284     *
1285     * @param filename  the filename to match on
1286     * @param wildcardMatcher  the wildcard string to match against
1287     * @return true if the filename matches the wildcard string
1288     * @see IOCase#SENSITIVE
1289     */
1290    public static boolean wildcardMatch(final String filename, final String wildcardMatcher) {
1291        return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1292    }
1293
1294    /**
1295     * Checks a filename to see if it matches the specified wildcard matcher
1296     * using the case rules of the system.
1297     * <p>
1298     * The wildcard matcher uses the characters '?' and '*' to represent a
1299     * single or multiple (zero or more) wildcard characters.
1300     * This is the same as often found on Dos/Unix command lines.
1301     * The check is case-sensitive on Unix and case-insensitive on Windows.
1302     * <pre>
1303     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1304     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1305     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1306     * wildcardMatch("c.txt", "*.???")      --&gt; true
1307     * wildcardMatch("c.txt", "*.????")     --&gt; false
1308     * </pre>
1309     * N.B. the sequence "*?" does not work properly at present in match strings.
1310     *
1311     * @param filename  the filename to match on
1312     * @param wildcardMatcher  the wildcard string to match against
1313     * @return true if the filename matches the wildcard string
1314     * @see IOCase#SYSTEM
1315     */
1316    public static boolean wildcardMatchOnSystem(final String filename, final String wildcardMatcher) {
1317        return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1318    }
1319
1320    /**
1321     * Checks a filename to see if it matches the specified wildcard matcher
1322     * allowing control over case-sensitivity.
1323     * <p>
1324     * The wildcard matcher uses the characters '?' and '*' to represent a
1325     * single or multiple (zero or more) wildcard characters.
1326     * N.B. the sequence "*?" does not work properly at present in match strings.
1327     *
1328     * @param filename  the filename to match on
1329     * @param wildcardMatcher  the wildcard string to match against
1330     * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1331     * @return true if the filename matches the wildcard string
1332     * @since 1.3
1333     */
1334    public static boolean wildcardMatch(final String filename, final String wildcardMatcher, IOCase caseSensitivity) {
1335        if (filename == null && wildcardMatcher == null) {
1336            return true;
1337        }
1338        if (filename == null || wildcardMatcher == null) {
1339            return false;
1340        }
1341        if (caseSensitivity == null) {
1342            caseSensitivity = IOCase.SENSITIVE;
1343        }
1344        final String[] wcs = splitOnTokens(wildcardMatcher);
1345        boolean anyChars = false;
1346        int textIdx = 0;
1347        int wcsIdx = 0;
1348        final Stack<int[]> backtrack = new Stack<>();
1349
1350        // loop around a backtrack stack, to handle complex * matching
1351        do {
1352            if (backtrack.size() > 0) {
1353                final int[] array = backtrack.pop();
1354                wcsIdx = array[0];
1355                textIdx = array[1];
1356                anyChars = true;
1357            }
1358
1359            // loop whilst tokens and text left to process
1360            while (wcsIdx < wcs.length) {
1361
1362                if (wcs[wcsIdx].equals("?")) {
1363                    // ? so move to next text char
1364                    textIdx++;
1365                    if (textIdx > filename.length()) {
1366                        break;
1367                    }
1368                    anyChars = false;
1369
1370                } else if (wcs[wcsIdx].equals("*")) {
1371                    // set any chars status
1372                    anyChars = true;
1373                    if (wcsIdx == wcs.length - 1) {
1374                        textIdx = filename.length();
1375                    }
1376
1377                } else {
1378                    // matching text token
1379                    if (anyChars) {
1380                        // any chars then try to locate text token
1381                        textIdx = caseSensitivity.checkIndexOf(filename, textIdx, wcs[wcsIdx]);
1382                        if (textIdx == NOT_FOUND) {
1383                            // token not found
1384                            break;
1385                        }
1386                        final int repeat = caseSensitivity.checkIndexOf(filename, textIdx + 1, wcs[wcsIdx]);
1387                        if (repeat >= 0) {
1388                            backtrack.push(new int[] {wcsIdx, repeat});
1389                        }
1390                    } else {
1391                        // matching from current position
1392                        if (!caseSensitivity.checkRegionMatches(filename, textIdx, wcs[wcsIdx])) {
1393                            // couldnt match token
1394                            break;
1395                        }
1396                    }
1397
1398                    // matched text token, move text index to end of matched token
1399                    textIdx += wcs[wcsIdx].length();
1400                    anyChars = false;
1401                }
1402
1403                wcsIdx++;
1404            }
1405
1406            // full match
1407            if (wcsIdx == wcs.length && textIdx == filename.length()) {
1408                return true;
1409            }
1410
1411        } while (backtrack.size() > 0);
1412
1413        return false;
1414    }
1415
1416    /**
1417     * Splits a string into a number of tokens.
1418     * The text is split by '?' and '*'.
1419     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1420     *
1421     * @param text  the text to split
1422     * @return the array of tokens, never null
1423     */
1424    static String[] splitOnTokens(final String text) {
1425        // used by wildcardMatch
1426        // package level so a unit test may run on this
1427
1428        if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1429            return new String[] { text };
1430        }
1431
1432        final char[] array = text.toCharArray();
1433        final ArrayList<String> list = new ArrayList<>();
1434        final StringBuilder buffer = new StringBuilder();
1435        char prevChar = 0;
1436        for (final char ch : array) {
1437            if (ch == '?' || ch == '*') {
1438                if (buffer.length() != 0) {
1439                    list.add(buffer.toString());
1440                    buffer.setLength(0);
1441                }
1442                if (ch == '?') {
1443                    list.add("?");
1444                } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1445                    list.add("*");
1446                }
1447            } else {
1448                buffer.append(ch);
1449            }
1450            prevChar = ch;
1451        }
1452        if (buffer.length() != 0) {
1453            list.add(buffer.toString());
1454        }
1455
1456        return list.toArray( new String[ list.size() ] );
1457    }
1458
1459    /**
1460     * Checks whether a given string is a valid host name according to
1461     * RFC 3986.
1462     *
1463     * <p>Accepted are IP addresses (v4 and v6) as well as what the
1464     * RFC calls a "reg-name". Percent encoded names don't seem to be
1465     * valid names in UNC paths.</p>
1466     *
1467     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1468     * @param name the hostname to validate
1469     * @return true if the given name is a valid host name
1470     */
1471    private static boolean isValidHostName(String name) {
1472        return isIPv4Address(name) || isIPv6Address(name) || isRFC3986HostName(name);
1473    }
1474
1475    private static final Pattern IPV4_PATTERN =
1476        Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");
1477    private static final int IPV4_MAX_OCTET_VALUE = 255;
1478
1479    // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1480    private static boolean isIPv4Address(String name) {
1481        Matcher m = IPV4_PATTERN.matcher(name);
1482        if (!m.matches() || m.groupCount() != 4) {
1483            return false;
1484        }
1485
1486        // verify that address subgroups are legal
1487        for (int i = 1; i < 5; i++) {
1488            String ipSegment = m.group(i);
1489            if (ipSegment == null || ipSegment.length() == 0) {
1490                return false;
1491            }
1492
1493            int iIpSegment = 0;
1494
1495            try {
1496                iIpSegment = Integer.parseInt(ipSegment);
1497            } catch(NumberFormatException e) {
1498                return false;
1499            }
1500
1501            if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
1502                return false;
1503            }
1504
1505            if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1506                return false;
1507            }
1508
1509        }
1510
1511        return true;
1512    }
1513
1514    private static final int IPV6_MAX_HEX_GROUPS = 8;
1515    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;
1516    private static final int MAX_UNSIGNED_SHORT = 0xffff;
1517    private static final int BASE_16 = 16;
1518
1519    // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1520    private static boolean isIPv6Address(String inet6Address) {
1521        boolean containsCompressedZeroes = inet6Address.contains("::");
1522        if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) {
1523            return false;
1524        }
1525        if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::"))
1526                || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) {
1527            return false;
1528        }
1529        String[] octets = inet6Address.split(":");
1530        if (containsCompressedZeroes) {
1531            List<String> octetList = new ArrayList<String>(Arrays.asList(octets));
1532            if (inet6Address.endsWith("::")) {
1533                // String.split() drops ending empty segments
1534                octetList.add("");
1535            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1536                octetList.remove(0);
1537            }
1538            octets = octetList.toArray(new String[octetList.size()]);
1539        }
1540        if (octets.length > IPV6_MAX_HEX_GROUPS) {
1541            return false;
1542        }
1543        int validOctets = 0;
1544        int emptyOctets = 0; // consecutive empty chunks
1545        for (int index = 0; index < octets.length; index++) {
1546            String octet = octets[index];
1547            if (octet.length() == 0) {
1548                emptyOctets++;
1549                if (emptyOctets > 1) {
1550                    return false;
1551                }
1552            } else {
1553                emptyOctets = 0;
1554                // Is last chunk an IPv4 address?
1555                if (index == octets.length - 1 && octet.contains(".")) {
1556                    if (!isIPv4Address(octet)) {
1557                        return false;
1558                    }
1559                    validOctets += 2;
1560                    continue;
1561                }
1562                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1563                    return false;
1564                }
1565                int octetInt = 0;
1566                try {
1567                    octetInt = Integer.parseInt(octet, BASE_16);
1568                } catch (NumberFormatException e) {
1569                    return false;
1570                }
1571                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1572                    return false;
1573                }
1574            }
1575            validOctets++;
1576        }
1577        if (validOctets > IPV6_MAX_HEX_GROUPS || (validOctets < IPV6_MAX_HEX_GROUPS && !containsCompressedZeroes)) {
1578            return false;
1579        }
1580        return true;
1581    }
1582
1583    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
1584
1585    private static boolean isRFC3986HostName(String name) {
1586        String[] parts = name.split("\\.", -1);
1587        for (int i = 0; i < parts.length; i++) {
1588            if (parts[i].length() == 0) {
1589                // trailing dot is legal, otherwise we've hit a .. sequence
1590                return i == parts.length - 1;
1591            }
1592            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1593                return false;
1594            }
1595        }
1596        return true;
1597    }
1598}