001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.RandomAccessFile;
023import java.io.UnsupportedEncodingException;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetEncoder;
026import java.nio.charset.StandardCharsets;
027
028import org.apache.commons.io.Charsets;
029
030/**
031 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
032 * the last line). Useful for e.g. searching in log files.
033 *
034 * @since 2.2
035 */
036public class ReversedLinesFileReader implements Closeable {
037
038    private final int blockSize;
039    private final Charset encoding;
040
041    private final RandomAccessFile randomAccessFile;
042
043    private final long totalByteLength;
044    private final long totalBlockCount;
045
046    private final byte[][] newLineSequences;
047    private final int avoidNewlineSplitBufferSize;
048    private final int byteDecrement;
049
050    private FilePart currentFilePart;
051
052    private boolean trailingNewlineOfFileSkipped = false;
053
054    /**
055     * Creates a ReversedLinesFileReader with default block size of 4KB and the
056     * platform's default encoding.
057     *
058     * @param file
059     *            the file to be read
060     * @throws IOException  if an I/O error occurs
061     * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead
062     */
063    @Deprecated
064    public ReversedLinesFileReader(final File file) throws IOException {
065        this(file, 4096, Charset.defaultCharset());
066    }
067
068    /**
069     * Creates a ReversedLinesFileReader with default block size of 4KB and the
070     * specified encoding.
071     *
072     * @param file
073     *            the file to be read
074     * @param charset the encoding to use
075     * @throws IOException  if an I/O error occurs
076     * @since 2.5
077     */
078    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
079        this(file, 4096, charset);
080    }
081
082    /**
083     * Creates a ReversedLinesFileReader with the given block size and encoding.
084     *
085     * @param file
086     *            the file to be read
087     * @param blockSize
088     *            size of the internal buffer (for ideal performance this should
089     *            match with the block size of the underlying file system).
090     * @param encoding
091     *            the encoding of the file
092     * @throws IOException  if an I/O error occurs
093     * @since 2.3
094     */
095    public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
096        this.blockSize = blockSize;
097        this.encoding = encoding;
098
099        // --- check & prepare encoding ---
100        final Charset charset = Charsets.toCharset(encoding);
101        final CharsetEncoder charsetEncoder = charset.newEncoder();
102        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
103        if (maxBytesPerChar == 1f) {
104            // all one byte encodings are no problem
105            byteDecrement = 1;
106        } else if (charset == StandardCharsets.UTF_8) {
107            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
108            // http://en.wikipedia.org/wiki/UTF-8
109            byteDecrement = 1;
110        } else if(charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
111                // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
112                charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
113                charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
114                charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
115                charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
116            byteDecrement = 1;
117        } else if (charset == StandardCharsets.UTF_16BE || charset == StandardCharsets.UTF_16LE) {
118            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
119            // however byte order has to be specified
120            byteDecrement = 2;
121        } else if (charset == StandardCharsets.UTF_16) {
122            throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or " +
123                    "UTF-16LE)");
124        } else {
125            throw new UnsupportedEncodingException("Encoding " + encoding + " is not supported yet (feel free to " +
126                    "submit a patch)");
127        }
128
129        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
130        newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };
131
132        avoidNewlineSplitBufferSize = newLineSequences[0].length;
133
134        // Open file
135        randomAccessFile = new RandomAccessFile(file, "r");
136        totalByteLength = randomAccessFile.length();
137        int lastBlockLength = (int) (totalByteLength % blockSize);
138        if (lastBlockLength > 0) {
139            totalBlockCount = totalByteLength / blockSize + 1;
140        } else {
141            totalBlockCount = totalByteLength / blockSize;
142            if (totalByteLength > 0) {
143                lastBlockLength = blockSize;
144            }
145        }
146        currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
147
148    }
149
150    /**
151     * Creates a ReversedLinesFileReader with the given block size and encoding.
152     *
153     * @param file
154     *            the file to be read
155     * @param blockSize
156     *            size of the internal buffer (for ideal performance this should
157     *            match with the block size of the underlying file system).
158     * @param encoding
159     *            the encoding of the file
160     * @throws IOException  if an I/O error occurs
161     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of {@link UnsupportedEncodingException} in
162     * version 2.2 if the encoding is not supported.
163     */
164    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
165        this(file, blockSize, Charsets.toCharset(encoding));
166    }
167
168    /**
169     * Returns the lines of the file from bottom to top.
170     *
171     * @return the next line or null if the start of the file is reached
172     * @throws IOException  if an I/O error occurs
173     */
174    public String readLine() throws IOException {
175
176        String line = currentFilePart.readLine();
177        while (line == null) {
178            currentFilePart = currentFilePart.rollOver();
179            if (currentFilePart != null) {
180                line = currentFilePart.readLine();
181            } else {
182                // no more fileparts: we're done, leave line set to null
183                break;
184            }
185        }
186
187        // aligned behaviour with BufferedReader that doesn't return a last, empty line
188        if("".equals(line) && !trailingNewlineOfFileSkipped) {
189            trailingNewlineOfFileSkipped = true;
190            line = readLine();
191        }
192
193        return line;
194    }
195
196    /**
197     * Closes underlying resources.
198     *
199     * @throws IOException  if an I/O error occurs
200     */
201    @Override
202    public void close() throws IOException {
203        randomAccessFile.close();
204    }
205
206    private class FilePart {
207        private final long no;
208
209        private final byte[] data;
210
211        private byte[] leftOver;
212
213        private int currentLastBytePos;
214
215        /**
216         * ctor
217         * @param no the part number
218         * @param length its length
219         * @param leftOverOfLastFilePart remainder
220         * @throws IOException if there is a problem reading the file
221         */
222        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
223            this.no = no;
224            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
225            this.data = new byte[dataLength];
226            final long off = (no - 1) * blockSize;
227
228            // read data
229            if (no > 0 /* file not empty */) {
230                randomAccessFile.seek(off);
231                final int countRead = randomAccessFile.read(data, 0, length);
232                if (countRead != length) {
233                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
234                }
235            }
236            // copy left over part into data arr
237            if (leftOverOfLastFilePart != null) {
238                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
239            }
240            this.currentLastBytePos = data.length - 1;
241            this.leftOver = null;
242        }
243
244        /**
245         * Handles block rollover
246         *
247         * @return the new FilePart or null
248         * @throws IOException if there was a problem reading the file
249         */
250        private FilePart rollOver() throws IOException {
251
252            if (currentLastBytePos > -1) {
253                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
254                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
255            }
256
257            if (no > 1) {
258                return new FilePart(no - 1, blockSize, leftOver);
259            } else {
260                // NO 1 was the last FilePart, we're finished
261                if (leftOver != null) {
262                    throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
263                            + new String(leftOver, encoding));
264                }
265                return null;
266            }
267        }
268
269        /**
270         * Reads a line.
271         *
272         * @return the line or null
273         * @throws IOException if there is an error reading from the file
274         */
275        private String readLine() throws IOException {
276
277            String line = null;
278            int newLineMatchByteCount;
279
280            final boolean isLastFilePart = no == 1;
281
282            int i = currentLastBytePos;
283            while (i > -1) {
284
285                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
286                    // avoidNewlineSplitBuffer: for all except the last file part we
287                    // take a few bytes to the next file part to avoid splitting of newlines
288                    createLeftOver();
289                    break; // skip last few bytes and leave it to the next file part
290                }
291
292                // --- check for newline ---
293                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
294                    final int lineStart = i + 1;
295                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
296
297                    if (lineLengthBytes < 0) {
298                        throw new IllegalStateException("Unexpected negative line length="+lineLengthBytes);
299                    }
300                    final byte[] lineData = new byte[lineLengthBytes];
301                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);
302
303                    line = new String(lineData, encoding);
304
305                    currentLastBytePos = i - newLineMatchByteCount;
306                    break; // found line
307                }
308
309                // --- move cursor ---
310                i -= byteDecrement;
311
312                // --- end of file part handling ---
313                if (i < 0) {
314                    createLeftOver();
315                    break; // end of file part
316                }
317            }
318
319            // --- last file part handling ---
320            if (isLastFilePart && leftOver != null) {
321                // there will be no line break anymore, this is the first line of the file
322                line = new String(leftOver, encoding);
323                leftOver = null;
324            }
325
326            return line;
327        }
328
329        /**
330         * Creates the buffer containing any left over bytes.
331         */
332        private void createLeftOver() {
333            final int lineLengthBytes = currentLastBytePos + 1;
334            if (lineLengthBytes > 0) {
335                // create left over for next block
336                leftOver = new byte[lineLengthBytes];
337                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
338            } else {
339                leftOver = null;
340            }
341            currentLastBytePos = -1;
342        }
343
344        /**
345         * Finds the new-line sequence and return its length.
346         *
347         * @param data buffer to scan
348         * @param i start offset in buffer
349         * @return length of newline sequence or 0 if none found
350         */
351        private int getNewLineMatchByteCount(final byte[] data, final int i) {
352            for (final byte[] newLineSequence : newLineSequences) {
353                boolean match = true;
354                for (int j = newLineSequence.length - 1; j >= 0; j--) {
355                    final int k = i + j - (newLineSequence.length - 1);
356                    match &= k >= 0 && data[k] == newLineSequence[j];
357                }
358                if (match) {
359                    return newLineSequence.length;
360                }
361            }
362            return 0;
363        }
364    }
365
366}