/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.IOUtils;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;
/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then to read each entry as a normal input stream
 * using read().
 *
 * There doesn't seem to be a hint on the encoding of string values
 * in any piece of documentation.  Given that the main purpose of
 * dump/restore is backing up a system, it seems very likely the format
 * uses the current default encoding of the system.
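 *
 * <p>A minimal usage sketch; {@code backup.dump} is a hypothetical
 * archive file name:</p>
 * <pre>{@code
 * try (InputStream in = Files.newInputStream(Paths.get("backup.dump"));
 *      DumpArchiveInputStream dump = new DumpArchiveInputStream(in)) {
 *     DumpArchiveEntry entry;
 *     while ((entry = dump.getNextEntry()) != null) {
 *         // each entry's contents can now be consumed via dump.read(...)
 *     }
 * }
 * }</pre>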
 *
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private final DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    /** Size in bytes of the current entry. */
    private long entrySize;
    /** Number of bytes of the current entry already returned by read(). */
    private long entryOffset;
    /** Index of the next record within the current segment. */
    private int readIdx;
    /** Buffer holding a single tape record. */
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    /** Read position within readBuf. */
    private int recordOffset;
    /** Offset of the current entry's header within the raw stream. */
    private long filepos;
    protected TapeInputStream raw;
    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>();

    // queue of (directory) entries where we now have the full path.
    private final Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for file names and labels.
     */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;
    /**
     * Constructor using the platform's default encoding for file
     * names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructor.
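     *
     * <p>For example, {@code new DumpArchiveInputStream(in, "UTF-8")} would
     * decode file names as UTF-8; the choice of UTF-8 here is purely
     * illustrative.</p>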
     *
     * @param is stream to read from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.6
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is, final String encoding)
        throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            final byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (final IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        final Dirent root = new Dirent(2, 2, 4, ".");
        names.put(2, root);

        // use a priority queue to ensure that parent directories are
        // released before their children.
        queue = new PriorityQueue<>(10,
                (p, q) -> {
                    if (p.getOriginalName() == null || q.getOriginalName() == null) {
                        return Integer.MAX_VALUE;
                    }

                    return p.getOriginalName().compareTo(q.getOriginalName());
                });
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     * @return the summary
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Read CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read BITS segment.
     */
    private void readBITS() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read the next entry.
     * @return the next entry
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip((long) DumpArchiveConstants.TP_SIZE
                             * (active.getHeaderCount()
                                - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Read directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
                DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) &&
                    DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = IOUtils.readRange(raw, datalen);
                if (blockBuffer.length != datalen) {
                    throw new EOFException();
                }
            } else if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8;
                    i += reclen) {
                // each dirent record is laid out as: inode number (4 bytes),
                // record length (2 bytes), type (1 byte), name length (1 byte),
                // followed by the name itself.
                final int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                final byte type = blockBuffer[i + 6];

                final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                final Dirent d = new Dirent(ino, entry.getIno(), type, name);

                // note: a directory inode may appear more than once; the put
                // below simply replaces any earlier dirent for the same ino.
                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (final Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    final String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue()
                         .setSimpleName(names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (final DumpArchiveEntry e : queue) {
                    pending.remove(e.getIno());
                }
            }

            final byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * @param entry the archive entry to resolve
     * @return full path for specified archive entry, or null if there's a gap.
     */
    private String getPath(final DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value; if so we clear the
        // stack and defer this entry until the gap has been filled.
        final Stack<String> elements = new Stack<>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }

            dirent = names.get(i);
            elements.push(dirent.getName());

            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);

            return null;
        }

        // generate full path from stack of elements.
        final StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        // never read past the end of the current entry.
        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            final int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                // a segment header describes at most 512 records; once we've
                // consumed them all we must read the next header.
                if (readIdx >= 512) {
                    final byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                // sparse records represent holes and read back as zeros.
                if (!active.isSparseRecord(readIdx++)) {
                    final int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes the stream for this entry.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
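     *
     * <p>A hedged sketch of probing a stream; {@code in} is assumed to be a
     * mark-supporting InputStream positioned at the start of the data:</p>
     * <pre>{@code
     * byte[] head = new byte[DumpArchiveConstants.TP_SIZE];
     * in.mark(head.length);
     * int n = IOUtils.readFully(in, head);
     * in.reset();
     * boolean isDump = DumpArchiveInputStream.matches(head, n);
     * }</pre>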
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(final byte[] buffer, final int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
            24);
    }

}