001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025import java.security.AccessController; 026import java.security.PrivilegedAction; 027import java.util.ArrayList; 028import java.util.Collections; 029import java.util.Iterator; 030import java.util.Locale; 031import java.util.Set; 032import java.util.SortedMap; 033import java.util.TreeMap; 034 035import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 036import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 037import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 038import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 039import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 040import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 041import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 042import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 043import org.apache.commons.compress.archivers.sevenz.SevenZFile; 044import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 045import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 046import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 047import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 048import org.apache.commons.compress.utils.IOUtils; 049import org.apache.commons.compress.utils.Lists; 050import org.apache.commons.compress.utils.ServiceLoaderIterator; 051import org.apache.commons.compress.utils.Sets; 052 053/** 054 * Factory to create Archive[In|Out]putStreams from names or the first bytes of 055 * the InputStream. In order to add other implementations, you should extend 056 * ArchiveStreamFactory and override the appropriate methods (and call their 057 * implementation from super of course). 058 * 059 * Compressing a ZIP-File: 060 * 061 * <pre> 062 * final OutputStream out = Files.newOutputStream(output.toPath()); 063 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 064 * 065 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 066 * IOUtils.copy(Files.newInputStream(file1.toPath()), os); 067 * os.closeArchiveEntry(); 068 * 069 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 070 * IOUtils.copy(Files.newInputStream(file2.toPath()), os); 071 * os.closeArchiveEntry(); 072 * os.close(); 073 * </pre> 074 * 075 * Decompressing a ZIP-File: 076 * 077 * <pre> 078 * final InputStream is = Files.newInputStream(input.toPath()); 079 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 080 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); 081 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName())); 082 * IOUtils.copy(in, out); 083 * out.close(); 084 * in.close(); 085 * </pre> 086 * @Immutable provided that the deprecated method setEntryEncoding is not used. 087 * @ThreadSafe even if the deprecated method setEntryEncoding is used 088 */ 089public class ArchiveStreamFactory implements ArchiveStreamProvider { 090 091 private static final int TAR_HEADER_SIZE = 512; 092 093 private static final int DUMP_SIGNATURE_SIZE = 32; 094 095 private static final int SIGNATURE_SIZE = 12; 096 097 /** 098 * The singleton instance using the platform default encoding. 099 * @since 1.21 100 */ 101 public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory(); 102 103 /** 104 * Constant (value {@value}) used to identify the AR archive format. 105 * @since 1.1 106 */ 107 public static final String AR = "ar"; 108 109 /** 110 * Constant (value {@value}) used to identify the ARJ archive format. 111 * Not supported as an output stream type. 112 * @since 1.6 113 */ 114 public static final String ARJ = "arj"; 115 116 /** 117 * Constant (value {@value}) used to identify the CPIO archive format. 118 * @since 1.1 119 */ 120 public static final String CPIO = "cpio"; 121 122 /** 123 * Constant (value {@value}) used to identify the Unix DUMP archive format. 124 * Not supported as an output stream type. 125 * @since 1.3 126 */ 127 public static final String DUMP = "dump"; 128 129 /** 130 * Constant (value {@value}) used to identify the JAR archive format. 131 * @since 1.1 132 */ 133 public static final String JAR = "jar"; 134 135 /** 136 * Constant used to identify the TAR archive format. 137 * @since 1.1 138 */ 139 public static final String TAR = "tar"; 140 141 /** 142 * Constant (value {@value}) used to identify the ZIP archive format. 143 * @since 1.1 144 */ 145 public static final String ZIP = "zip"; 146 147 /** 148 * Constant (value {@value}) used to identify the 7z archive format. 149 * @since 1.8 150 */ 151 public static final String SEVEN_Z = "7z"; 152 153 /** 154 * Entry encoding, null for the platform default. 155 */ 156 private final String encoding; 157 158 /** 159 * Entry encoding, null for the default. 160 */ 161 private volatile String entryEncoding; 162 163 private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders; 164 165 private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders; 166 167 private static ArrayList<ArchiveStreamProvider> findArchiveStreamProviders() { 168 return Lists.newArrayList(serviceLoaderIterator()); 169 } 170 171 static void putAll(final Set<String> names, final ArchiveStreamProvider provider, 172 final TreeMap<String, ArchiveStreamProvider> map) { 173 for (final String name : names) { 174 map.put(toKey(name), provider); 175 } 176 } 177 178 private static Iterator<ArchiveStreamProvider> serviceLoaderIterator() { 179 return new ServiceLoaderIterator<>(ArchiveStreamProvider.class); 180 } 181 182 private static String toKey(final String name) { 183 return name.toUpperCase(Locale.ROOT); 184 } 185 186 /** 187 * Constructs a new sorted map from input stream provider names to provider 188 * objects. 189 * 190 * <p> 191 * The map returned by this method will have one entry for each provider for 192 * which support is available in the current Java virtual machine. If two or 193 * more supported provider have the same name then the resulting map will 194 * contain just one of them; which one it will contain is not specified. 195 * </p> 196 * 197 * <p> 198 * The invocation of this method, and the subsequent use of the resulting 199 * map, may cause time-consuming disk or network I/O operations to occur. 200 * This method is provided for applications that need to enumerate all of 201 * the available providers, for example to allow user provider selection. 202 * </p> 203 * 204 * <p> 205 * This method may return different results at different times if new 206 * providers are dynamically made available to the current Java virtual 207 * machine. 208 * </p> 209 * 210 * @return An immutable, map from names to provider objects 211 * @since 1.13 212 */ 213 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() { 214 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 215 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 216 putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map); 217 for (final ArchiveStreamProvider provider : findArchiveStreamProviders()) { 218 putAll(provider.getInputStreamArchiveNames(), provider, map); 219 } 220 return map; 221 }); 222 } 223 224 /** 225 * Constructs a new sorted map from output stream provider names to provider 226 * objects. 227 * 228 * <p> 229 * The map returned by this method will have one entry for each provider for 230 * which support is available in the current Java virtual machine. If two or 231 * more supported provider have the same name then the resulting map will 232 * contain just one of them; which one it will contain is not specified. 233 * </p> 234 * 235 * <p> 236 * The invocation of this method, and the subsequent use of the resulting 237 * map, may cause time-consuming disk or network I/O operations to occur. 238 * This method is provided for applications that need to enumerate all of 239 * the available providers, for example to allow user provider selection. 240 * </p> 241 * 242 * <p> 243 * This method may return different results at different times if new 244 * providers are dynamically made available to the current Java virtual 245 * machine. 246 * </p> 247 * 248 * @return An immutable, map from names to provider objects 249 * @since 1.13 250 */ 251 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() { 252 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 253 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 254 putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map); 255 for (final ArchiveStreamProvider provider : findArchiveStreamProviders()) { 256 putAll(provider.getOutputStreamArchiveNames(), provider, map); 257 } 258 return map; 259 }); 260 } 261 262 /** 263 * Create an instance using the platform default encoding. 264 */ 265 public ArchiveStreamFactory() { 266 this(null); 267 } 268 269 /** 270 * Create an instance using the specified encoding. 271 * 272 * @param encoding the encoding to be used. 273 * 274 * @since 1.10 275 */ 276 public ArchiveStreamFactory(final String encoding) { 277 this.encoding = encoding; 278 // Also set the original field so can continue to use it. 279 this.entryEncoding = encoding; 280 } 281 282 /** 283 * Returns the encoding to use for arj, jar, zip, dump, cpio and tar 284 * files, or null for the archiver default. 285 * 286 * @return entry encoding, or null for the archiver default 287 * @since 1.5 288 */ 289 public String getEntryEncoding() { 290 return entryEncoding; 291 } 292 293 /** 294 * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default. 295 * 296 * @param entryEncoding the entry encoding, null uses the archiver default. 297 * @since 1.5 298 * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding 299 * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} 300 * was used to specify the factory encoding. 301 */ 302 @Deprecated 303 public void setEntryEncoding(final String entryEncoding) { 304 // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway 305 if (encoding != null) { 306 throw new IllegalStateException("Cannot overide encoding set by the constructor"); 307 } 308 this.entryEncoding = entryEncoding; 309 } 310 311 /** 312 * Creates an archive input stream from an archiver name and an input stream. 313 * 314 * @param archiverName the archive name, 315 * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z} 316 * @param in the input stream 317 * @return the archive input stream 318 * @throws ArchiveException if the archiver name is not known 319 * @throws StreamingNotSupportedException if the format cannot be 320 * read from a stream 321 * @throws IllegalArgumentException if the archiver name or stream is null 322 */ 323 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in) 324 throws ArchiveException { 325 return createArchiveInputStream(archiverName, in, entryEncoding); 326 } 327 328 @Override 329 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in, 330 final String actualEncoding) throws ArchiveException { 331 332 if (archiverName == null) { 333 throw new IllegalArgumentException("Archivername must not be null."); 334 } 335 336 if (in == null) { 337 throw new IllegalArgumentException("InputStream must not be null."); 338 } 339 340 if (AR.equalsIgnoreCase(archiverName)) { 341 return new ArArchiveInputStream(in); 342 } 343 if (ARJ.equalsIgnoreCase(archiverName)) { 344 if (actualEncoding != null) { 345 return new ArjArchiveInputStream(in, actualEncoding); 346 } 347 return new ArjArchiveInputStream(in); 348 } 349 if (ZIP.equalsIgnoreCase(archiverName)) { 350 if (actualEncoding != null) { 351 return new ZipArchiveInputStream(in, actualEncoding); 352 } 353 return new ZipArchiveInputStream(in); 354 } 355 if (TAR.equalsIgnoreCase(archiverName)) { 356 if (actualEncoding != null) { 357 return new TarArchiveInputStream(in, actualEncoding); 358 } 359 return new TarArchiveInputStream(in); 360 } 361 if (JAR.equalsIgnoreCase(archiverName)) { 362 if (actualEncoding != null) { 363 return new JarArchiveInputStream(in, actualEncoding); 364 } 365 return new JarArchiveInputStream(in); 366 } 367 if (CPIO.equalsIgnoreCase(archiverName)) { 368 if (actualEncoding != null) { 369 return new CpioArchiveInputStream(in, actualEncoding); 370 } 371 return new CpioArchiveInputStream(in); 372 } 373 if (DUMP.equalsIgnoreCase(archiverName)) { 374 if (actualEncoding != null) { 375 return new DumpArchiveInputStream(in, actualEncoding); 376 } 377 return new DumpArchiveInputStream(in); 378 } 379 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 380 throw new StreamingNotSupportedException(SEVEN_Z); 381 } 382 383 final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName)); 384 if (archiveStreamProvider != null) { 385 return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding); 386 } 387 388 throw new ArchiveException("Archiver: " + archiverName + " not found."); 389 } 390 391 /** 392 * Creates an archive output stream from an archiver name and an output stream. 393 * 394 * @param archiverName the archive name, 395 * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 396 * @param out the output stream 397 * @return the archive output stream 398 * @throws ArchiveException if the archiver name is not known 399 * @throws StreamingNotSupportedException if the format cannot be 400 * written to a stream 401 * @throws IllegalArgumentException if the archiver name or stream is null 402 */ 403 public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out) 404 throws ArchiveException { 405 return createArchiveOutputStream(archiverName, out, entryEncoding); 406 } 407 408 @Override 409 public ArchiveOutputStream createArchiveOutputStream( 410 final String archiverName, final OutputStream out, final String actualEncoding) 411 throws ArchiveException { 412 if (archiverName == null) { 413 throw new IllegalArgumentException("Archivername must not be null."); 414 } 415 if (out == null) { 416 throw new IllegalArgumentException("OutputStream must not be null."); 417 } 418 419 if (AR.equalsIgnoreCase(archiverName)) { 420 return new ArArchiveOutputStream(out); 421 } 422 if (ZIP.equalsIgnoreCase(archiverName)) { 423 final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 424 if (actualEncoding != null) { 425 zip.setEncoding(actualEncoding); 426 } 427 return zip; 428 } 429 if (TAR.equalsIgnoreCase(archiverName)) { 430 if (actualEncoding != null) { 431 return new TarArchiveOutputStream(out, actualEncoding); 432 } 433 return new TarArchiveOutputStream(out); 434 } 435 if (JAR.equalsIgnoreCase(archiverName)) { 436 if (actualEncoding != null) { 437 return new JarArchiveOutputStream(out, actualEncoding); 438 } 439 return new JarArchiveOutputStream(out); 440 } 441 if (CPIO.equalsIgnoreCase(archiverName)) { 442 if (actualEncoding != null) { 443 return new CpioArchiveOutputStream(out, actualEncoding); 444 } 445 return new CpioArchiveOutputStream(out); 446 } 447 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 448 throw new StreamingNotSupportedException(SEVEN_Z); 449 } 450 451 final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName)); 452 if (archiveStreamProvider != null) { 453 return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding); 454 } 455 456 throw new ArchiveException("Archiver: " + archiverName + " not found."); 457 } 458 459 /** 460 * Create an archive input stream from an input stream, autodetecting 461 * the archive type from the first few bytes of the stream. The InputStream 462 * must support marks, like BufferedInputStream. 463 * 464 * @param in the input stream 465 * @return the archive input stream 466 * @throws ArchiveException if the archiver name is not known 467 * @throws StreamingNotSupportedException if the format cannot be 468 * read from a stream 469 * @throws IllegalArgumentException if the stream is null or does not support mark 470 */ 471 public ArchiveInputStream createArchiveInputStream(final InputStream in) 472 throws ArchiveException { 473 return createArchiveInputStream(detect(in), in); 474 } 475 476 /** 477 * Try to determine the type of Archiver 478 * @param in input stream 479 * @return type of archiver if found 480 * @throws ArchiveException if an archiver cannot be detected in the stream 481 * @since 1.14 482 */ 483 public static String detect(final InputStream in) throws ArchiveException { 484 if (in == null) { 485 throw new IllegalArgumentException("Stream must not be null."); 486 } 487 488 if (!in.markSupported()) { 489 throw new IllegalArgumentException("Mark is not supported."); 490 } 491 492 final byte[] signature = new byte[SIGNATURE_SIZE]; 493 in.mark(signature.length); 494 int signatureLength = -1; 495 try { 496 signatureLength = IOUtils.readFully(in, signature); 497 in.reset(); 498 } catch (final IOException e) { 499 throw new ArchiveException("IOException while reading signature.", e); 500 } 501 502 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 503 return ZIP; 504 } 505 if (JarArchiveInputStream.matches(signature, signatureLength)) { 506 return JAR; 507 } 508 if (ArArchiveInputStream.matches(signature, signatureLength)) { 509 return AR; 510 } 511 if (CpioArchiveInputStream.matches(signature, signatureLength)) { 512 return CPIO; 513 } 514 if (ArjArchiveInputStream.matches(signature, signatureLength)) { 515 return ARJ; 516 } 517 if (SevenZFile.matches(signature, signatureLength)) { 518 return SEVEN_Z; 519 } 520 521 // Dump needs a bigger buffer to check the signature; 522 final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE]; 523 in.mark(dumpsig.length); 524 try { 525 signatureLength = IOUtils.readFully(in, dumpsig); 526 in.reset(); 527 } catch (final IOException e) { 528 throw new ArchiveException("IOException while reading dump signature", e); 529 } 530 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 531 return DUMP; 532 } 533 534 // Tar needs an even bigger buffer to check the signature; read the first block 535 final byte[] tarHeader = new byte[TAR_HEADER_SIZE]; 536 in.mark(tarHeader.length); 537 try { 538 signatureLength = IOUtils.readFully(in, tarHeader); 539 in.reset(); 540 } catch (final IOException e) { 541 throw new ArchiveException("IOException while reading tar signature", e); 542 } 543 if (TarArchiveInputStream.matches(tarHeader, signatureLength)) { 544 return TAR; 545 } 546 547 // COMPRESS-117 - improve auto-recognition 548 if (signatureLength >= TAR_HEADER_SIZE) { 549 TarArchiveInputStream tais = null; 550 try { 551 tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader)); 552 // COMPRESS-191 - verify the header checksum 553 if (tais.getNextTarEntry().isCheckSumOK()) { 554 return TAR; 555 } 556 } catch (final Exception e) { // NOPMD NOSONAR 557 // can generate IllegalArgumentException as well 558 // as IOException 559 // autodetection, simply not a TAR 560 // ignored 561 } finally { 562 IOUtils.closeQuietly(tais); 563 } 564 } 565 throw new ArchiveException("No Archiver found for the stream signature"); 566 } 567 568 public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() { 569 if (archiveInputStreamProviders == null) { 570 archiveInputStreamProviders = Collections 571 .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders()); 572 } 573 return archiveInputStreamProviders; 574 } 575 576 public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() { 577 if (archiveOutputStreamProviders == null) { 578 archiveOutputStreamProviders = Collections 579 .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders()); 580 } 581 return archiveOutputStreamProviders; 582 } 583 584 @Override 585 public Set<String> getInputStreamArchiveNames() { 586 return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z); 587 } 588 589 @Override 590 public Set<String> getOutputStreamArchiveNames() { 591 return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z); 592 } 593 594}