fkjellberg commented on code in PR #690:
URL: https://github.com/apache/commons-compress/pull/690#discussion_r2296173913
##########
src/main/java/org/apache/commons/compress/archivers/lha/LhaArchiveInputStream.java:
##########
@@ -0,0 +1,701 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.lha;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.commons.compress.archivers.ArchiveEntry;
+import org.apache.commons.compress.archivers.ArchiveException;
+import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipUtil;
+import org.apache.commons.compress.compressors.lha.Lh4CompressorInputStream;
+import org.apache.commons.compress.compressors.lha.Lh5CompressorInputStream;
+import org.apache.commons.compress.compressors.lha.Lh6CompressorInputStream;
+import org.apache.commons.compress.compressors.lha.Lh7CompressorInputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.input.BoundedInputStream;
+import org.apache.commons.io.input.ChecksumInputStream;
+
+/**
+ * Implements the LHA archive format as an InputStream.
+ *
+ * This implementation is based on the documentation that can be found at
+ * http://dangan.g.dgdg.jp/en/Content/Program/Java/jLHA/Notes/Notes.html
+ *
+ * @NotThreadSafe
+ * @since 1.29
+ */
+public class LhaArchiveInputStream extends ArchiveInputStream<LhaArchiveEntry> {
+    // Fields that are the same across all header levels
+    private static final int HEADER_GENERIC_MINIMUM_HEADER_LENGTH = 22;
+    private static final int HEADER_GENERIC_OFFSET_COMPRESSION_METHOD = 2;
+    private static final int HEADER_GENERIC_OFFSET_HEADER_LEVEL = 20;
+
+    // Header Level 0
+    private static final int HEADER_LEVEL_0_OFFSET_HEADER_SIZE = 0;
+    private static final int HEADER_LEVEL_0_OFFSET_HEADER_CHECKSUM = 1;
+    private static final int HEADER_LEVEL_0_OFFSET_COMPRESSED_SIZE = 7;
+    private static final int HEADER_LEVEL_0_OFFSET_ORIGINAL_SIZE = 11;
+    private static final int HEADER_LEVEL_0_OFFSET_LAST_MODIFIED_DATE_TIME = 15;
+    private static final int HEADER_LEVEL_0_OFFSET_FILENAME_LENGTH = 21;
+    private static final int HEADER_LEVEL_0_OFFSET_FILENAME = 22;
+
+    // Header Level 1
+    private static final int HEADER_LEVEL_1_OFFSET_BASE_HEADER_SIZE = 0;
+    private static final int HEADER_LEVEL_1_OFFSET_BASE_HEADER_CHECKSUM = 1;
+    private static final int HEADER_LEVEL_1_OFFSET_SKIP_SIZE = 7;
+    private static final int HEADER_LEVEL_1_OFFSET_ORIGINAL_SIZE = 11;
+    private static final int HEADER_LEVEL_1_OFFSET_LAST_MODIFIED_DATE_TIME = 15;
+    private static final int HEADER_LEVEL_1_OFFSET_FILENAME_LENGTH = 21;
+    private static final int HEADER_LEVEL_1_OFFSET_FILENAME = 22;
+
+    // Header Level 2
+    private static final int HEADER_LEVEL_2_OFFSET_HEADER_SIZE = 0;
+    private static final int HEADER_LEVEL_2_OFFSET_COMPRESSED_SIZE = 7;
+    private static final int HEADER_LEVEL_2_OFFSET_ORIGINAL_SIZE = 11;
+    private static final int HEADER_LEVEL_2_OFFSET_LAST_MODIFIED_DATE_TIME = 15;
+    private static final int HEADER_LEVEL_2_OFFSET_CRC = 21;
+    private static final int HEADER_LEVEL_2_OFFSET_OS_ID = 23;
+    private static final int HEADER_LEVEL_2_OFFSET_FIRST_EXTENDED_HEADER_SIZE = 24;
+
+    // Extended header types
+    private static final int EXTENDED_HEADER_TYPE_COMMON = 0x00;
+    private static final int EXTENDED_HEADER_TYPE_FILENAME = 0x01;
+    private static final int EXTENDED_HEADER_TYPE_DIRECTORY_NAME = 0x02;
+
+    private static final int EXTENDED_HEADER_TYPE_MSDOS_FILE_ATTRIBUTES = 0x40;
+
+    private static final int EXTENDED_HEADER_TYPE_UNIX_PERMISSION = 0x50;
+    private static final int EXTENDED_HEADER_TYPE_UNIX_UID_GID = 0x51;
+    private static final int EXTENDED_HEADER_TYPE_UNIX_TIMESTAMP = 0x54;
+
+    // Compression methods
+    private static final String COMPRESSION_METHOD_DIRECTORY = "-lhd-"; // Directory entry
+    private static final String COMPRESSION_METHOD_LH0 = "-lh0-";
+    private static final String COMPRESSION_METHOD_LH4 = "-lh4-";
+    private static final String COMPRESSION_METHOD_LH5 = "-lh5-";
+    private static final String COMPRESSION_METHOD_LH6 = "-lh6-";
+    private static final String COMPRESSION_METHOD_LH7 = "-lh7-";
+    private static final String COMPRESSION_METHOD_LZ4 = "-lz4-";
+
+    private final char fileSeparatorChar;
+    private LhaArchiveEntry currentEntry;
+    private InputStream currentCompressedStream;
+    private InputStream currentDecompressedStream;
+
+    /**
+     * Constructs the LhaArchiveInputStream, taking ownership of the inputStream that is passed in.
+     *
+     * @param inputStream the underlying stream, whose ownership is taken
+     */
+    public LhaArchiveInputStream(final InputStream inputStream) {
+        this(inputStream, null);
+    }
+
+    /**
+     * Constructs the LhaArchiveInputStream, taking ownership of the inputStream that is passed in.
+     *
+     * @param inputStream the underlying stream, whose ownership is taken
+     * @param charsetName the charset used for file names in the archive. May be {@code null} to use US-ASCII as default.
+     */
+    public LhaArchiveInputStream(final InputStream inputStream, final String charsetName) {
+        this(inputStream, charsetName, File.separatorChar);
+    }
+
+    /**
+     * Constructs the LhaArchiveInputStream, taking ownership of the inputStream that is passed in.
+     *
+     * @param inputStream the underlying stream, whose ownership is taken
+     * @param charsetName the charset used for file names in the archive. May be {@code null} to use US-ASCII as default.
+     * @param fileSeparatorChar the character used to separate file path elements
+     */
+    public LhaArchiveInputStream(final InputStream inputStream, final String charsetName, final char fileSeparatorChar) {
+        super(inputStream, charsetName == null ? StandardCharsets.US_ASCII.name() : charsetName);
+        this.fileSeparatorChar = fileSeparatorChar;
+    }
+
+    @Override
+    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
+        return currentDecompressedStream != null;
+    }
+
+    @Override
+    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
+        if (currentEntry == null) {
+            throw new IllegalStateException("No current entry");
+        }
+
+        if (currentDecompressedStream == null) {
+            throw new ArchiveException("Unsupported compression method: %s", currentEntry.getCompressionMethod());
+        }
+
+        return currentDecompressedStream.read(buffer, offset, length);
+    }
+
+    /**
+     * Checks if the signature matches what is expected for an LHA file. There is no specific
+     * signature for LHA files, so this method checks if the header level and the compression
+     * method are valid for an LHA archive. The signature must be at least the minimum header
+     * length of 22 bytes for this check to work properly.
+     *
+     * @param signature the bytes to check
+     * @param length the number of bytes to check
+     * @return true, if this stream is an LHA archive stream, false otherwise
+     */
+    public static boolean matches(final byte[] signature, final int length) {
+        if (signature.length < HEADER_GENERIC_MINIMUM_HEADER_LENGTH || length < HEADER_GENERIC_MINIMUM_HEADER_LENGTH) {
+            return false;
+        }
+
+        final ByteBuffer header = ByteBuffer.wrap(signature).order(ByteOrder.LITTLE_ENDIAN);
+
+        // Determine header level. Expected value is in the range 0-3.
+        final byte headerLevel = header.get(HEADER_GENERIC_OFFSET_HEADER_LEVEL);
+        if (headerLevel < 0 || headerLevel > 3) {
+            return false;
+        }
+
+        // Check if the compression method is valid for LHA archives
+        try {
+            getCompressionMethod(header);
+        } catch (ArchiveException e) {
+            return false;
+        }
+
+        return true;
+    }
+
+    @Override
+    public LhaArchiveEntry getNextEntry() throws IOException {
+        if (this.currentCompressedStream != null) {
+            // Consume the entire compressed stream to end up at the next entry
+            IOUtils.consume(this.currentCompressedStream);
+
+            this.currentCompressedStream = null;
+            this.currentDecompressedStream = null;
+        }
+
+        this.currentEntry = readHeader();
+
+        return this.currentEntry;
+    }
+
+    /**
+     * Read the next LHA header from the input stream.
+     *
+     * @return the next header entry, or null if there are no more entries
+     * @throws IOException
+     */
+    protected LhaArchiveEntry readHeader() throws IOException {
+        // Header level is not known yet. Read the minimum length header.
+        final byte[] buffer = new byte[HEADER_GENERIC_MINIMUM_HEADER_LENGTH];
+        final int len = in.read(buffer);

Review Comment:
   Yes, this is actually what I want! I ended up using the IOUtils.read method directly from commons-io instead of using the IOUtils in commons-compress.
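   For readers following along: the quoted line `final int len = in.read(buffer);` relies on a single InputStream.read call, which may legally return fewer than 22 bytes even when more data is still available, whereas commons-io's IOUtils.read keeps reading until the buffer is full or EOF is reached. Below is a minimal sketch of that pattern under stated assumptions; the class and method names and the end-of-archive handling are illustrative, not the PR's actual code.

   ```java
   import java.io.EOFException;
   import java.io.IOException;
   import java.io.InputStream;

   import org.apache.commons.io.IOUtils;

   // Hypothetical helper, for illustration only.
   final class LhaHeaderReadSketch {

       // Mirrors HEADER_GENERIC_MINIMUM_HEADER_LENGTH from the PR.
       private static final int MIN_HEADER_LENGTH = 22;

       /**
        * Reads the fixed-size header prefix. Returns null when no bytes are left;
        * the real readHeader() in the PR has its own end-of-archive logic.
        */
       static byte[] readHeaderPrefix(final InputStream in) throws IOException {
           final byte[] buffer = new byte[MIN_HEADER_LENGTH];
           // Unlike a single InputStream.read call, IOUtils.read keeps reading
           // until the buffer is full or the stream ends, and returns the count.
           final int len = IOUtils.read(in, buffer);
           if (len == 0) {
               return null;
           }
           if (len < MIN_HEADER_LENGTH) {
               throw new EOFException("Truncated LHA header: expected " + MIN_HEADER_LENGTH + " bytes, got " + len);
           }
           return buffer;
       }
   }
   ```

   IOUtils.readFully would be an alternative when any short read should be treated as an error, but returning the actual count lets the caller distinguish a clean end of archive from a truncated header.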