Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1723223&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java Wed Jan 6 03:50:50 2016 @@ -0,0 +1,492 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.chm.accessor; + +import java.math.BigInteger; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.chm.assertion.ChmAssert; +import org.apache.tika.parser.chm.core.ChmConstants; +import org.apache.tika.parser.chm.exception.ChmParsingException; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * The Header 0000: char[4] 'ITSF' 0004: DWORD 3 (Version number) 0008: DWORD + * Total header length, including header section table and following data. 000C: + * DWORD 1 (unknown) 0010: DWORD a timestamp 0014: DWORD Windows Language ID + * 0018: GUID {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC} 0028: GUID + * {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} Note: a GUID is $10 bytes, arranged + * as 1 DWORD, 2 WORDs, and 8 BYTEs. 0000: QWORD Offset of section from + * beginning of file 0008: QWORD Length of section Following the header section + * table is 8 bytes of additional header data. In Version 2 files, this data is + * not there and the content section starts immediately after the directory. + * + * {@link http + * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original + * /?show-translation-form=1} + * + */ +/* structure of ITSF headers */ +public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> { + private static final long serialVersionUID = 2215291838533213826L; + private byte[] signature; + private int version; /* 4 */ + private int header_len; /* 8 */ + private int unknown_000c; /* c */ + private long last_modified; /* 10 */ + private long lang_id; /* 14 */ + private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */ + private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */ + private long unknown_offset; /* 38 */ + private long unknown_len; /* 40 */ + private long dir_offset; /* 48 */ + private long dir_len; /* 50 */ + private long data_offset; /* 58 (Not present before V3) */ + + /* local usage */ + private int dataRemained; + private int currentPlace = 0; + + public ChmItsfHeader() { + signature = ChmConstants.ITSF.getBytes(UTF_8); /* 0 (ITSF) */ + } + + /** + * Prints the values of ChmfHeader + */ + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(new String(getSignature(), UTF_8) + " "); + sb.append(getVersion() + " "); + sb.append(getHeaderLen() + " "); + sb.append(getUnknown_000c() + " "); + sb.append(getLastModified() + " "); + sb.append(getLangId() + " "); + sb.append(getDir_uuid() + " "); + sb.append(getStream_uuid() + " "); + sb.append(getUnknownOffset() + " "); + sb.append(getUnknownLen() + " "); + sb.append(getDirOffset() + " "); + sb.append(getDirLen() + " "); + sb.append(getDataOffset() + " "); + return sb.toString(); + } + + /** + * Returns a signature of itsf header + * + * @return itsf header + */ + public byte[] getSignature() { + return signature; + } + + /** + * Sets itsf header signature + * + * @param signature + */ + protected void setSignature(byte[] signature) { + this.signature = signature; + } + + /** + * Returns itsf header version + * + * @return itsf version + */ + public int getVersion() { + return version; + } + + /** + * Sets itsf version + * + * @param version + */ + protected void setVersion(int version) { + this.version = version; + } + + /** + * Returns itsf header length + * + * @return length + */ + public int getHeaderLen() { + return header_len; + } + + /** + * Sets itsf header length + * + * @param header_len + */ + protected void setHeaderLen(int header_len) { + this.header_len = header_len; + } + + /** + * Returns unknown_00c value + * + * @return unknown_00c + */ + public int getUnknown_000c() { + return unknown_000c; + } + + /** + * Sets unknown_00c + * + * @param unknown_000c + */ + protected void setUnknown_000c(int unknown_000c) { + this.unknown_000c = unknown_000c; + } + + /** + * Returns last modified date of the chm file + * + * @return last modified date as long + */ + public long getLastModified() { + return last_modified; + } + + /** + * Sets last modified date of the chm file + * + * @param last_modified + */ + protected void setLastModified(long last_modified) { + this.last_modified = last_modified; + } + + /** + * Returns language ID + * + * @return language_id + */ + public long getLangId() { + return lang_id; + } + + /** + * Sets language_id + * + * @param lang_id + */ + protected void setLangId(long lang_id) { + this.lang_id = lang_id; + } + + /** + * Returns directory uuid + * + * @return dir_uuid + */ + public byte[] getDir_uuid() { + return dir_uuid; + } + + /** + * Sets directory uuid + * + * @param dir_uuid + */ + protected void setDir_uuid(byte[] dir_uuid) { + this.dir_uuid = dir_uuid; + } + + /** + * Returns stream uuid + * + * @return stream_uuid + */ + public byte[] getStream_uuid() { + return stream_uuid; + } + + /** + * Sets stream uuid + * + * @param stream_uuid + */ + protected void setStream_uuid(byte[] stream_uuid) { + this.stream_uuid = stream_uuid; + } + + /** + * Returns unknown offset + * + * @return unknown_offset + */ + public long getUnknownOffset() { + return unknown_offset; + } + + /** + * Sets unknown offset + * + * @param unknown_offset + */ + protected void setUnknownOffset(long unknown_offset) { + this.unknown_offset = unknown_offset; + } + + /** + * Returns unknown length + * + * @return unknown_length + */ + public long getUnknownLen() { + return unknown_len; + } + + /** + * Sets unknown length + * + * @param unknown_len + */ + protected void setUnknownLen(long unknown_len) { + this.unknown_len = unknown_len; + } + + /** + * Returns directory offset + * + * @return directory_offset + */ + public long getDirOffset() { + return dir_offset; + } + + /** + * Sets directory offset + * + * @param dir_offset + */ + protected void setDirOffset(long dir_offset) { + this.dir_offset = dir_offset; + } + + /** + * Returns directory length + * + * @return directory_offset + */ + public long getDirLen() { + return dir_len; + } + + /** + * Sets directory length + * + * @param dir_len + */ + protected void setDirLen(long dir_len) { + this.dir_len = dir_len; + } + + /** + * Returns data offset + * + * @return data_offset + */ + public long getDataOffset() { + return data_offset; + } + + /** + * Sets data offset + * + * @param data_offset + */ + protected void setDataOffset(long data_offset) { + this.data_offset = data_offset; + } + + /** + * Copies 4 first bytes of the byte[] + * + * @param data + * @param chmItsfHeader + * @param count + * @throws TikaException + */ + private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader, + int count) throws TikaException { + ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count); + System.arraycopy(data, 0, chmItsfHeader.signature, 0, count); + this.setCurrentPlace(this.getCurrentPlace() + count); + this.setDataRemained(this.getDataRemained() - count); + } + + /** + * Copies X bytes of source byte[] to the dest byte[] + * + * @param data + * @param dest + * @param count + * @return + */ + private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) { + System.arraycopy(data, this.getCurrentPlace(), dest, 0, count); + this.setCurrentPlace(this.getCurrentPlace() + count); + this.setDataRemained(this.getDataRemained() - count); + return dest; + } + + /** + * Takes 8 bytes and reverses them + * + * @param data + * @param dest + * @return + * @throws TikaException + */ + private long unmarshalUint64(byte[] data, long dest) throws TikaException{ + byte[] temp = new byte[8]; + int i, j; + + if (8 > this.getDataRemained()) + throw new TikaException("8 > this.getDataRemained()"); + + for (i = 8, j = 7; i > 0; i--) { + temp[j--] = data[this.getCurrentPlace()]; + this.setCurrentPlace(this.getCurrentPlace() + 1); + } + + dest = new BigInteger(temp).longValue(); + this.setDataRemained(this.getDataRemained() - 8); + return dest; + } + + private int unmarshalInt32(byte[] data, int dest) throws TikaException{ + ChmAssert.assertByteArrayNotNull(data); + + if (4 > this.getDataRemained()) + throw new TikaException("4 > dataLenght"); + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; + + this.setCurrentPlace(this.getCurrentPlace() + 4); + this.setDataRemained(this.getDataRemained() - 4); + return dest; + } + + private long unmarshalUInt32(byte[] data, long dest) throws TikaException{ + ChmAssert.assertByteArrayNotNull(data); + if (4 > getDataRemained()) + throw new TikaException("4 > dataLenght"); + dest = data[this.getCurrentPlace()] + | data[this.getCurrentPlace() + 1] << 8 + | data[this.getCurrentPlace() + 2] << 16 + | data[this.getCurrentPlace() + 3] << 24; + + setDataRemained(this.getDataRemained() - 4); + this.setCurrentPlace(this.getCurrentPlace() + 4); + return dest; + } + + public static void main(String[] args) { + } + + /** + * Sets data remained to be processed + * + * @param dataRemained + */ + private void setDataRemained(int dataRemained) { + this.dataRemained = dataRemained; + } + + /** + * Returns data remained + * + * @return data_remainned + */ + private int getDataRemained() { + return dataRemained; + } + + /** + * Sets current place in the byte[] + * + * @param currentPlace + */ + private void setCurrentPlace(int currentPlace) { + this.currentPlace = currentPlace; + } + + /** + * Returns current place in the byte[] + * + * @return current place + */ + private int getCurrentPlace() { + return currentPlace; + } + + // @Override + public void parse(byte[] data, ChmItsfHeader chmItsfHeader) throws TikaException { + if (data.length < ChmConstants.CHM_ITSF_V2_LEN + || data.length > ChmConstants.CHM_ITSF_V3_LEN) + throw new TikaException("we only know how to deal with the 0x58 and 0x60 byte structures"); + + chmItsfHeader.setDataRemained(data.length); + chmItsfHeader.unmarshalCharArray(data, chmItsfHeader, ChmConstants.CHM_SIGNATURE_LEN); + chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getVersion())); + chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getHeaderLen())); + chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data, chmItsfHeader.getUnknown_000c())); + chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLastModified())); + chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data, chmItsfHeader.getLangId())); + chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getDir_uuid(), 16)); + chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data, chmItsfHeader.getStream_uuid(), 16)); + chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownOffset())); + chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getUnknownLen())); + chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirOffset())); + chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data, chmItsfHeader.getDirLen())); + if (!new String(chmItsfHeader.getSignature(), UTF_8).equals(ChmConstants.ITSF)) + throw new TikaException("seems not valid file"); + if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) { + if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN) + throw new TikaException("something wrong with header"); + } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) { + if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN) + throw new TikaException("unknown v3 header lenght"); + } else + throw new ChmParsingException("unsupported chm format"); + + /* + * now, if we have a V3 structure, unmarshal the rest, otherwise, + * compute it + */ + if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) { + if (chmItsfHeader.getDataRemained() >= 0) + chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset() + + chmItsfHeader.getDirLen()); + else + throw new TikaException("cannot set data offset, no data remained"); + } else + chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset() + + chmItsfHeader.getDirLen()); + } +}
Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java?rev=1723223&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java Wed Jan 6 03:50:50 2016 @@ -0,0 +1,548 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.chm.accessor; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.chm.assertion.ChmAssert; +import org.apache.tika.parser.chm.core.ChmCommons; +import org.apache.tika.parser.chm.core.ChmConstants; +import org.apache.tika.parser.chm.exception.ChmParsingException; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * Directory header The directory starts with a header; its format is as + * follows: 0000: char[4] 'ITSP' 0004: DWORD Version number 1 0008: DWORD Length + * of the directory header 000C: DWORD $0a (unknown) 0010: DWORD $1000 Directory + * chunk size 0014: DWORD "Density" of quickref section, usually 2 0018: DWORD + * Depth of the index tree - 1 there is no index, 2 if there is one level of + * PMGI chunks 001C: DWORD Chunk number of root index chunk, -1 if there is none + * (though at least one file has 0 despite there being no index chunk, probably + * a bug) 0020: DWORD Chunk number of first PMGL (listing) chunk 0024: DWORD + * Chunk number of last PMGL (listing) chunk 0028: DWORD -1 (unknown) 002C: + * DWORD Number of directory chunks (total) 0030: DWORD Windows language ID + * 0034: GUID {5D02926A-212E-11D0-9DF9-00A0C922E6EC} 0044: DWORD $54 (This is + * the length again) 0048: DWORD -1 (unknown) 004C: DWORD -1 (unknown) 0050: + * DWORD -1 (unknown) + * + * {@link http + * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original + * /?show-translation-form=1} + * + */ +public class ChmItspHeader implements ChmAccessor<ChmItspHeader> { + // TODO: refactor all unmarshals + private static final long serialVersionUID = 1962394421998181341L; + private byte[] signature; + private int version; /* 4 */ + private int header_len; /* 8 */ + private int unknown_000c; /* c */ + private long block_len; /* 10 */ + private int blockidx_intvl; /* 14 */ + private int index_depth; /* 18 */ + private int index_root; /* 1c */ + private int index_head; /* 20 */ + private int unknown_0024; /* 24 */ + private long num_blocks; /* 28 */ + private int unknown_002c; /* 2c */ + private long lang_id; /* 30 */ + private byte[] system_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 34 */ + private byte[] unknown_0044 = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 44 */ + + /* local usage */ + private int dataRemained; + private int currentPlace = 0; + + public ChmItspHeader() { + signature = ChmConstants.ITSP.getBytes(UTF_8); /* + * 0 + * (ITSP + * ) + */ + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("[ signature:=" + new String(getSignature(), UTF_8) + + System.getProperty("line.separator")); + sb.append("version:=\t" + getVersion() + + System.getProperty("line.separator")); + sb.append("header_len:=\t" + getHeader_len() + + System.getProperty("line.separator")); + sb.append("unknown_00c:=\t" + getUnknown_000c() + + System.getProperty("line.separator")); + sb.append("block_len:=\t" + getBlock_len() + " [directory chunk size]" + + System.getProperty("line.separator")); + sb.append("blockidx_intvl:=" + getBlockidx_intvl() + + ", density of quickref section, usually 2" + + System.getProperty("line.separator")); + sb.append("index_depth:=\t" + + getIndex_depth() + + ", depth of the index tree - 1 there is no index, 2 if there is one level of PMGI chunk" + + System.getProperty("line.separator")); + sb.append("index_root:=\t" + getIndex_root() + + ", chunk number of root index chunk, -1 if there is none" + + System.getProperty("line.separator")); + sb.append("index_head:=\t" + getIndex_head() + + ", chunk number of first PMGL (listing) chunk" + + System.getProperty("line.separator")); + sb.append("unknown_0024:=\t" + getUnknown_0024() + + ", chunk number of last PMGL (listing) chunk" + + System.getProperty("line.separator")); + sb.append("num_blocks:=\t" + getNum_blocks() + ", -1 (unknown)" + + System.getProperty("line.separator")); + sb.append("unknown_002c:=\t" + getUnknown_002c() + + ", number of directory chunks (total)" + + System.getProperty("line.separator")); + sb.append("lang_id:=\t" + getLang_id() + " - " + + ChmCommons.getLanguage(getLang_id()) + + System.getProperty("line.separator")); + sb.append("system_uuid:=" + getSystem_uuid() + + System.getProperty("line.separator")); + sb.append("unknown_0044:=" + getUnknown_0044() + " ]"); + return sb.toString(); + } + + /** + * Copies 4 bits from data[] + * + * @param data + * @param chmItspHeader + * @param count + * @throws TikaException + */ + private void unmarshalCharArray(byte[] data, ChmItspHeader chmItspHeader, + int count) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + ChmAssert.assertChmAccessorNotNull(chmItspHeader); + this.setDataRemained(data.length); + System.arraycopy(data, 0, chmItspHeader.signature, 0, count); + this.setCurrentPlace(this.getCurrentPlace() + count); + this.setDataRemained(this.getDataRemained() - count); + } + + private int unmarshalInt32(byte[] data, int dataLenght, int dest) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + if (4 > this.getDataRemained()) + throw new TikaException("4 > dataLenght"); + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; + + this.setCurrentPlace(this.getCurrentPlace() + 4); + this.setDataRemained(this.getDataRemained() - 4); + return dest; + } + + private long unmarshalUInt32(byte[] data, int dataLenght, long dest) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + if (4 > dataLenght) + throw new TikaException("4 > dataLenght"); + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; + + setDataRemained(this.getDataRemained() - 4); + this.setCurrentPlace(this.getCurrentPlace() + 4); + return dest; + } + + private byte[] unmarshalUuid(byte[] data, int dataLenght, byte[] dest, + int count) { + System.arraycopy(data, this.getCurrentPlace(), dest, 0, count); + this.setCurrentPlace(this.getCurrentPlace() + count); + this.setDataRemained(this.getDataRemained() - count); + return dest; + } + + /** + * Returns how many bytes remained + * + * @return int + */ + private int getDataRemained() { + return dataRemained; + } + + /** + * Sets how many bytes remained + * + * @param dataRemained + */ + private void setDataRemained(int dataRemained) { + this.dataRemained = dataRemained; + } + + /** + * Returns a place holder + * + * @return current place + */ + private int getCurrentPlace() { + return currentPlace; + } + + /** + * Sets current place + * + * @param currentPlace + */ + private void setCurrentPlace(int currentPlace) { + this.currentPlace = currentPlace; + } + + /** + * Returns a signature of the header + * + * @return itsp signature + */ + public byte[] getSignature() { + return signature; + } + + /** + * Sets itsp signature + * + * @param signature + */ + protected void setSignature(byte[] signature) { + this.signature = signature; + } + + /** + * Returns version of itsp header + * + * @return version + */ + public int getVersion() { + return version; + } + + /** + * Sets a version of itsp header + * + * @param version + */ + protected void setVersion(int version) { + this.version = version; + } + + /** + * Returns header length + * + * @return header length + */ + public int getHeader_len() { + return header_len; + } + + /** + * Sets itsp header length + * + * @param header_len + */ + protected void setHeader_len(int header_len) { + this.header_len = header_len; + } + + /** + * Returns 000c unknown bytes + */ + public int getUnknown_000c() { + return unknown_000c; + } + + /** + * Sets 000c unknown bytes Unknown means here that those guys who cracked + * the chm format do not know what's it purposes for + * + * @param unknown_000c + */ + protected void setUnknown_000c(int unknown_000c) { + this.unknown_000c = unknown_000c; + } + + /** + * Returns block's length + * + * @return block_length + */ + public long getBlock_len() { + return block_len; + } + + /** + * Sets block length + * + * @param block_len + */ + protected void setBlock_len(long block_len) { + this.block_len = block_len; + } + + /** + * Returns block index interval + * + * @return blockidx_intvl + */ + public int getBlockidx_intvl() { + return blockidx_intvl; + } + + /** + * Sets block index interval + * + * @param blockidx_intvl + */ + protected void setBlockidx_intvl(int blockidx_intvl) { + this.blockidx_intvl = blockidx_intvl; + } + + /** + * Returns an index depth + * + * @return index_depth + */ + public int getIndex_depth() { + return index_depth; + } + + /** + * Sets an index depth + * + * @param index_depth + */ + protected void setIndex_depth(int index_depth) { + this.index_depth = index_depth; + } + + /** + * Returns index root + * + * @return index_root + */ + public int getIndex_root() { + return index_root; + } + + /** + * Sets an index root + * + * @param index_root + */ + protected void setIndex_root(int index_root) { + this.index_root = index_root; + } + + /** + * Returns an index head + * + * @return index_head + */ + public int getIndex_head() { + return index_head; + } + + /** + * Sets an index head + * + * @param index_head + */ + protected void setIndex_head(int index_head) { + this.index_head = index_head; + } + + /** + * Returns 0024 unknown bytes + * + * @return unknown_0024 + */ + public int getUnknown_0024() { + return unknown_0024; + } + + /** + * Sets 0024 unknown bytes + * + * @param unknown_0024 + */ + protected void setUnknown_0024(int unknown_0024) { + this.unknown_0024 = unknown_0024; + } + + /** + * Returns number of blocks + * + * @return num_blocks + */ + public long getNum_blocks() { + return num_blocks; + } + + /** + * Sets number of blocks containing in the chm file + * + * @param num_blocks + */ + protected void setNum_blocks(long num_blocks) { + this.num_blocks = num_blocks; + } + + /** + * Returns 002c unknown bytes + * + * @return unknown_002c + */ + public int getUnknown_002c() { + return unknown_002c; + } + + /** + * Sets 002c unknown bytes + * + * @param unknown_002c + */ + protected void setUnknown_002c(int unknown_002c) { + this.unknown_002c = unknown_002c; + } + + /** + * Returns language id + * + * @return lang_id + */ + public long getLang_id() { + return lang_id; + } + + /** + * Sets language id + * + * @param lang_id + */ + protected void setLang_id(long lang_id) { + this.lang_id = lang_id; + } + + /** + * Returns system uuid + * + * @return system_uuid + */ + public byte[] getSystem_uuid() { + return system_uuid; + } + + /** + * Sets system uuid + * + * @param system_uuid + */ + protected void setSystem_uuid(byte[] system_uuid) { + this.system_uuid = system_uuid; + } + + /** + * Returns 0044 unknown bytes + * + * @return unknown_0044 + */ + public byte[] getUnknown_0044() { + return unknown_0044; + } + + /** + * Sets 0044 unknown bytes + * + * @param unknown_0044 + */ + protected void setUnknown_0044(byte[] unknown_0044) { + this.unknown_0044 = unknown_0044; + } + + // @Override + public void parse(byte[] data, ChmItspHeader chmItspHeader) throws TikaException { + /* we only know how to deal with the 0x58 and 0x60 byte structures */ + if (data.length != ChmConstants.CHM_ITSP_V1_LEN) + throw new ChmParsingException("we only know how to deal with the 0x58 and 0x60 byte structures"); + + /* unmarshal common fields */ + chmItspHeader.unmarshalCharArray(data, chmItspHeader, ChmConstants.CHM_SIGNATURE_LEN); + // ChmCommons.unmarshalCharArray(data, chmItspHeader, + // ChmConstants.CHM_SIGNATURE_LEN); + chmItspHeader.setVersion(chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), chmItspHeader.getVersion())); + chmItspHeader + .setHeader_len(chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getHeader_len())); + chmItspHeader.setUnknown_000c(chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getUnknown_000c())); + chmItspHeader.setBlock_len(chmItspHeader.unmarshalUInt32(data, + chmItspHeader.getDataRemained(), chmItspHeader.getBlock_len())); + chmItspHeader.setBlockidx_intvl(chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getBlockidx_intvl())); + chmItspHeader + .setIndex_depth(chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getIndex_depth())); + chmItspHeader + .setIndex_root(chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getIndex_root())); + chmItspHeader + .setIndex_head(chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getIndex_head())); + chmItspHeader.setUnknown_0024(chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getUnknown_0024())); + chmItspHeader + .setNum_blocks(chmItspHeader.unmarshalUInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getNum_blocks())); + chmItspHeader.setUnknown_002c((chmItspHeader.unmarshalInt32(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getUnknown_002c()))); + chmItspHeader.setLang_id(chmItspHeader.unmarshalUInt32(data, + chmItspHeader.getDataRemained(), chmItspHeader.getLang_id())); + chmItspHeader + .setSystem_uuid(chmItspHeader.unmarshalUuid(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getSystem_uuid(), + ChmConstants.BYTE_ARRAY_LENGHT)); + chmItspHeader + .setUnknown_0044(chmItspHeader.unmarshalUuid(data, + chmItspHeader.getDataRemained(), + chmItspHeader.getUnknown_0044(), + ChmConstants.BYTE_ARRAY_LENGHT)); + + /* Checks validity of the itsp header */ + if (!new String(chmItspHeader.getSignature(), UTF_8).equals(ChmConstants.ITSP)) + throw new ChmParsingException("seems not valid signature"); + + if (chmItspHeader.getVersion() != ChmConstants.CHM_VER_1) + throw new ChmParsingException("!=ChmConstants.CHM_VER_1"); + + if (chmItspHeader.getHeader_len() != ChmConstants.CHM_ITSP_V1_LEN) + throw new ChmParsingException("!= ChmConstants.CHM_ITSP_V1_LEN"); + } +} Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java?rev=1723223&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java Wed Jan 6 03:50:50 2016 @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.chm.accessor; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.chm.assertion.ChmAssert; +import org.apache.tika.parser.chm.core.ChmConstants; +import org.apache.tika.parser.chm.exception.ChmParsingException; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * + * ::DataSpace/Storage/<SectionName>/ControlData This file contains $20 bytes of + * information on the compression. The information is partially known: 0000: + * DWORD 6 (unknown) 0004: ASCII 'LZXC' Compression type identifier 0008: DWORD + * 2 (Possibly numeric code for LZX) 000C: DWORD The Huffman reset interval in + * $8000-byte blocks 0010: DWORD The window size in $8000-byte blocks 0014: + * DWORD unknown (sometimes 2, sometimes 1, sometimes 0) 0018: DWORD 0 (unknown) + * 001C: DWORD 0 (unknown) + * + * {@link http + * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original + * /?page=2 } + * + */ +public class ChmLzxcControlData implements ChmAccessor<ChmLzxcControlData> { + private static final long serialVersionUID = -7897854774939631565L; + /* class' members */ + private long size; /* 0 */ + private byte[] signature; + private long version; /* 8 */ + private long resetInterval; /* c */ + private long windowSize; /* 10 */ + private long windowsPerReset; /* 14 */ + private long unknown_18; /* 18 */ + + /* local usage */ + private int dataRemained; + private int currentPlace = 0; + + public ChmLzxcControlData() { + signature = ChmConstants.LZXC.getBytes(UTF_8); /* + * 4 + * (LZXC + * ) + */ + } + + /** + * Returns a remained data + * + * @return dataRemained + */ + private int getDataRemained() { + return dataRemained; + } + + /** + * Sets a remained data + * + * @param dataRemained + */ + private void setDataRemained(int dataRemained) { + this.dataRemained = dataRemained; + } + + /** + * Returns a place holder + * + * @return current_place + */ + private int getCurrentPlace() { + return currentPlace; + } + + /** + * Sets a place holder + * + * @param current_place + */ + private void setCurrentPlace(int currentPlace) { + this.currentPlace = currentPlace; + } + + /** + * Returns a size of control data + * + * @return size + */ + public long getSize() { + return size; + } + + /** + * Sets a size of control data + * + * @param size + */ + protected void setSize(long size) { + this.size = size; + } + + /** + * Returns a signature of control data block + * + * @return signature + */ + public byte[] getSignature() { + return signature; + } + + /** + * Sets a signature of control data block + * + * @param signature + */ + protected void setSignature(byte[] signature) { + this.signature = signature; + } + + /** + * Returns a version of control data block + * + * @return version + */ + public long getVersion() { + return version; + } + + /** + * Sets version of control data block + * + * @param version + */ + protected void setVersion(long version) { + this.version = version; + } + + /** + * Returns reset interval + * + * @return reset_interval + */ + public long getResetInterval() { + return resetInterval; + } + + /** + * Sets a reset interval + * + * @param resetInterval + */ + protected void setResetInterval(long resetInterval) { + this.resetInterval = resetInterval; + } + + /** + * Returns a window size + * + * @return window_size + */ + public long getWindowSize() { + return windowSize; + } + + /** + * Sets a window size + * + * @param window_size + */ + protected void setWindowSize(long windowSize) { + this.windowSize = windowSize; + } + + /** + * Returns windows per reset + * + * @return + */ + public long getWindowsPerReset() { + return windowsPerReset; + } + + /** + * Sets windows per reset + * + * @param windows_per_reset + */ + protected void setWindowsPerReset(long windowsPerReset) { + this.windowsPerReset = windowsPerReset; + } + + /** + * Returns unknown 18 bytes + * + * @return unknown_18 + */ + public long getUnknown_18() { + return unknown_18; + } + + /** + * Sets unknown 18 bytes + * + * @param unknown_18 + */ + protected void setUnknown_18(long unknown_18) { + this.unknown_18 = unknown_18; + } + + private long unmarshalUInt32(byte[] data, long dest) throws ChmParsingException { + assert (data != null && data.length > 0); + if (4 > getDataRemained()) + throw new ChmParsingException("4 > dataLenght"); + dest = data[this.getCurrentPlace()] + | data[this.getCurrentPlace() + 1] << 8 + | data[this.getCurrentPlace() + 2] << 16 + | data[this.getCurrentPlace() + 3] << 24; + + setDataRemained(this.getDataRemained() - 4); + this.setCurrentPlace(this.getCurrentPlace() + 4); + return dest; + } + + private void unmarshalCharArray(byte[] data, + ChmLzxcControlData chmLzxcControlData, int count) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + ChmAssert.assertChmAccessorNotNull(chmLzxcControlData); + ChmAssert.assertPositiveInt(count); + System.arraycopy(data, 4, chmLzxcControlData.getSignature(), 0, count); + this.setCurrentPlace(this.getCurrentPlace() + count); + this.setDataRemained(this.getDataRemained() - count); + } + + /** + * Returns textual representation of ChmLzxcControlData + */ + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("size(unknown):=" + this.getSize() + ", "); + sb.append("signature(Compression type identifier):=" + + new String(this.getSignature(), UTF_8) + ", "); + sb.append("version(Possibly numeric code for LZX):=" + + this.getVersion() + System.getProperty("line.separator")); + sb.append("resetInterval(The Huffman reset interval):=" + + this.getResetInterval() + ", "); + sb.append("windowSize:=" + this.getWindowSize() + ", "); + sb.append("windowsPerReset(unknown (sometimes 2, sometimes 1, sometimes 0):=" + + this.getWindowsPerReset() + ", "); + sb.append("unknown_18:=" + this.getUnknown_18() + + System.getProperty("line.separator")); + return sb.toString(); + } + + // @Override + public void parse(byte[] data, ChmLzxcControlData chmLzxcControlData) throws TikaException { + if (data == null || (data.length < ChmConstants.CHM_LZXC_MIN_LEN)) + throw new ChmParsingException("we want at least 0x18 bytes"); + chmLzxcControlData.setDataRemained(data.length); + chmLzxcControlData.setSize(unmarshalUInt32(data, chmLzxcControlData.getSize())); + chmLzxcControlData.unmarshalCharArray(data, chmLzxcControlData, + ChmConstants.CHM_SIGNATURE_LEN); + chmLzxcControlData.setVersion(unmarshalUInt32(data, + chmLzxcControlData.getVersion())); + chmLzxcControlData.setResetInterval(unmarshalUInt32(data, + chmLzxcControlData.getResetInterval())); + chmLzxcControlData.setWindowSize(unmarshalUInt32(data, + chmLzxcControlData.getWindowSize())); + chmLzxcControlData.setWindowsPerReset(unmarshalUInt32(data, + chmLzxcControlData.getWindowsPerReset())); + + if (data.length >= ChmConstants.CHM_LZXC_V2_LEN) + chmLzxcControlData.setUnknown_18(unmarshalUInt32(data, + chmLzxcControlData.getUnknown_18())); + else + chmLzxcControlData.setUnknown_18(0); + + if (chmLzxcControlData.getVersion() == 2) { + chmLzxcControlData.setWindowSize(getWindowSize() + * ChmConstants.CHM_WINDOW_SIZE_BLOCK); + } + + if (chmLzxcControlData.getWindowSize() == 0 + || chmLzxcControlData.getResetInterval() == 0) + throw new ChmParsingException( + "window size / resetInterval should be more than zero"); + + if (chmLzxcControlData.getWindowSize() == 1) + throw new ChmParsingException( + "window size / resetInterval should be more than 1"); + + /* checks a signature */ + if (!new String(chmLzxcControlData.getSignature(), UTF_8) + .equals(ChmConstants.LZXC)) + throw new ChmParsingException( + "the signature does not seem to be correct"); + } + + /** + * @param args + */ + public static void main(String[] args) { + } +} Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java?rev=1723223&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java Wed Jan 6 03:50:50 2016 @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.chm.accessor; + +import java.math.BigInteger; +import java.util.Arrays; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.chm.assertion.ChmAssert; +import org.apache.tika.parser.chm.core.ChmConstants; +import org.apache.tika.parser.chm.exception.ChmParsingException; + +/** + * LZXC reset table For ensuring a decompression. Reads the block named + * "::DataSpace/Storage/<SectionName>/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable" + * . + * + * {@link http + * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original + * /?page=2 } + * + */ +public class ChmLzxcResetTable implements ChmAccessor<ChmLzxcResetTable> { + private static final long serialVersionUID = -8209574429411707460L; + /* class members */ + private long version; // 0000: DWORD 2 unknown (possibly a version number) + private long block_count; // 0004: DWORD Number of entries in reset table + private long unknown; // 0008: DWORD 8 unknown + private long table_offset; // 000C: DWORD $28 Length of table header (area + // before table entries) + private long uncompressed_len; // 0010: QWORD Uncompressed Length + private long compressed_len; // 0018: QWORD Compressed Length + private long block_len; // 0020: QWORD 0x8000 block size for locations below + private long[] block_address; + + /* local usage */ + private int dataRemained; + private int currentPlace = 0; + + private int getDataRemained() { + return dataRemained; + } + + private void setDataRemained(int dataRemained) { + this.dataRemained = dataRemained; + } + + /** + * Returns block addresses + * + * @return block addresses + */ + public long[] getBlockAddress() { + return block_address; + } + + /** + * Sets block addresses + * + * @param block_address + */ + public void setBlockAddress(long[] block_address) { + this.block_address = block_address; + } + + private int getCurrentPlace() { + return currentPlace; + } + + private void setCurrentPlace(int currentPlace) { + this.currentPlace = currentPlace; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("version:=" + getVersion() + + System.getProperty("line.separator")); + sb.append("block_count:=" + getBlockCount() + + System.getProperty("line.separator")); + sb.append("unknown:=" + getUnknown() + + System.getProperty("line.separator")); + sb.append("table_offset:=" + getTableOffset() + + System.getProperty("line.separator")); + sb.append("uncompressed_len:=" + getUncompressedLen() + + System.getProperty("line.separator")); + sb.append("compressed_len:=" + getCompressedLen() + + System.getProperty("line.separator")); + sb.append("block_len:=" + getBlockLen() + + System.getProperty("line.separator")); + sb.append("block_addresses:=" + Arrays.toString(getBlockAddress())); + return sb.toString(); + } + + /** + * Enumerates chm block addresses + * + * @param data + * + * @return byte[] of addresses + * @throws TikaException + */ + private long[] enumerateBlockAddresses(byte[] data) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + /* we have limit of number of blocks to be extracted */ + if (getBlockCount() > 5000) + setBlockCount(5000); + + if (getBlockCount() < 0 && (getDataRemained() / 8) > 0) + setBlockCount(getDataRemained() / 8); + + long[] addresses = new long[(int) getBlockCount()]; + int rem = getDataRemained() / 8; + for (int i = 0; i < rem; i++) { + long num = -1; + + try { + addresses[i] = unmarshalUint64(data, num); + } catch (Exception e) { + throw new TikaException(e.getMessage()); + } + } + return addresses; + } + + /** + * Validates parameters such as byte[] and chm lzxc reset table + * + * @param data + * @param chmLzxcResetTable + * + * @return boolean + * @throws TikaException + */ + private boolean validateParamaters(byte[] data, + ChmLzxcResetTable chmLzxcResetTable) throws TikaException { + int goodParameter = 0; + ChmAssert.assertByteArrayNotNull(data); + ++goodParameter; + ChmAssert.assertChmAccessorNotNull(chmLzxcResetTable); + ++goodParameter; + return (goodParameter == 2); + } + + private long unmarshalUInt32(byte[] data, long dest) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; + + setDataRemained(this.getDataRemained() - 4); + this.setCurrentPlace(this.getCurrentPlace() + 4); + return dest; + } + + private long unmarshalUint64(byte[] data, long dest) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + byte[] temp = new byte[8]; + int i, j;// counters + + for (i = 8, j = 7; i > 0; i--) { + if (data.length > this.getCurrentPlace()) { + temp[j--] = data[this.getCurrentPlace()]; + this.setCurrentPlace(this.getCurrentPlace() + 1); + } else + throw new TikaException("data is too small to calculate address block"); + } + dest = new BigInteger(temp).longValue(); + this.setDataRemained(this.getDataRemained() - 8); + return dest; + } + + /** + * Returns the version + * + * @return - long + */ + public long getVersion() { + return version; + } + + /** + * Sets the version + * + * @param version + * - long + */ + public void setVersion(long version) { + this.version = version; + } + + /** + * Gets a block count + * + * @return - int + */ + public long getBlockCount() { + return block_count; + } + + /** + * Sets a block count + * + * @param block_count + * - long + */ + public void setBlockCount(long block_count) { + this.block_count = block_count; + } + + /** + * Gets unknown + * + * @return - long + */ + public long getUnknown() { + return unknown; + } + + /** + * Sets an unknown + * + * @param unknown + * - long + */ + public void setUnknown(long unknown) { + this.unknown = unknown; + } + + /** + * Gets a table offset + * + * @return - long + */ + public long getTableOffset() { + return table_offset; + } + + /** + * Sets a table offset + * + * @param table_offset + * - long + */ + public void setTableOffset(long table_offset) { + this.table_offset = table_offset; + } + + /** + * Gets uncompressed length + * + * @return - {@link BigInteger } + */ + public long getUncompressedLen() { + return uncompressed_len; + } + + /** + * Sets uncompressed length + * + * @param uncompressed_len + * - {@link BigInteger} + */ + public void setUncompressedLen(long uncompressed_len) { + this.uncompressed_len = uncompressed_len; + } + + /** + * Gets compressed length + * + * @return - {@link BigInteger} + */ + public long getCompressedLen() { + return compressed_len; + } + + /** + * Sets compressed length + * + * @param compressed_len + * - {@link BigInteger} + */ + public void setCompressedLen(long compressed_len) { + this.compressed_len = compressed_len; + } + + /** + * Gets a block length + * + * @return - {@link BigInteger} + */ + public long getBlockLen() { + return block_len; + } + + /** + * Sets a block length + * + * @param block_len + * - {@link BigInteger} + */ + public void setBlockLlen(long block_len) { + this.block_len = block_len; + } + + // @Override + public void parse(byte[] data, ChmLzxcResetTable chmLzxcResetTable) throws TikaException { + setDataRemained(data.length); + if (validateParamaters(data, chmLzxcResetTable)) { + /* unmarshal fields */ + chmLzxcResetTable.setVersion(unmarshalUInt32(data, chmLzxcResetTable.getVersion())); + chmLzxcResetTable.setBlockCount(unmarshalUInt32(data, chmLzxcResetTable.getBlockCount())); + chmLzxcResetTable.setUnknown(unmarshalUInt32(data, chmLzxcResetTable.getUnknown())); + chmLzxcResetTable.setTableOffset(unmarshalUInt32(data, chmLzxcResetTable.getTableOffset())); + chmLzxcResetTable.setUncompressedLen(unmarshalUint64(data, chmLzxcResetTable.getUncompressedLen())); + chmLzxcResetTable.setCompressedLen(unmarshalUint64(data, chmLzxcResetTable.getCompressedLen())); + chmLzxcResetTable.setBlockLlen(unmarshalUint64(data, chmLzxcResetTable.getBlockLen())); + chmLzxcResetTable.setBlockAddress(enumerateBlockAddresses(data)); + } + + /* checks chmLzxcResetTable */ + if (chmLzxcResetTable.getVersion() != ChmConstants.CHM_VER_2) + throw new ChmParsingException( + "does not seem currect version of chmLzxcResetTable"); + } +} Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java?rev=1723223&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java Wed Jan 6 03:50:50 2016 @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.chm.accessor; + +import java.util.Arrays; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.chm.assertion.ChmAssert; +import org.apache.tika.parser.chm.core.ChmCommons; +import org.apache.tika.parser.chm.core.ChmConstants; +import org.apache.tika.parser.chm.exception.ChmParsingException; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * Description Note: not always exists An index chunk has the following format: + * 0000: char[4] 'PMGI' 0004: DWORD Length of quickref/free area at end of + * directory chunk 0008: Directory index entries (to quickref/free area) The + * quickref area in an PMGI is the same as in an PMGL The format of a directory + * index entry is as follows: BYTE: length of name BYTEs: name (UTF-8 encoded) + * ENCINT: directory listing chunk which starts with name Encoded Integers aka + * ENCINT An ENCINT is a variable-length integer. The high bit of each byte + * indicates "continued to the next byte". Bytes are stored most significant to + * least significant. So, for example, $EA $15 is (((0xEA&0x7F)<<7)|0x15) = + * 0x3515. + * + * <p> + * Note: This class is not in use + * + * {@link http://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original/?show-translation-form=1 } + * + * + */ +public class ChmPmgiHeader implements ChmAccessor<ChmPmgiHeader> { + private static final long serialVersionUID = -2092282339894303701L; + private byte[] signature; + private long free_space; /* 4 */ + + /* local usage */ + private int dataRemained; + private int currentPlace = 0; + + public ChmPmgiHeader() { + signature = ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8); /* 0 (PMGI) */ + } + + private int getDataRemained() { + return dataRemained; + } + + private void setDataRemained(int dataRemained) { + this.dataRemained = dataRemained; + } + + private int getCurrentPlace() { + return currentPlace; + } + + private void setCurrentPlace(int currentPlace) { + this.currentPlace = currentPlace; + } + + private void unmarshalCharArray(byte[] data, ChmPmgiHeader chmPmgiHeader, + int count) throws ChmParsingException { + int index = -1; + ChmAssert.assertByteArrayNotNull(data); + ChmAssert.assertChmAccessorNotNull(chmPmgiHeader); + ChmAssert.assertPositiveInt(count); + this.setDataRemained(data.length); + index = ChmCommons.indexOf(data, + ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8)); + + if (index >= 0) + System.arraycopy(data, index, chmPmgiHeader.getSignature(), 0, count); + else{ + //Some chm documents (actually most of them) do not contain + //PMGI header, in this case, we just notice about it. + } + this.setCurrentPlace(this.getCurrentPlace() + count); + this.setDataRemained(this.getDataRemained() - count); + } + + private long unmarshalUInt32(byte[] data, long dest) throws ChmParsingException { + ChmAssert.assertByteArrayNotNull(data); + + if (4 > getDataRemained()) + throw new ChmParsingException("4 > dataLenght"); + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; + + setDataRemained(this.getDataRemained() - 4); + this.setCurrentPlace(this.getCurrentPlace() + 4); + return dest; + } + + /** + * Returns pmgi signature if exists + * + * @return signature + */ + public byte[] getSignature() { + return signature; + } + + /** + * Sets pmgi signature + * + * @param signature + */ + protected void setSignature(byte[] signature) { + this.signature = signature; + } + + /** + * Returns pmgi free space + * + * @return free_space + */ + public long getFreeSpace() { + return free_space; + } + + /** + * Sets pmgi free space + * + * @param free_space + */ + protected void setFreeSpace(long free_space) { + this.free_space = free_space; + } + + /** + * Returns textual representation of the pmgi header + */ + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("signature:=" + new String(getSignature(), UTF_8) + ", "); + sb.append("free space:=" + getFreeSpace() + + System.getProperty("line.separator")); + return sb.toString(); + } + + // @Override + public void parse(byte[] data, ChmPmgiHeader chmPmgiHeader) throws TikaException { + /* we only know how to deal with a 0x8 byte structures */ + if (data.length < ChmConstants.CHM_PMGI_LEN) + throw new TikaException("we only know how to deal with a 0x8 byte structures"); + + /* unmarshal fields */ + chmPmgiHeader.unmarshalCharArray(data, chmPmgiHeader, ChmConstants.CHM_SIGNATURE_LEN); + chmPmgiHeader.setFreeSpace(chmPmgiHeader.unmarshalUInt32(data, chmPmgiHeader.getFreeSpace())); + + /* check structure */ + if (!Arrays.equals(chmPmgiHeader.getSignature(), + ChmConstants.CHM_PMGI_MARKER.getBytes(UTF_8))) + throw new TikaException( + "it does not seem to be valid a PMGI signature, check ChmItsp index_root if it was -1, means no PMGI, use PMGL insted"); + + } +} Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java?rev=1723223&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java Wed Jan 6 03:50:50 2016 @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.chm.accessor; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.chm.assertion.ChmAssert; +import org.apache.tika.parser.chm.core.ChmConstants; +import org.apache.tika.parser.chm.exception.ChmParsingException; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * Description There are two types of directory chunks -- index chunks, and + * listing chunks. The index chunk will be omitted if there is only one listing + * chunk. A listing chunk has the following format: 0000: char[4] 'PMGL' 0004: + * DWORD Length of free space and/or quickref area at end of directory chunk + * 0008: DWORD Always 0 000C: DWORD Chunk number of previous listing chunk when + * reading directory in sequence (-1 if this is the first listing chunk) 0010: + * DWORD Chunk number of next listing chunk when reading directory in sequence + * (-1 if this is the last listing chunk) 0014: Directory listing entries (to + * quickref area) Sorted by filename; the sort is case-insensitive The quickref + * area is written backwards from the end of the chunk. One quickref entry + * exists for every n entries in the file, where n is calculated as 1 + (1 << + * quickref density). So for density = 2, n = 5 Chunklen-0002: WORD Number of + * entries in the chunk Chunklen-0004: WORD Offset of entry n from entry 0 + * Chunklen-0008: WORD Offset of entry 2n from entry 0 Chunklen-000C: WORD + * Offset of entry 3n from entry 0 ... The format of a directory listing entry + * is as follows BYTE: length of name BYTEs: name (UTF-8 encoded) ENCINT: + * content section ENCINT: offset ENCINT: length The offset is from the + * beginning of the content section the file is in, after the section has been + * decompressed (if appropriate). The length also refers to length of the file + * in the section after decompression. There are two kinds of file represented + * in the directory: user data and format related files. The files which are + * format-related have names which begin with '::', the user data files have + * names which begin with "/". + * + * {@link http + * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original + * /?show-translation-form=1 } + * + * @author olegt + * + */ +public class ChmPmglHeader implements ChmAccessor<ChmPmglHeader> { + private static final long serialVersionUID = -6139486487475923593L; + private byte[] signature; + private long free_space; /* 4 */ + private long unknown_0008; /* 8 */ + private int block_prev; /* c */ + private int block_next; /* 10 */ + + /* local usage */ + private int dataRemained; + private int currentPlace = 0; + + public ChmPmglHeader() { + signature = ChmConstants.PMGL.getBytes(UTF_8); /* + * 0 + * (PMGL + * ) + */ + } + + private int getDataRemained() { + return dataRemained; + } + + private void setDataRemained(int dataRemained) { + this.dataRemained = dataRemained; + } + + private int getCurrentPlace() { + return currentPlace; + } + + private void setCurrentPlace(int currentPlace) { + this.currentPlace = currentPlace; + } + + public long getFreeSpace() { + return free_space; + } + + public void setFreeSpace(long free_space) throws TikaException { + if (free_space < 0) { + throw new TikaException("Bad PMGLheader.FreeSpace="+free_space); + } + this.free_space = free_space; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("signatute:=" + new String(getSignature(), UTF_8) + ", "); + sb.append("free space:=" + getFreeSpace() + ", "); + sb.append("unknown0008:=" + getUnknown0008() + ", "); + sb.append("prev block:=" + getBlockPrev() + ", "); + sb.append("next block:=" + getBlockNext() + + System.getProperty("line.separator")); + return sb.toString(); + } + + protected void unmarshalCharArray(byte[] data, ChmPmglHeader chmPmglHeader, + int count) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + this.setDataRemained(data.length); + System.arraycopy(data, 0, chmPmglHeader.signature, 0, count); + this.setCurrentPlace(this.getCurrentPlace() + count); + this.setDataRemained(this.getDataRemained() - count); + } + + private int unmarshalInt32(byte[] data) throws TikaException { + ChmAssert.assertByteArrayNotNull(data); + int dest; + if (4 > this.getDataRemained()) + throw new TikaException("4 > dataLenght"); + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; + + this.setCurrentPlace(this.getCurrentPlace() + 4); + this.setDataRemained(this.getDataRemained() - 4); + return dest; + } + + private long unmarshalUInt32(byte[] data) throws ChmParsingException { + ChmAssert.assertByteArrayNotNull(data); + long dest; + if (4 > getDataRemained()) + throw new ChmParsingException("4 > dataLenght"); + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; + + setDataRemained(this.getDataRemained() - 4); + this.setCurrentPlace(this.getCurrentPlace() + 4); + return dest; + } + + // @Override + public void parse(byte[] data, ChmPmglHeader chmPmglHeader) throws TikaException { + if (data.length < ChmConstants.CHM_PMGL_LEN) + throw new TikaException(ChmPmglHeader.class.getName() + + " we only know how to deal with a 0x14 byte structures"); + + /* unmarshal fields */ + chmPmglHeader.unmarshalCharArray(data, chmPmglHeader, + ChmConstants.CHM_SIGNATURE_LEN); + chmPmglHeader.setFreeSpace(chmPmglHeader.unmarshalUInt32(data)); + chmPmglHeader.setUnknown0008(chmPmglHeader.unmarshalUInt32(data)); + chmPmglHeader.setBlockPrev(chmPmglHeader.unmarshalInt32(data)); + chmPmglHeader.setBlockNext(chmPmglHeader.unmarshalInt32(data)); + + /* check structure */ + if (!new String(chmPmglHeader.getSignature(), UTF_8).equals(ChmConstants.PMGL)) + throw new ChmParsingException(ChmPmglHeader.class.getName() + + " pmgl != pmgl.signature"); + } + + public byte[] getSignature() { + return signature; + } + + protected void setSignature(byte[] signature) { + this.signature = signature; + } + + public long getUnknown0008() { + return unknown_0008; + } + + protected void setUnknown0008(long unknown_0008) { + this.unknown_0008 = unknown_0008; + } + + public int getBlockPrev() { + return block_prev; + } + + protected void setBlockPrev(int block_prev) { + this.block_prev = block_prev; + } + + public int getBlockNext() { + return block_next; + } + + protected void setBlockNext(int block_next) { + this.block_next = block_next; + } +} Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java?rev=1723223&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java Wed Jan 6 03:50:50 2016 @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.chm.accessor; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.chm.assertion.ChmAssert; +import org.apache.tika.parser.chm.core.ChmCommons; + +/** + * The format of a directory listing entry is as follows: BYTE: length of name + * BYTEs: name (UTF-8 encoded) ENCINT: content section ENCINT: offset ENCINT: + * length The offset is from the beginning of the content section the file is + * in, after the section has been decompressed (if appropriate). The length also + * refers to length of the file in the section after decompression. There are + * two kinds of file represented in the directory: user data and format related + * files. The files which are format-related have names which begin with '::', + * the user data files have names which begin with "/". + * + */ +public class DirectoryListingEntry { + /* Length of the entry name */ + private int name_length; + /* Entry name or directory name */ + private String name; + /* Entry type */ + private ChmCommons.EntryType entryType; + /* Entry offset */ + private int offset; + /* Entry size */ + private int length; + + public DirectoryListingEntry() { + + } + + /** + * Constructs directoryListingEntry + * + * @param name_length + * int + * @param name + * String + * @param isCompressed + * ChmCommons.EntryType + * @param offset + * int + * @param length + * int + * @throws TikaException + */ + public DirectoryListingEntry(int name_length, String name, + ChmCommons.EntryType isCompressed, int offset, int length) throws TikaException { + ChmAssert.assertDirectoryListingEntry(name_length, name, isCompressed, offset, length); + setNameLength(name_length); + setName(name); + setEntryType(isCompressed); + setOffset(offset); + setLength(length); + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("name_length:=" + getNameLength() + System.getProperty("line.separator")); + sb.append("name:=" + getName() + System.getProperty("line.separator")); + sb.append("entryType:=" + getEntryType() + System.getProperty("line.separator")); + sb.append("offset:=" + getOffset() + System.getProperty("line.separator")); + sb.append("length:=" + getLength()); + return sb.toString(); + } + + /** + * Returns an entry name length + * + * @return int + */ + public int getNameLength() { + return name_length; + } + + /** + * Sets an entry name length + * + * @param name_length + * int + */ + protected void setNameLength(int name_length) { + this.name_length = name_length; + } + + /** + * Returns an entry name + * + * @return String + */ + public String getName() { + return name; + } + + /** + * Sets entry name + * + * @param name + * String + */ + protected void setName(String name) { + this.name = name; + } + + /** + * Returns ChmCommons.EntryType (COMPRESSED or UNCOMPRESSED) + * + * @return ChmCommons.EntryType + */ + public ChmCommons.EntryType getEntryType() { + return entryType; + } + + protected void setEntryType(ChmCommons.EntryType entryType) { + this.entryType = entryType; + } + + public int getOffset() { + return offset; + } + + protected void setOffset(int offset) { + this.offset = offset; + } + + public int getLength() { + return length; + } + + protected void setLength(int length) { + this.length = length; + } +} Added: tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java?rev=1723223&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-office-module/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java Wed Jan 6 03:50:50 2016 @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.chm.assertion; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.chm.accessor.ChmAccessor; +import org.apache.tika.parser.chm.accessor.ChmLzxcResetTable; +import org.apache.tika.parser.chm.core.ChmCommons; +import org.apache.tika.parser.chm.exception.ChmParsingException; + +/** + * Contains chm extractor assertions + */ +public class ChmAssert { + /** + * Checks a validity of the chmBlockSegment parameters + * + * @param data + * byte[] + * @param resetTable + * ChmLzxcResetTable + * @param blockNumber + * int + * @param lzxcBlockOffset + * int + * @param lzxcBlockLength + * int + * @throws TikaException + */ + public static final void assertChmBlockSegment(byte[] data, + ChmLzxcResetTable resetTable, int blockNumber, int lzxcBlockOffset, + int lzxcBlockLength) throws TikaException { + if ((data == null)) + throw new TikaException("data[] is null"); + + if ((data.length <= 0)) + throw new TikaException("data[] length should be greater than zero"); + + if (resetTable == null) + throw new TikaException("resetTable is null"); + + if (resetTable.getBlockAddress().length <= 1) + throw new TikaException("resetTable.getBlockAddress().length should be greater than zero"); + + if (blockNumber < 0) + throw new TikaException("blockNumber should be positive number"); + + if (lzxcBlockOffset < 0) + throw new TikaException("lzxcBlockOffset should be positive number"); + + if (lzxcBlockLength < 0) + throw new TikaException("lzxcBlockLength should be positive number"); + } + + /** + * Checks if InputStream is not null + * + * @param is + * InputStream + * @throws ChmParsingException + * @throws IOException + */ + public static final void assertInputStreamNotNull(InputStream is) throws IOException { + if (is == null) + throw new IOException("input sream is null"); + } + + /** + * Checks validity of ChmAccessor parameters + * + * @param data + * @param chmItsfHeader + * @param count + * @throws ChmParsingException + */ + public static final void assertChmAccessorParameters(byte[] data, + ChmAccessor<?> chmAccessor, int count) throws ChmParsingException { + assertByteArrayNotNull(data); + assertChmAccessorNotNull(chmAccessor); + } + + /** + * Checks if byte[] is not null + * + * @param data + * @throws ChmParsingException + */ + public static final void assertByteArrayNotNull(byte[] data) throws ChmParsingException { + if (data == null) + throw new ChmParsingException("byte[] data is null"); + } + + /** + * Checks if ChmAccessor is not null In case of null throws exception + * + * @param ChmAccessor + * @throws ChmParsingException + */ + public static final void assertChmAccessorNotNull(ChmAccessor<?> chmAccessor) throws ChmParsingException { + if (chmAccessor == null) + throw new ChmParsingException("chm header is null"); + } + + /** + * Checks validity of the DirectoryListingEntry's parameters In case of + * invalid parameter(s) throws an exception + * + * @param name_length + * length of the chm entry name + * @param name + * chm entry name + * @param entryType + * EntryType + * @param offset + * @param length + * @throws ChmParsingException + */ + public static final void assertDirectoryListingEntry(int name_length, + String name, ChmCommons.EntryType entryType, int offset, int length) throws ChmParsingException { + if (name_length < 0) + throw new ChmParsingException("invalid name length"); + if (name == null) + throw new ChmParsingException("invalid name"); + + if ((entryType != ChmCommons.EntryType.COMPRESSED) + && (entryType != ChmCommons.EntryType.UNCOMPRESSED)) + throw new ChmParsingException("invalid compressed type, should be EntryType.COMPRESSED | EntryType.UNCOMPRESSED"); + + if (offset < 0) + throw new ChmParsingException("invalid offset"); + + if (length < 0) + throw new ChmParsingException("invalid length"); + } + + public static void assertCopyingDataIndex(int index, int dataLength) throws ChmParsingException { + if (index >= dataLength) + throw new ChmParsingException("cannot parse chm file index > data.length"); + } + + /** + * Checks if int param is greater than zero In case param <=0 throws an + * exception + * + * @param param + * @throws ChmParsingException + */ + public static void assertPositiveInt(int param) throws ChmParsingException { + if (param <= 0) + throw new ChmParsingException("resetTable.getBlockAddress().length should be greater than zero"); + } +}
