Author: tilman
Date: Fri Jan 30 16:06:54 2026
New Revision: 1931613
Log:
PDFBOX-5660: avoid ArrayIndexOutOfBoundsException, as suggested by Ze Sheng;
closes #411
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java	Fri Jan 30 16:06:49 2026	(r1931612)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java	Fri Jan 30 16:06:54 2026	(r1931613)
@@ -797,6 +797,10 @@ public class CMapParser
private static boolean increment(byte[] data, int position, boolean
useStrictMode)
{
+ if (position < 0)
+ {
+ return false;
+ }
if (position > 0 && (data[position] & 0xFF) == 255)
{
// PDFBOX-4661: avoid overflow of the last byte, all following values are undefined
Modified:
pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
==============================================================================
--- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java	Fri Jan 30 16:06:49 2026	(r1931612)
+++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java	Fri Jan 30 16:06:54 2026	(r1931613)
@@ -1,248 +1,259 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.fontbox.cmap;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNotEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-
-import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
-import org.junit.jupiter.api.Test;
-
-/**
- * This will test the CMapParser implementation.
- *
- */
-class TestCMapParser
-{
-
- /**
- * Check whether the parser and the resulting mapping is working correct.
- *
- * @throws IOException If something went wrong
- */
- @Test
- void testLookup() throws IOException
- {
- final String resourceDir = "src/test/resources/cmap";
- File inDir = new File(resourceDir);
-
- CMap cMap = new CMapParser()
- .parse(new RandomAccessReadBufferedFile(new File(inDir,
"CMapTest")));
-
- // char mappings
- byte[] bytes1 = {0, 1};
- assertEquals("A", cMap.toUnicode(bytes1), "bytes 00 01 from bfrange
<0001> <0005> <0041>");
-
- byte[] bytes2 = {1, 00};
- String str2 = "0";
- assertEquals(str2,
- cMap.toUnicode(bytes2), "bytes 01 00 from bfrange <0100>
<0109> <0030>");
-
- byte[] bytes3 = { 1, 32 };
- assertEquals("P", cMap.toUnicode(bytes3), "bytes 01 00 from bfrange
<0100> <0109> <0030>");
-
- byte[] bytes4 = { 1, 33 };
- assertEquals("R", cMap.toUnicode(bytes4), "bytes 01 00 from bfrange
<0100> <0109> <0030>");
-
- byte[] bytes5 = { 0, 10 };
- String str5 = "*";
- assertEquals(str5, cMap.toUnicode(bytes5), "bytes 00 0A from bfchar
<000A> <002A>");
-
- byte[] bytes6 = { 1, 10 };
- String str6 = "+";
- assertEquals(str6, cMap.toUnicode(bytes6), "bytes 01 0A from bfchar
<010A> <002B>");
-
- // CID mappings
- byte[] cid1 = { 0, 65 };
- assertEquals(65, cMap.toCID(cid1), "CID 65 from cidrange <0000> <00ff>
0 ");
-
- byte[] cid2 = { 1, 24 };
- int strCID2 = 0x0118;
- assertEquals(strCID2, cMap.toCID(cid2), "CID 280 from cidrange <0100>
<01ff> 256");
-
- byte[] cid3 = { 2, 8 };
- int strCID3 = 0x0208;
- assertEquals(strCID3, cMap.toCID(cid3), "CID 520 from cidchar <0208>
520");
-
- byte[] cid4 = { 1, 0x2c };
- int strCID4 = 0x12C;
- assertEquals(strCID4, cMap.toCID(cid4), "CID 300 from cidrange <0300>
<0300> 300");
- }
-
- @Test
- void testIdentity() throws IOException
- {
- CMap cMap = new CMapParser().parsePredefined("Identity-H");
-
- assertEquals(65, cMap.toCID(new byte[] { 0, 65 }), "Indentity-H CID
65");
- assertEquals(12345, cMap.toCID(new byte[] { 0x30, 0x39 }),
"Indentity-H CID 12345");
- assertEquals(0xFFFF, cMap.toCID(new byte[] { (byte) 0xFF, (byte) 0xFF
}),
- "Indentity-H CID 0xFFFF");
- }
-
- @Test
- void testUniJIS_UTF16_H() throws IOException
- {
- CMap cMap = new CMapParser().parsePredefined("UniJIS-UTF16-H");
-
- // the next 3 cases demonstrate the issue of possible false result
values of CMap.toCID(int code)
- assertEquals(694, cMap.toCID(0xb1), "UniJIS-UTF16-H CID 0xb1 -> 694");
- assertNotEquals(694, cMap.toCID(0xb1, 1), "UniJIS-UTF16-H CID 0xb1 ->
694");
- assertEquals(694, cMap.toCID(0xb1, 2), "UniJIS-UTF16-H CID 0x00b1 ->
694");
-
- // 1:1 cid char mapping
- assertEquals(694, cMap.toCID(new byte[] { 0x00, (byte) 0xb1 }),
- "UniJIS-UTF16-H CID 0x00b1 -> 694");
- assertEquals(20168, cMap.toCID(new byte[] { (byte) 0xd8, 0x50, (byte)
0xdc, 0x4b }),
- "UniJIS-UTF16-H CID 0xd850dc4b -> 20168");
-
- // cid range mapping
- assertEquals(19223, cMap.toCID(new byte[] { 0x54, 0x34 }),
- "UniJIS-UTF16-H CID 0x5434 -> 19223");
- assertEquals(10006, cMap.toCID(new byte[] { (byte) 0xd8, 0x3c, (byte)
0xdd, 0x12 }),
- "UniJIS-UTF16-H CID 0xd83cdd12 -> 10006");
-
- }
-
- @Test
- void testUniJIS_UCS2_H() throws IOException
- {
- CMap cMap = new CMapParser().parsePredefined("UniJIS-UCS2-H");
-
- assertEquals(34, cMap.toCID(new byte[] { 0, 65 }), "UniJIS-UCS2-H CID
65 -> 34");
- }
-
- @Test
- void testAdobe_GB1_UCS2() throws IOException
- {
- CMap cMap = new CMapParser().parsePredefined("Adobe-GB1-UCS2");
-
- assertEquals("0", cMap.toUnicode(new byte[] { 0, 0x11 }),
- "Adobe-GB1-UCS2 CID 0x11 -> \"0\"");
- }
-
- /**
- * Test the parser against a valid, but poorly formatted CMap file.
- * @throws IOException If something went wrong
- */
- @Test
- void testParserWithPoorWhitespace() throws IOException
- {
- CMap cMap = new CMapParser().parse(new RandomAccessReadBufferedFile(
- new File("src/test/resources/cmap", "CMapNoWhitespace")));
-
- assertNotNull(cMap, "Failed to parse nasty CMap file");
- }
-
- @Test
- void testParserWithMalformedbfrange1() throws IOException
- {
- CMap cMap = new CMapParser()
- .parse(new RandomAccessReadBufferedFile(
- new File("src/test/resources/cmap",
"CMapMalformedbfrange1")));
-
- assertNotNull(cMap, "Failed to parse malformed CMap file");
-
- byte[] bytes1 = { 0, 1 };
- assertEquals("A", cMap.toUnicode(bytes1), "bytes 00 01 from bfrange
<0001> <0009> <0041>");
-
- byte[] bytes2 = { 1, 00 };
- assertNull(cMap.toUnicode(bytes2));
-
- }
-
- @Test
- void testParserWithMalformedbfrange2() throws IOException
- {
- CMap cMap = new CMapParser()
- .parse(new RandomAccessReadBufferedFile(
- new File("src/test/resources/cmap",
"CMapMalformedbfrange2")));
-
- assertNotNull(cMap, "Failed to parse malformed CMap file");
-
- assertEquals("0", cMap.toUnicode(new byte[] { 0, 1 }),
- "bytes 00 01 from bfrange <0001> <0009> <0030>");
-
- assertEquals("A", cMap.toUnicode(new byte[] { 2, 0x32 }),
- "bytes 02 32 from bfrange <0232> <0432> <0041>");
-
- // check border values for non strict mode
- assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF0 }));
- assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF1 }));
-
- // use strict mode
- cMap = new CMapParser(true)
- .parse(new RandomAccessReadBufferedFile(
- new File("src/test/resources/cmap",
"CMapMalformedbfrange2")));
- // check border values for strict mode
- assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF0 }));
- assertNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF1 }));
-
- }
-
- @Test
- void testPredefinedMap() throws IOException
- {
- CMap cMap = new CMapParser().parsePredefined("Adobe-Korea1-UCS2");
- assertNotNull(cMap, "Failed to parse predefined CMap
Adobe-Korea1-UCS2");
-
- assertEquals("Adobe-Korea1-UCS2", cMap.getName(), "wrong CMap name");
- assertEquals(0, cMap.getWMode(), "wrong WMode");
- assertFalse(cMap.hasCIDMappings());
- assertTrue(cMap.hasUnicodeMappings());
-
- cMap = new CMapParser().parsePredefined("Identity-V");
- assertNotNull(cMap, "Failed to parse predefined CMap Identity-V");
- }
-
- @Test
- void testIdentitybfrange() throws IOException
- {
- // use strict mode
- CMap cMap = new CMapParser(true)
- .parse(new RandomAccessReadBufferedFile(
- new File("src/test/resources/cmap",
"Identitybfrange")));
- assertEquals("Adobe-Identity-UCS", cMap.getName(), "wrong CMap name");
-
- byte[] bytes = { 0, 65 };
- assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
- "Indentity 0x0048");
- bytes = new byte[] { 0x30, 0x39 };
- assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
- "Indentity 0x3039");
- // check border values for strict mode
- bytes = new byte[] { 0x30, (byte) 0xFF };
- assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
- "Indentity 0x30FF");
- // check border values for strict mode
- bytes = new byte[] { 0x31, 0x00 };
- assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
- "Indentity 0x3100");
- bytes = new byte[] { (byte) 0xFF, (byte) 0xFF };
- assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
- "Indentity 0xFFFF");
-
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.fontbox.cmap;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import org.apache.pdfbox.io.RandomAccessReadBuffer;
+
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * This will test the CMapParser implementation.
+ *
+ */
+class TestCMapParser
+{
+
+ /**
+ * Check whether the parser and the resulting mapping is working correct.
+ *
+ * @throws IOException If something went wrong
+ */
+ @Test
+ void testLookup() throws IOException
+ {
+ final String resourceDir = "src/test/resources/cmap";
+ File inDir = new File(resourceDir);
+
+ CMap cMap = new CMapParser()
+ .parse(new RandomAccessReadBufferedFile(new File(inDir,
"CMapTest")));
+
+ // char mappings
+ byte[] bytes1 = {0, 1};
+ assertEquals("A", cMap.toUnicode(bytes1), "bytes 00 01 from bfrange
<0001> <0005> <0041>");
+
+ byte[] bytes2 = {1, 00};
+ String str2 = "0";
+ assertEquals(str2,
+ cMap.toUnicode(bytes2), "bytes 01 00 from bfrange <0100>
<0109> <0030>");
+
+ byte[] bytes3 = { 1, 32 };
+ assertEquals("P", cMap.toUnicode(bytes3), "bytes 01 00 from bfrange
<0100> <0109> <0030>");
+
+ byte[] bytes4 = { 1, 33 };
+ assertEquals("R", cMap.toUnicode(bytes4), "bytes 01 00 from bfrange
<0100> <0109> <0030>");
+
+ byte[] bytes5 = { 0, 10 };
+ String str5 = "*";
+ assertEquals(str5, cMap.toUnicode(bytes5), "bytes 00 0A from bfchar
<000A> <002A>");
+
+ byte[] bytes6 = { 1, 10 };
+ String str6 = "+";
+ assertEquals(str6, cMap.toUnicode(bytes6), "bytes 01 0A from bfchar
<010A> <002B>");
+
+ // CID mappings
+ byte[] cid1 = { 0, 65 };
+ assertEquals(65, cMap.toCID(cid1), "CID 65 from cidrange <0000> <00ff>
0 ");
+
+ byte[] cid2 = { 1, 24 };
+ int strCID2 = 0x0118;
+ assertEquals(strCID2, cMap.toCID(cid2), "CID 280 from cidrange <0100>
<01ff> 256");
+
+ byte[] cid3 = { 2, 8 };
+ int strCID3 = 0x0208;
+ assertEquals(strCID3, cMap.toCID(cid3), "CID 520 from cidchar <0208>
520");
+
+ byte[] cid4 = { 1, 0x2c };
+ int strCID4 = 0x12C;
+ assertEquals(strCID4, cMap.toCID(cid4), "CID 300 from cidrange <0300>
<0300> 300");
+ }
+
+ @Test
+ void testIdentity() throws IOException
+ {
+ CMap cMap = new CMapParser().parsePredefined("Identity-H");
+
+ assertEquals(65, cMap.toCID(new byte[] { 0, 65 }), "Indentity-H CID
65");
+ assertEquals(12345, cMap.toCID(new byte[] { 0x30, 0x39 }),
"Indentity-H CID 12345");
+ assertEquals(0xFFFF, cMap.toCID(new byte[] { (byte) 0xFF, (byte) 0xFF
}),
+ "Indentity-H CID 0xFFFF");
+ }
+
+ @Test
+ void testUniJIS_UTF16_H() throws IOException
+ {
+ CMap cMap = new CMapParser().parsePredefined("UniJIS-UTF16-H");
+
+ // the next 3 cases demonstrate the issue of possible false result
values of CMap.toCID(int code)
+ assertEquals(694, cMap.toCID(0xb1), "UniJIS-UTF16-H CID 0xb1 -> 694");
+ assertNotEquals(694, cMap.toCID(0xb1, 1), "UniJIS-UTF16-H CID 0xb1 ->
694");
+ assertEquals(694, cMap.toCID(0xb1, 2), "UniJIS-UTF16-H CID 0x00b1 ->
694");
+
+ // 1:1 cid char mapping
+ assertEquals(694, cMap.toCID(new byte[] { 0x00, (byte) 0xb1 }),
+ "UniJIS-UTF16-H CID 0x00b1 -> 694");
+ assertEquals(20168, cMap.toCID(new byte[] { (byte) 0xd8, 0x50, (byte)
0xdc, 0x4b }),
+ "UniJIS-UTF16-H CID 0xd850dc4b -> 20168");
+
+ // cid range mapping
+ assertEquals(19223, cMap.toCID(new byte[] { 0x54, 0x34 }),
+ "UniJIS-UTF16-H CID 0x5434 -> 19223");
+ assertEquals(10006, cMap.toCID(new byte[] { (byte) 0xd8, 0x3c, (byte)
0xdd, 0x12 }),
+ "UniJIS-UTF16-H CID 0xd83cdd12 -> 10006");
+
+ }
+
+ @Test
+ void testUniJIS_UCS2_H() throws IOException
+ {
+ CMap cMap = new CMapParser().parsePredefined("UniJIS-UCS2-H");
+
+ assertEquals(34, cMap.toCID(new byte[] { 0, 65 }), "UniJIS-UCS2-H CID
65 -> 34");
+ }
+
+ @Test
+ void testAdobe_GB1_UCS2() throws IOException
+ {
+ CMap cMap = new CMapParser().parsePredefined("Adobe-GB1-UCS2");
+
+ assertEquals("0", cMap.toUnicode(new byte[] { 0, 0x11 }),
+ "Adobe-GB1-UCS2 CID 0x11 -> \"0\"");
+ }
+
+ /**
+ * Test the parser against a valid, but poorly formatted CMap file.
+ * @throws IOException If something went wrong
+ */
+ @Test
+ void testParserWithPoorWhitespace() throws IOException
+ {
+ CMap cMap = new CMapParser().parse(new RandomAccessReadBufferedFile(
+ new File("src/test/resources/cmap", "CMapNoWhitespace")));
+
+ assertNotNull(cMap, "Failed to parse nasty CMap file");
+ }
+
+ @Test
+ void testParserWithMalformedbfrange1() throws IOException
+ {
+ CMap cMap = new CMapParser()
+ .parse(new RandomAccessReadBufferedFile(
+ new File("src/test/resources/cmap",
"CMapMalformedbfrange1")));
+
+ assertNotNull(cMap, "Failed to parse malformed CMap file");
+
+ byte[] bytes1 = { 0, 1 };
+ assertEquals("A", cMap.toUnicode(bytes1), "bytes 00 01 from bfrange
<0001> <0009> <0041>");
+
+ byte[] bytes2 = { 1, 00 };
+ assertNull(cMap.toUnicode(bytes2));
+
+ }
+
+ @Test
+ void testParserWithMalformedbfrange2() throws IOException
+ {
+ CMap cMap = new CMapParser()
+ .parse(new RandomAccessReadBufferedFile(
+ new File("src/test/resources/cmap",
"CMapMalformedbfrange2")));
+
+ assertNotNull(cMap, "Failed to parse malformed CMap file");
+
+ assertEquals("0", cMap.toUnicode(new byte[] { 0, 1 }),
+ "bytes 00 01 from bfrange <0001> <0009> <0030>");
+
+ assertEquals("A", cMap.toUnicode(new byte[] { 2, 0x32 }),
+ "bytes 02 32 from bfrange <0232> <0432> <0041>");
+
+ // check border values for non strict mode
+ assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF0 }));
+ assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF1 }));
+
+ // use strict mode
+ cMap = new CMapParser(true)
+ .parse(new RandomAccessReadBufferedFile(
+ new File("src/test/resources/cmap",
"CMapMalformedbfrange2")));
+ // check border values for strict mode
+ assertNotNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF0 }));
+ assertNull(cMap.toUnicode(new byte[] { 2, (byte) 0xF1 }));
+
+ }
+
+ @Test
+ void testPredefinedMap() throws IOException
+ {
+ CMap cMap = new CMapParser().parsePredefined("Adobe-Korea1-UCS2");
+ assertNotNull(cMap, "Failed to parse predefined CMap
Adobe-Korea1-UCS2");
+
+ assertEquals("Adobe-Korea1-UCS2", cMap.getName(), "wrong CMap name");
+ assertEquals(0, cMap.getWMode(), "wrong WMode");
+ assertFalse(cMap.hasCIDMappings());
+ assertTrue(cMap.hasUnicodeMappings());
+
+ cMap = new CMapParser().parsePredefined("Identity-V");
+ assertNotNull(cMap, "Failed to parse predefined CMap Identity-V");
+ }
+
+ @Test
+ void testIdentitybfrange() throws IOException
+ {
+ // use strict mode
+ CMap cMap = new CMapParser(true)
+ .parse(new RandomAccessReadBufferedFile(
+ new File("src/test/resources/cmap",
"Identitybfrange")));
+ assertEquals("Adobe-Identity-UCS", cMap.getName(), "wrong CMap name");
+
+ byte[] bytes = { 0, 65 };
+ assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
+ "Indentity 0x0048");
+ bytes = new byte[] { 0x30, 0x39 };
+ assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
+ "Indentity 0x3039");
+ // check border values for strict mode
+ bytes = new byte[] { 0x30, (byte) 0xFF };
+ assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
+ "Indentity 0x30FF");
+ // check border values for strict mode
+ bytes = new byte[] { 0x31, 0x00 };
+ assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
+ "Indentity 0x3100");
+ bytes = new byte[] { (byte) 0xFF, (byte) 0xFF };
+ assertEquals(new String(bytes, StandardCharsets.UTF_16BE),
cMap.toUnicode(bytes),
+ "Indentity 0xFFFF");
+
+ }
+
+ @Test
+ void testBadIncrement() throws IOException
+ {
+ byte[] cmapData = "1 beginbfrange\n<> <>
<2223>\nendbfrange".getBytes("US-ASCII");
+ CMapParser parser = new CMapParser();
+ CMap cmap = parser.parse(new RandomAccessReadBuffer(cmapData));
+ assertNotNull(cmap);
+ }
+}