Author: tilman
Date: Mon Jan 12 19:45:23 2026
New Revision: 1931273

Log:
PDFBOX-5660: don't open twice, as suggested by Valery Bokov; closes #388

Modified:
   pdfbox/branches/3.0/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java

Modified: pdfbox/branches/3.0/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
==============================================================================
--- pdfbox/branches/3.0/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java      Mon Jan 12 19:03:41 2026        (r1931272)
+++ pdfbox/branches/3.0/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java      Mon Jan 12 19:45:23 2026        (r1931273)
@@ -20,7 +20,7 @@ import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
-import java.io.InputStream;
+import java.io.BufferedInputStream;
 import java.io.InputStreamReader;
 import java.io.PrintStream;
 import java.io.Reader;
@@ -185,27 +185,30 @@ public class TextToPDF implements Callab
             setTopMargin(margins[2]);
             setBottomMargin(margins[3]);
 
-            boolean hasUtf8BOM = false;
-            if (charset.equals(StandardCharsets.UTF_8))
+            try (BufferedInputStream is = new BufferedInputStream(new FileInputStream(infile)))
             {
-                // check for utf8 BOM
-                // FileInputStream doesn't support mark/reset
-                try (InputStream is = new FileInputStream(infile))
+                if (charset.equals(StandardCharsets.UTF_8))
                 {
-                    if (is.read() == 0xEF && is.read() == 0xBB && is.read() == 0xBF)
+                    final int readLimit = 3;
+                    is.mark(readLimit);
+
+                    byte[] firstBytes = new byte[readLimit];
+                    if (is.read(firstBytes) != readLimit)
                     {
-                        hasUtf8BOM = true;
+                        throw new IOException("Could not read 3 bytes, size changed?!");
                     }
-                }
-            }
-            try (InputStream is = new FileInputStream(infile))
-            {
-                if (hasUtf8BOM)
-                {
-                    long skipped = is.skip(3);
-                    if (skipped != 3)
+
+                    if (firstBytes[0] == (byte) 0xEF &&
+                        firstBytes[1] == (byte) 0xBB &&
+                        firstBytes[2] == (byte) 0xBF)
+                    {
+                        //UTF-8 with BOM
+                        //3 bytes already read (skipped)
+                    }
+                    else
                     {
-                        throw new IOException("Could not skip 3 bytes, size changed?!");
+                        //It looks like UTF with no BOM or file was corrupted
+                        is.reset();
                     }
                 }
                 try (Reader reader = new InputStreamReader(is, charset))

Reply via email to