Author: tilman
Date: Tue Jul 29 14:25:07 2025
New Revision: 1927520

Log:
PDFBOX-6047: add text extraction options submenu

Added:
   
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java
   (contents, props changed)
Modified:
   
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java
   
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java

Modified: 
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java
==============================================================================
--- 
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java
        Tue Jul 29 12:29:58 2025        (r1927519)
+++ 
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java
        Tue Jul 29 14:25:07 2025        (r1927520)
@@ -68,6 +68,7 @@ import org.apache.pdfbox.debugger.ui.Hig
 import org.apache.pdfbox.debugger.ui.ImageTypeMenu;
 import org.apache.pdfbox.debugger.ui.RenderDestinationMenu;
 import org.apache.pdfbox.debugger.ui.TextDialog;
+import org.apache.pdfbox.debugger.ui.TextStripperMenu;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.fixup.AcroFormDefaultFixup;
 import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup;
@@ -309,6 +310,8 @@ public class PagePane implements ActionL
             PDFTextStripper stripper = new PDFTextStripper();
             stripper.setStartPage(pageIndex + 1);
             stripper.setEndPage(pageIndex + 1);
+            stripper.setSortByPosition(TextStripperMenu.isSorted());
+            stripper.setIgnoreContentStreamSpaceGlyphs(TextStripperMenu.isIgnoreSpaces());
             textDialog.setText(stripper.getText(document));
         }
         catch (IOException ex)

Added: 
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ 
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java
      Tue Jul 29 14:25:07 2025        (r1927520)
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.debugger.ui;
+
+import javax.swing.JCheckBoxMenuItem;
+import javax.swing.JMenu;
+
+/**
+ * Submenu "Text extraction options" with the check boxes "sort" and "ignore spaces".
+ * The static accessors below report whether each check box is currently selected.
+ *
+ * @author Tilman Hausherr
+ */
+public class TextStripperMenu extends MenuBase
+{
+    private static TextStripperMenu instance;
+    // per-instance and final, so both check boxes exist whenever the singleton exists
+    private final JCheckBoxMenuItem sortOptionMenuItem = new JCheckBoxMenuItem("sort");
+    private final JCheckBoxMenuItem ignoreSpacesOptionMenuItem =
+            new JCheckBoxMenuItem("ignore spaces");
+
+    /** Constructor; assembles the submenu from the two check boxes. */
+    private TextStripperMenu()
+    {
+        JMenu menu = new JMenu("Text extraction options");
+        menu.add(sortOptionMenuItem);
+        menu.add(ignoreSpacesOptionMenuItem);
+        setMenu(menu);
+    }
+
+    /**
+     * Provides the TextStripperMenu instance, creating it on first use.
+     *
+     * @return TextStripperMenu instance.
+     */
+    public static TextStripperMenu getInstance()
+    {
+        if (instance == null)
+        {
+            instance = new TextStripperMenu();
+        }
+        return instance;
+    }
+
+    /** @return true if "sort" is selected; the menu is created first if it does not exist. */
+    public static boolean isSorted()
+    {
+        // go through getInstance() so a call made before the menu was built cannot hit null
+        return getInstance().sortOptionMenuItem.isSelected();
+    }
+
+    /** @return true if "ignore spaces" is selected; the menu is created first if needed. */
+    public static boolean isIgnoreSpaces()
+    {
+        return getInstance().ignoreSpacesOptionMenuItem.isSelected();
+    }
+}

Modified: 
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java
==============================================================================
--- 
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java
      Tue Jul 29 12:29:58 2025        (r1927519)
+++ 
pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java
      Tue Jul 29 14:25:07 2025        (r1927520)
@@ -229,6 +229,10 @@ public class ViewMenu extends MenuBase
         extractTextMenuItem.setEnabled(false);
         viewMenu.add(extractTextMenuItem);
 
+        TextStripperMenu textStripperMenu = TextStripperMenu.getInstance();
+        textStripperMenu.setEnableMenu(false);
+        viewMenu.add(textStripperMenu.getMenu());
+
         viewMenu.addSeparator();
 
         repairAcroFormMenuItem = new JCheckBoxMenuItem(REPAIR_ACROFORM);

Reply via email to