Author: fanningpj
Date: Fri Mar 17 23:35:33 2023
New Revision: 1908458

URL: http://svn.apache.org/viewvc?rev=1908458&view=rev
Log:
[bug-66532] more performant way to iterate over codepoints. Thanks to Matthias 
Raschhofer

Modified:
    
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
    poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java
    poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java

Modified: 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java?rev=1908458&r1=1908457&r2=1908458&view=diff
==============================================================================
--- poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java (original)
+++ poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SheetDataWriter.java Fri Mar 17 23:35:33 2023
@@ -31,6 +31,7 @@ import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.nio.charset.StandardCharsets;
 import java.util.Iterator;
+import java.util.PrimitiveIterator;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -397,37 +398,39 @@ public class SheetDataWriter implements
             return;
         }
 
-        for (Iterator<String> iter = CodepointsUtil.iteratorFor(s); iter.hasNext(); ) {
-            String codepoint = iter.next();
+        int codepoint;
+        for (PrimitiveIterator.OfInt iter = CodepointsUtil.primitiveIterator(s); iter.hasNext(); ) {
+            codepoint = iter.nextInt();
             switch (codepoint) {
-                case "<":
+                case '<':
                     _out.write("&lt;");
                     break;
-                case ">":
+                case '>':
                     _out.write("&gt;");
                     break;
-                case "&":
+                case '&':
                     _out.write("&amp;");
                     break;
-                case "\"":
+                case '\"':
                     _out.write("&quot;");
                     break;
                 // Special characters
-                case "\n":
+                case '\n':
                     _out.write("&#xa;");
                     break;
-                case "\r":
+                case '\r':
                     _out.write("&#xd;");
                     break;
-                case "\t":
+                case '\t':
                     _out.write("&#x9;");
                     break;
-                case "\u00A0": // NO-BREAK SPACE
+                case '\u00A0': // NO-BREAK SPACE
                     _out.write("&#xa0;");
                     break;
                 default:
-                    if (codepoint.length() == 1) {
-                        char c = codepoint.charAt(0);
+                    final char[] chars = Character.toChars(codepoint);
+                    if (chars.length == 1) {
+                        char c = chars[0];
                         // YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.
                         // the same rule applies to "not a character" symbols.
                         if (replaceWithQuestionMark(c)) {
@@ -436,7 +439,7 @@ public class SheetDataWriter implements
                             _out.write(c);
                         }
                     } else {
-                        _out.write(codepoint);
+                        _out.write(chars);
                     }
                     break;
             }

Modified: poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java?rev=1908458&r1=1908457&r2=1908458&view=diff
==============================================================================
--- poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java (original)
+++ poi/trunk/poi/src/main/java/org/apache/poi/util/CodepointsUtil.java Fri Mar 17 23:35:33 2023
@@ -18,12 +18,28 @@
 package org.apache.poi.util;
 
 import java.util.Iterator;
+import java.util.PrimitiveIterator;
 
 @Internal
 public class CodepointsUtil {
+
+    /**
+     * @param text to iterate over
+     * @return iterator with Strings representing the codepoints
+     * @see #primitiveIterator(String) a more performant iterator
+     */
     public static Iterator<String> iteratorFor(String text) {
         return text.codePoints()
                 .mapToObj(codePoint -> new String(Character.toChars(codePoint)))
                 .iterator();
     }
+
+    /**
+     * @param text to iterate over
+     * @return iterator with ints representing the codepoints
+     * @since POI 5.2.4
+     */
+    public static PrimitiveIterator.OfInt primitiveIterator(String text) {
+        return text.codePoints().iterator();
+    }
 }
\ No newline at end of file

Modified: 
poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java?rev=1908458&r1=1908457&r2=1908458&view=diff
==============================================================================
--- poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java (original)
+++ poi/trunk/poi/src/test/java/org/apache/poi/util/TestCodepointsUtil.java Fri Mar 17 23:35:33 2023
@@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Asse
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import java.util.function.IntConsumer;
 
 import org.junit.jupiter.api.Test;
 
@@ -38,8 +39,22 @@ class TestCodepointsUtil {
         List<String> codePoints = new ArrayList<>();
         CodepointsUtil.iteratorFor(unicodeSurrogates).forEachRemaining(codePoints::add);
         assertEquals(17, codePoints.size());
-        for(String point : codePoints){
-            assertTrue(point.length() >=1 && point.length() <= 2, "codepoint " + point + "is wrong size");
+        for (String point : codePoints) {
+            assertTrue(point.length() >= 1 && point.length() <= 2, "codepoint " + point + "is wrong size");
+        }
+    }
+
+    @Test
+    void testPrimitiveIterator() {
+        final String unicodeSurrogates = "\uD835\uDF4A\uD835\uDF4B\uD835\uDF4C\uD835\uDF4D\uD835\uDF4E"
+                + "abcdef123456";
+        List<String> codePoints = new ArrayList<>();
+        CodepointsUtil.primitiveIterator(unicodeSurrogates).forEachRemaining((IntConsumer) (i) -> {
+                    codePoints.add(new String(Character.toChars(i)));
+                });
+        assertEquals(17, codePoints.size());
+        for (String point : codePoints) {
+            assertTrue(point.length() >= 1 && point.length() <= 2, "codepoint " + point + "is wrong size");
         }
     }
 



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to