Author: fanningpj
Date: Mon Jul 3 18:49:25 2017
New Revision: 1800705
URL: http://svn.apache.org/viewvc?rev=1800705&view=rev
Log:
[Bug 61246] fix issue where SXSSF sheet data has unicode surrogate chars
replaced by '?'
Added:
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSheetDataWriter.java
(with props)
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java?rev=1800705&r1=1800704&r2=1800705&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
(original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
Mon Jul 3 18:49:25 2017
@@ -122,12 +122,16 @@ public class SheetDataWriter {
* flush and close the temp data writer.
* This method <em>must</em> be invoked before calling {@link #getWorksheetXMLInputStream()}
*/
- public void close() throws IOException{
- _out.flush();
+ public void close() throws IOException {
+ flush();
_out.close();
}
+
+ protected void flush() throws IOException {
+ _out.flush();
+ }
- protected File getTempFile(){
+ protected File getTempFile() {
return _fd;
}
@@ -329,7 +333,7 @@ public class SheetDataWriter {
}
//Taken from jdk1.3/src/javax/swing/text/html/HTMLWriter.java
- protected void outputQuotedString(String s) throws IOException {
+ protected void outputQuotedString(String s) throws IOException {
if (s == null || s.length() == 0) {
return;
}
@@ -393,15 +397,21 @@ public class SheetDataWriter {
break;
default:
// YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.
- // the same rule applies to unicode surrogates and "not a character" symbols.
- if( c < ' ' || Character.isLowSurrogate(c) ||
Character.isHighSurrogate(c) ||
- ('\uFFFE' <= c && c <= '\uFFFF')) {
+ // the same rule applies to "not a character" symbols.
+ if (replaceWithQuestionMark(c)) {
if (counter > last) {
_out.write(chars, last, counter - last);
}
_out.write('?');
last = counter + 1;
}
+ else if (Character.isHighSurrogate(c) ||
Character.isLowSurrogate(c)) {
+ if (counter > last) {
+ _out.write(chars, last, counter - last);
+ }
+ _out.write(c);
+ last = counter + 1;
+ }
else if (c > 127) {
if (counter > last) {
_out.write(chars, last, counter - last);
@@ -421,6 +431,10 @@ public class SheetDataWriter {
}
}
+ static boolean replaceWithQuestionMark(char c) {
+ return c < ' ' || ('\uFFFE' <= c && c <= '\uFFFF');
+ }
+
/**
* Deletes the temporary file that backed this sheet on disk.
* @return true if the file was deleted, false if it wasn't.
Added:
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSheetDataWriter.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSheetDataWriter.java?rev=1800705&view=auto
==============================================================================
---
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSheetDataWriter.java
(added)
+++
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSheetDataWriter.java
Mon Jul 3 18:49:25 2017
@@ -0,0 +1,75 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.xssf.streaming;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.apache.poi.util.IOUtils;
+import org.junit.Test;
+
+public final class TestSheetDataWriter {
+
+ final String unicodeSurrogates =
"\uD835\uDF4A\uD835\uDF4B\uD835\uDF4C\uD835\uDF4D\uD835\uDF4E"
+ +
"\uD835\uDF4F\uD835\uDF50\uD835\uDF51\uD835\uDF52\uD835\uDF53\uD835\uDF54\uD835"
+ +
"\uDF55\uD835\uDF56\uD835\uDF57\uD835\uDF58\uD835\uDF59\uD835\uDF5A\uD835\uDF5B"
+ +
"\uD835\uDF5C\uD835\uDF5D\uD835\uDF5E\uD835\uDF5F\uD835\uDF60\uD835\uDF61\uD835"
+ +
"\uDF62\uD835\uDF63\uD835\uDF64\uD835\uDF65\uD835\uDF66\uD835\uDF67\uD835\uDF68"
+ +
"\uD835\uDF69\uD835\uDF6A\uD835\uDF6B\uD835\uDF6C\uD835\uDF6D\uD835\uDF6E\uD835"
+ +
"\uDF6F\uD835\uDF70\uD835\uDF71\uD835\uDF72\uD835\uDF73\uD835\uDF74\uD835\uDF75"
+ + "\uD835\uDF76\uD835\uDF77\uD835\uDF78\uD835\uDF79\uD835\uDF7A";
+
+ @Test
+ public void testReplaceWithQuestionMark() {
+ for(int i = 0; i < unicodeSurrogates.length(); i++) {
+
assertFalse(SheetDataWriter.replaceWithQuestionMark(unicodeSurrogates.charAt(i)));
+ }
+ assertTrue(SheetDataWriter.replaceWithQuestionMark('\uFFFE'));
+ assertTrue(SheetDataWriter.replaceWithQuestionMark('\uFFFF'));
+ assertTrue(SheetDataWriter.replaceWithQuestionMark('\u0000'));
+ assertTrue(SheetDataWriter.replaceWithQuestionMark('\u000F'));
+ assertTrue(SheetDataWriter.replaceWithQuestionMark('\u001F'));
+ }
+
+ @Test
+ public void testWriteUnicodeSurrogates() throws IOException {
+ SheetDataWriter writer = new SheetDataWriter();
+ try {
+ writer.outputQuotedString(unicodeSurrogates);
+ writer.flush();
+ File file = writer.getTempFile();
+ FileInputStream is = new FileInputStream(file);
+ String text;
+ try {
+ text = new String(IOUtils.toByteArray(is), "UTF-8");
+ } finally {
+ is.close();
+ }
+ assertEquals(unicodeSurrogates, text);
+ } finally {
+ writer.close();
+ }
+ }
+}
Propchange:
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSheetDataWriter.java
------------------------------------------------------------------------------
svn:eol-style = native
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]