This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git


The following commit(s) were added to refs/heads/master by this push:
     new 0b924b6  Ensure locale does not effect text numbers and date/times
0b924b6 is described below

commit 0b924b67de67d21226bd7674fba78a410247e3ce
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Mar 4 08:39:53 2019 -0500

    Ensure locale does not effect text numbers and date/times
    
    There were two main issues where locale could have an effect:
    
    - We previously only set grouping separator or decimal separator if the
      text number pattern contained the grouping or decimal character.
      However, even if the pattern does not contain a grouping/decimal
      character, the value from the locale can still have an effect if the
      other does exist. So, if either exists in the pattern, we need to set
      them both to ensure that the defaults from the locale do not affect
      the result.
    - When we parse an infoset string, that creates a new Calendar with some
      internal values specific to the locale. When we unparse that Calendar
      those locale specific values can have an effect on the result. To
      avoid this, copy the date/time values from the new Calendar into the
      Calendar that was created with DFDL properties, ensuring the locale
      does not effect the result.
    
    The remaining issues were related to unit tests that didn't properly set
    all the decimal format symbols, causing locale had an affect.
    
    DAFFODIL-2074
---
 .../grammar/primitives/PrimitivesTextNumber.scala  | 16 ++++++--
 .../functionality/icu/TestBigInteger.scala         |  1 +
 .../org/apache/daffodil/util/TestNumberStuff.scala | 10 ++++-
 .../unparsers/ConvertTextCalendarUnparser.scala    | 47 +++++++++++++++++-----
 .../apache/daffodil/processors/input/TestICU.scala | 30 ++++++++++++++
 .../text_number_props/TextNumberProps.tdml         |  2 +-
 6 files changed, 88 insertions(+), 18 deletions(-)

diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala
 
b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala
index baf5415..484082f 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala
@@ -112,17 +112,25 @@ abstract class ConvertTextNumberPrim[S](e: ElementBase)
         (Nope, Nope)
       }
 
+    // If the pattern contains any of these characters, we need to set both
+    // group and decimal separators, even if the pattern doesn't contain the
+    // associated character. This is because even when the pattern does not
+    // contain the grouping/decimal separators, ICU stills seems to take the
+    // separators into account. And since ICU provides defaut values based on
+    // locales, not setting them can cause subtle locale related bugs.
+    val requireDecGroupSeps =
+      patternStripped.contains(",") || patternStripped.contains(".") ||
+      patternStripped.contains("E") || patternStripped.contains("@")
+
     val decSep: Maybe[Evaluatable[List[String]]] =
-      if (!h.isInt && (patternStripped.contains(".") ||
-        patternStripped.contains("E") ||
-        patternStripped.contains("@"))) {
+      if (requireDecGroupSeps) {
         One(e.textStandardDecimalSeparatorEv)
       } else {
         Nope
       }
 
     val groupSep =
-      if (patternStripped.contains(",")) {
+      if (requireDecGroupSeps) {
         One(e.textStandardGroupingSeparatorEv)
       } else {
         Nope
diff --git 
a/daffodil-lib/src/test/scala/org/apache/daffodil/functionality/icu/TestBigInteger.scala
 
b/daffodil-lib/src/test/scala/org/apache/daffodil/functionality/icu/TestBigInteger.scala
index 10bea48..8374840 100644
--- 
a/daffodil-lib/src/test/scala/org/apache/daffodil/functionality/icu/TestBigInteger.scala
+++ 
b/daffodil-lib/src/test/scala/org/apache/daffodil/functionality/icu/TestBigInteger.scala
@@ -53,6 +53,7 @@ class TestBigInteger {
     val pattern = 
"000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,000"
     
     val dfs = new DecimalFormatSymbols()
+    dfs.setDecimalSeparator('.')
     dfs.setGroupingSeparator(',')
     dfs.setExponentSeparator("E")
     
diff --git 
a/daffodil-lib/src/test/scala/org/apache/daffodil/util/TestNumberStuff.scala 
b/daffodil-lib/src/test/scala/org/apache/daffodil/util/TestNumberStuff.scala
index f758033..5db167c 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/util/TestNumberStuff.scala
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/util/TestNumberStuff.scala
@@ -17,14 +17,16 @@
 
 package org.apache.daffodil.util
 
-import junit.framework.Assert._
 import com.ibm.icu.text.DecimalFormat
+import com.ibm.icu.text.DecimalFormatSymbols
 import java.text.ParsePosition
+import junit.framework.Assert._
 import org.junit.Test
-import org.apache.daffodil.Implicits._
 import org.junit.Test
 import scala.math.BigInt.int2bigInt
 
+import org.apache.daffodil.Implicits._
+
 /**
  * Tests that characterize ICU number parsing specifically with respect
  * to dealing with numbers big enough for unsignedLong.
@@ -149,7 +151,11 @@ abstract class ConvertTo[S] {
 
   lazy val parser = new NumVerifier[S] {
     def parse(str: String): S = {
+      val dfs = new DecimalFormatSymbols()
+      dfs.setDecimalSeparator('.')
+      dfs.setGroupingSeparator(',')
       val df = new DecimalFormat()
+      df.setDecimalFormatSymbols(dfs)
       val pos = new ParsePosition(0)
       val num = df.parse(str, pos)
       if (num == null) throw new Exception("not a valid representation")
diff --git 
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ConvertTextCalendarUnparser.scala
 
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ConvertTextCalendarUnparser.scala
index cf66c52..635cd75 100644
--- 
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ConvertTextCalendarUnparser.scala
+++ 
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ConvertTextCalendarUnparser.scala
@@ -45,24 +45,49 @@ case class ConvertTextCalendarUnparser(
     val locale: ULocale = localeEv.evaluate(state)
     val calendar: Calendar = calendarEv.evaluate(state)
 
-    // This initialization is needed because the calendar object may have
-    // been persisted, and that computes/completes fields that are not yet 
completed,
-    // such as the Julian day, which freezes the year to 1970.
-    // We want a fresh start on all the fields that are filled in from a parse.
-
-    calendar.clear()
-
-    val df = tlDataFormatter(locale, calendar)
-    df.setCalendar(calendar)
-
     val node = state.currentInfosetNode.asSimple
 
+    val dc = node.dataValue
+
     val calValue = node.dataValue match {
       case dc: DFDLCalendar => dc.calendar
       case x => Assert.invariantFailed("ConvertTextCalendar received 
unsupported type. %s of type %s.".format(x, Misc.getNameFromClass(x)))
     }
 
-    val str = df.format(calValue)
+    // This initialization is needed because the calendar object may have been
+    // persisted, and that computes/completes fields that are not yet
+    // completed, such as the Julian day, which freezes the year to 1970. We
+    // want a fresh start on all the fields that are filled in from a parse.
+    // Also, the tlDataFormatter below uses a cache based on the value of the
+    // calendar, so we want to ensure we always get the same data formatter if
+    // the calendar is the same
+    calendar.clear()
+
+    val df = tlDataFormatter(locale, calendar)
+    df.setCalendar(calendar)
+
+    // At this point we should be able to just do "df.format(calValue)".
+    // However, when we do that ICU actually uses some fields in calValue (e.g.
+    // minimamDaysInFirstWeek, firstDayInWeek) rather than using the fields in
+    // "calendar" set in setCalendar above. Those fields in calValue are
+    // specific to the locale used to parse the infoset value, which means the
+    // locale can effect the unparsed value. So instead of calling
+    // format(calValue), copy all the date/time fields into "calendar" (which
+    // has the appropriate settings based on DFDL properties) and then unparse
+    // that. This ensures there are no locale specific issues related to
+    // unparsing calendars.
+    calendar.set(
+      calValue.get(Calendar.EXTENDED_YEAR),
+      calValue.get(Calendar.MONTH),
+      calValue.get(Calendar.DAY_OF_MONTH),
+      calValue.get(Calendar.HOUR_OF_DAY),
+      calValue.get(Calendar.MINUTE),
+      calValue.get(Calendar.SECOND)
+    )
+    calendar.set(Calendar.MILLISECOND,   calValue.get(Calendar.MILLISECOND))
+    calendar.setTimeZone(calValue.getTimeZone)
+
+    val str = df.format(calendar)
 
     node.overwriteDataValue(str)
   }
diff --git 
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/processors/input/TestICU.scala
 
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/processors/input/TestICU.scala
index 01be886..782785f 100644
--- 
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/processors/input/TestICU.scala
+++ 
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/processors/input/TestICU.scala
@@ -141,6 +141,8 @@ class TestICU {
   @Test def test_empty_exponent_separator = {
     val dfs = new DecimalFormatSymbols()
     dfs.setExponentSeparator("")
+    dfs.setGroupingSeparator(',')
+    dfs.setDecimalSeparator('.')
     val df = new DecimalFormat("##.##E+0", dfs)
     val pp = new ParsePosition(0)
     val num = df.parse("12.34+2", pp)
@@ -149,4 +151,32 @@ class TestICU {
     //assertEquals(1234L, num)
     //assertEquals(7, pp.getIndex)
   }
+
+  // Shows that even if a decimal format pattern doesn't contain a decimal
+  // point, the decimal format separator from the locale still has an effect
+  // and can cause locale specific behavior
+  @Test def test_local_side_effect = {
+
+    // Germany's locale has a decimal separator of ','
+    val dfs = new DecimalFormatSymbols(java.util.Locale.GERMANY)
+
+    // Set the grouping separator to be the same as the decimal separator from
+    // the locale. ICU never complains
+    dfs.setGroupingSeparator(',')
+
+    // Define a pattern that only has a grouping separator--no decimal 
separator
+    val df = new DecimalFormat("###,###", dfs)
+
+    // We don't have a decimal point character in the pattern, so one might
+    // expect that only the grouping separator would be used, with a resulting
+    // value of 123456. However, the decimal separator from the locale does
+    // have an effect with ICU, which causes the result to be 123.456. This
+    // shows that it's often important to set both the grouping and decimal
+    // separators, even if the pattern only contains one. Otherwise locale
+    // information might be used when parsing.
+    val pp = new ParsePosition(0)
+    val num = df.parse("123,456", pp)
+    assertEquals(7, pp.getIndex)
+    assertEquals(123.456, num.doubleValue)
+  }
 }
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
index 8aa5a66..2879cf8 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
@@ -331,7 +331,7 @@
     <xs:element name="tnp01" type="xs:float" dfdl:textNumberRep="standard" 
dfdl:textNumberPattern="@@###E0" />
     <xs:element name="tnp02" type="xs:float" dfdl:textNumberRep="standard" 
dfdl:textNumberPattern="@@@" />
     
-    <xs:element name="tnp03" type="xs:int" dfdl:textNumberRep="standard" 
dfdl:textNumberPattern="#,###" />
+    <xs:element name="tnp03" type="xs:int" dfdl:textNumberRep="standard" 
dfdl:textNumberPattern="#,###" dfdl:textStandardDecimalSeparator="." />
   
   </tdml:defineSchema>
   

Reply via email to