This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git
The following commit(s) were added to refs/heads/master by this push:
new a01cc32 Fix non-thread safe code related to text date times
a01cc32 is described below
commit a01cc32375a50761a9688c6243c245df59ba85a0
Author: Steve Lawrence <[email protected]>
AuthorDate: Fri Mar 15 10:16:27 2019 -0400
Fix non-thread safe code related to text date times
We create an ICU Calendar object to handle parsing/unparsing text date
times. Unfortunately, this Calendar object isn't thread safe, but we
share it among all threads when all the DFDL calendar properties are
constant. This can lead to random errors.
To make it thread safe, change the text calendar code to never modify
the original Calendar object, but instead create a clone of it before
use and use the clone for all dateTime calculations. The effectively
treats the original calendar as immutable and avoids thread safety
issues. Also reorganize the code a bit and rename some variables to make
the purpose of the different Calendar objects more clear.
DAFFODIL-2097
---
.../unparsers/ConvertTextCalendarUnparser.scala | 71 ++++++++++++++--------
.../daffodil/processors/EvCalendarLanguage.scala | 6 ++
.../processors/parsers/PrimitivesDateTime1.scala | 60 ++++++++++++------
3 files changed, 93 insertions(+), 44 deletions(-)
diff --git
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ConvertTextCalendarUnparser.scala
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ConvertTextCalendarUnparser.scala
index 635cd75..3636cb7 100644
---
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ConvertTextCalendarUnparser.scala
+++
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ConvertTextCalendarUnparser.scala
@@ -42,51 +42,72 @@ case class ConvertTextCalendarUnparser(
override lazy val runtimeDependencies = Vector(localeEv, calendarEv)
def unparse(state: UState): Unit = {
- val locale: ULocale = localeEv.evaluate(state)
- val calendar: Calendar = calendarEv.evaluate(state)
val node = state.currentInfosetNode.asSimple
val dc = node.dataValue
- val calValue = node.dataValue match {
+ val infosetCalendar = node.dataValue match {
case dc: DFDLCalendar => dc.calendar
case x => Assert.invariantFailed("ConvertTextCalendar received
unsupported type. %s of type %s.".format(x, Misc.getNameFromClass(x)))
}
- // This initialization is needed because the calendar object may have been
- // persisted, and that computes/completes fields that are not yet
- // completed, such as the Julian day, which freezes the year to 1970. We
- // want a fresh start on all the fields that are filled in from a parse.
- // Also, the tlDataFormatter below uses a cache based on the value of the
- // calendar, so we want to ensure we always get the same data formatter if
- // the calendar is the same
- calendar.clear()
+ val locale: ULocale = localeEv.evaluate(state)
+
+ val calendarOrig: Calendar = calendarEv.evaluate(state)
- val df = tlDataFormatter(locale, calendar)
- df.setCalendar(calendar)
+ // The clear() here actually shouldn't be necessary since we call clear()
+ // when we create the calendar in CalendarEv, and nothing ever modifies
+ // that calendar--we only modify clones. However, it looks like the act of
+ // deserializing a Calendar object causes values to be initialized again.
+ // So if someone uses a saved parser this calendar will have garbage in it
+ // that can affect the results. So clear it here to make sure that's not
+ // the case.
+ calendarOrig.clear()
- // At this point we should be able to just do "df.format(calValue)".
- // However, when we do that ICU actually uses some fields in calValue (e.g.
+ // It's important here to use the calendarOrig that results from
+ // calendarEv.evaluate() since the tlDataFormatter is a cache the uses
+ // reference equality. For everything else we want to use a clone for
+ // reasons described below.
+ val df = tlDataFormatter(locale, calendarOrig)
+
+ // When we evaluate calendarEV, if it is a constant we will always get back
+ // the same Calendar object. Because of this it is important here to clone
+ // this calendar and always use the clone below for two reasons:
+ //
+ // 1) The below code will modify the calendar object based on the dateTime
+ // in the infoset. Any changes to the object would persist and affect
+ // future unparses. By cloning, we ensure we do not modify the original
+ // calendar object that other unparses will use.
+ //
+ // 2) Multiple threads would have access to the same Calendar object, and
+ // so the below code could modify the same object at the same time
+ // across threads. By cloning, we ensure that they modify different
+ // objects.
+ val calendar = calendarOrig.clone().asInstanceOf[Calendar]
+
+ // At this point we should be able to just do "df.format(infosetCalendar)".
+ // However, when we do that ICU actually uses some fields in
infosetCalendar (e.g.
// minimamDaysInFirstWeek, firstDayInWeek) rather than using the fields in
- // "calendar" set in setCalendar above. Those fields in calValue are
+ // "calendar" set in setCalendar above. Those fields in infosetCalendar are
// specific to the locale used to parse the infoset value, which means the
// locale can effect the unparsed value. So instead of calling
- // format(calValue), copy all the date/time fields into "calendar" (which
+ // format(infosetCalendar), copy all the date/time fields into "calendar"
(which
// has the appropriate settings based on DFDL properties) and then unparse
// that. This ensures there are no locale specific issues related to
// unparsing calendars.
calendar.set(
- calValue.get(Calendar.EXTENDED_YEAR),
- calValue.get(Calendar.MONTH),
- calValue.get(Calendar.DAY_OF_MONTH),
- calValue.get(Calendar.HOUR_OF_DAY),
- calValue.get(Calendar.MINUTE),
- calValue.get(Calendar.SECOND)
+ infosetCalendar.get(Calendar.EXTENDED_YEAR),
+ infosetCalendar.get(Calendar.MONTH),
+ infosetCalendar.get(Calendar.DAY_OF_MONTH),
+ infosetCalendar.get(Calendar.HOUR_OF_DAY),
+ infosetCalendar.get(Calendar.MINUTE),
+ infosetCalendar.get(Calendar.SECOND)
)
- calendar.set(Calendar.MILLISECOND, calValue.get(Calendar.MILLISECOND))
- calendar.setTimeZone(calValue.getTimeZone)
+ calendar.set(Calendar.MILLISECOND,
infosetCalendar.get(Calendar.MILLISECOND))
+ calendar.setTimeZone(infosetCalendar.getTimeZone)
+ df.setCalendar(calendar)
val str = df.format(calendar)
node.overwriteDataValue(str)
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvCalendarLanguage.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvCalendarLanguage.scala
index 2cc5511..5d9025e 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvCalendarLanguage.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvCalendarLanguage.scala
@@ -79,6 +79,12 @@ class CalendarEv(localeEv: CalendarLanguageEv,
if (calendarTz.isDefined) calendarTz.get else TimeZone.UNKNOWN_ZONE
}
cal.setTimeZone(tz)
+
+ // The Calendar is initialized with time values based on the current system
+ // which aren't always overwritten during a parse. We don't want that, so
+ // clear out those values.
+ cal.clear()
+
cal
}
}
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PrimitivesDateTime1.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PrimitivesDateTime1.scala
index 5825ca8..0031dc1 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PrimitivesDateTime1.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PrimitivesDateTime1.scala
@@ -105,18 +105,40 @@ case class ConvertTextCalendarParser(
val pos = new ParsePosition(0)
val locale: ULocale = localeEv.evaluate(start)
- val calendar: Calendar = calendarEv.evaluate(start)
- // This initialization is needed because the calendar object may have
- // been persisted, and that computes/completes fields that are not yet
completed,
- // such as the Julian day, which freezes the year to 1970.
- // We want a fresh start on all the fields that are filled in from a parse.
-
- calendar.clear()
-
- val df = tlDataFormatter(locale, calendar)
- val cal = df.getCalendar.clone.asInstanceOf[Calendar]
- df.parse(str, cal, pos);
+ val calendarOrig: Calendar = calendarEv.evaluate(start)
+
+ // The clear here actually shouldn't be necessary since we call clear()
+ // when we create the calendar in CalendarEv, and nothing ever modifies
+ // that calendar, we only modify clones. However, it looks like the act of
+ // deserializing a Calendar object causes values to be initialized again.
+ // So if someone uses a saved parser this calendar will have garbage in it
+ // that can affect the results. So clear it here to make sure that's not
+ // the case.
+ calendarOrig.clear()
+
+ // It's important here to use the calendarOrig that results from
+ // calendarEv.evaluate() since the tlDataFormatter is a cache the uses
+ // reference equality. For everything else we want to use a clone for
+ // reasons described below.
+ val df = tlDataFormatter(locale, calendarOrig)
+
+ // When we evaluate calendarEV, if it is a constant we will always get back
+ // the same Calendar object. Because of this it is important here to clone
+ // this calendar and always use the clone below for two reasons:
+ //
+ // 1) The below code will modify modify the calendar object based on the
+ // value of the parsed string. Any changes to the object will persist
+ // and could affect future parses. By cloning, we ensure we do not
+ // modify the original calendar object.
+ //
+ // 2) Multiple threads would have access to the same Calendar object, and
+ // so the below could modify the same object at the same time. By
+ // cloning, we ensure that they modify different objects.
+ val calendar = calendarOrig.clone().asInstanceOf[Calendar]
+
+ df.setCalendar(calendar)
+ df.parse(str, calendar, pos);
// Verify that what was parsed was what was passed exactly in byte count
// Use pos to verify all characters consumed & check for errors
@@ -131,11 +153,11 @@ case class ConvertTextCalendarParser(
// try to calculate the time, which forces validation. This causes an
// exception to be thrown if a Calendar is not valid.
try {
- cal.getTime
- if ((cal.get(Calendar.YEAR) > start.tunable.maxValidYear) ||
(cal.get(Calendar.YEAR) < start.tunable.minValidYear))
+ calendar.getTime
+ if ((calendar.get(Calendar.YEAR) > start.tunable.maxValidYear) ||
(calendar.get(Calendar.YEAR) < start.tunable.minValidYear))
throw new TunableLimitExceededError(erd.schemaFileLocation,
"Year value of %s is not within the limits of the tunables
minValidYear (%s) and maxValidYear (%s)",
- cal.get(Calendar.YEAR), start.tunable.minValidYear,
start.tunable.maxValidYear)
+ calendar.get(Calendar.YEAR), start.tunable.minValidYear,
start.tunable.maxValidYear)
} catch {
case e: IllegalArgumentException => {
PE(start, "Convert to %s (for xs:%s): Failed to parse '%s': %s.",
prettyType, xsdType, str, e.getMessage())
@@ -143,14 +165,14 @@ case class ConvertTextCalendarParser(
}
}
- val newCal = xsdType.toLowerCase() match {
- case "time" => DFDLTime(cal, hasTZ)
- case "date" => DFDLDate(cal, hasTZ)
- case "datetime" => DFDLDateTime(cal, hasTZ)
+ val infosetCalendar = xsdType.toLowerCase() match {
+ case "time" => DFDLTime(calendar, hasTZ)
+ case "date" => DFDLDate(calendar, hasTZ)
+ case "datetime" => DFDLDateTime(calendar, hasTZ)
case _ => Assert.impossibleCase
}
- node.overwriteDataValue(newCal)
+ node.overwriteDataValue(infosetCalendar)
}
}