tuxji commented on code in PR #1085:
URL: https://github.com/apache/daffodil/pull/1085#discussion_r1325007515
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/TypeCalculator.scala:
##########
@@ -17,288 +17,9 @@
package org.apache.daffodil.runtime1.processors
-import scala.collection.immutable.HashMap
-import scala.collection.immutable.HashSet
-
-import org.apache.daffodil.lib.exceptions.Assert
-import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Numbers
-import org.apache.daffodil.runtime1.dpath.NodeInfo
-import org.apache.daffodil.runtime1.infoset.DataValue
import org.apache.daffodil.runtime1.infoset.DataValue.DataValuePrimitive
import
org.apache.daffodil.runtime1.infoset.DataValue.DataValuePrimitiveNullable
-import org.apache.daffodil.runtime1.processors.parsers.PState
-import org.apache.daffodil.runtime1.processors.parsers.ParseError
-import org.apache.daffodil.runtime1.processors.unparsers.UState
-
-/**
- * A TypeCalculator is a class that performs some kind of conversion from one
DataValuePrimitive
- * to another DataValuePrimitive. In general, this is done to convert a parsed
physical
- * representation from the data to a logical representation that goes in the
infoset, and
- * reverse on unparse. The former is done in the inputTypeCalc functions, and
the latter in the
- * outputTypeCalc functions. While these TypeCalculators are generic and can in
theory support
- * converting between any types, their only use is converting from an int to
a string for
- * parse, and the reverse for unparse.
- *
- * The different TypeCalculator implementations allow for different
conversions based on
- * different properties provided in the schema (e.g. repType, enumerations,
unions, repValues,
- * repValueRanges).
- */
-abstract class TypeCalculator(val srcType: NodeInfo.Kind, val dstType:
NodeInfo.Kind)
- extends Serializable {
- type Error = String
-
- /**
- * The inputTypeCalc function provides only the conversion logic, returning
either the new
- * value or an error string. This allows it to be agnostic about how to
handle the resulting
- * value or error, assuming the caller knows best. If a caller needs
normalization and
- * errors to become ParseErrors, they should instead call inputTypeCalcParse.
- */
- def inputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable]
-
- /**
- * The outputTypeCalc function provides only the conversion logic, returning
either the new
- * value or an error string. This allows it to be agnostic about how to
handle the resulting
- * value or error, assuming the caller knows best. If a caller needs
normalization and
- * errors to become UnparseErrors, they should instead call
outputTypeCalcUnparse.
- */
- def outputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable]
-
- final def inputTypeCalcParse(
- pstate: PState,
- context: RuntimeData,
- x_in: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): DataValuePrimitiveNullable = {
- val x = normalizeArg(x_in, xType)
- val res = inputTypeCalc(x, xType) match {
- case Left(err) => {
- val diag = new ParseError(
- Maybe(context.schemaFileLocation),
- Maybe(pstate.currentLocation),
- err,
- )
- pstate.setFailed(diag)
- DataValue.NoValue
- }
- case Right(ans) => ans
- }
- res
- }
-
- final def outputTypeCalcUnparse(
- ustate: UState,
- context: RuntimeData,
- x_in: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): DataValuePrimitiveNullable = {
- val x = normalizeArg(x_in, xType)
- val res = outputTypeCalc(x, xType) match {
- case Left(err) => {
- val diag = new ParseError(
- Maybe(context.schemaFileLocation),
- Maybe(ustate.currentLocation),
- err,
- )
- ustate.setFailed(diag)
- DataValue.NoValue
- }
- case Right(ans) => ans
- }
- res
- }
-
- /*
- * In theory, this normalizeArg method should not be necessary. We know at
compile time what
- * types a given calculator is defined in terms of, so the compiler should
insert any conversion
- * necessary so that the type being passed in is what the calculator was
defined in terms of.
- * In practice, we are not doing that. Instead, we convert all numeric types
to JBigInt.
- */
- protected def normalizeArg(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): DataValuePrimitive = {
- if (xType == NodeInfo.String) {
- x
- } else {
- Numbers.asBigInt(x.getAnyRef)
- }
- }
-}
-
-/*
- * We define valueMap in terms of AnyRef instead of DataValuePrimitive,
because HashMap is polymorphic,
- * so using DataValuePrimitive would trigger boxing
- */
-class KeysetValueTypeCalculatorOrdered(
- valueMap: HashMap[DataValuePrimitive, DataValuePrimitive],
- rangeTable: Seq[(RangeBound, RangeBound, DataValuePrimitive)],
- unparseMap: HashMap[DataValuePrimitive, DataValuePrimitive],
- srcType: NodeInfo.Kind,
- dstType: NodeInfo.Kind,
-) extends TypeCalculator(srcType, dstType) {
-
- override def inputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = {
- if (valueMap.contains(x)) {
- Right(valueMap.get(x).get)
- } else {
- val ans1: Option[(RangeBound, RangeBound, DataValuePrimitiveNullable)] =
rangeTable.find({
- case (min, max, _) => {
- min.testAsLower(x) && max.testAsUpper(x)
- }
- })
- ans1 match {
- case None => {
- Left(s"Value ${x} not found in enumeration dfdlx:repValues")
- }
- case Some((_, _, v)) => Right(v)
- }
- }
- }
-
- override def outputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = {
- unparseMap.get(x) match {
- case Some(v) => Right(v)
- case None => Left(s"Value ${x} not found in enumeration")
- }
- }
-
-}
-
-class KeysetValueTypeCalculatorUnordered(
- valueMap: HashMap[DataValuePrimitive, DataValuePrimitive],
- unparseMap: HashMap[DataValuePrimitive, DataValuePrimitive],
- srcType: NodeInfo.Kind,
- dstType: NodeInfo.Kind,
-) extends TypeCalculator(srcType, dstType) {
-
- override def inputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = {
- valueMap.get(x) match {
- case Some(a) => Right(a)
- case None => Left(s"Value ${x} not found in enumeration dfdlx:repValues")
- }
- }
-
- override def outputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = {
- unparseMap.get(x) match {
- case Some(v) => Right(v)
- case None => Left(s"Value ${x} not found in enumeration")
- }
- }
-
-}
-
-class IdentityTypeCalculator(srcType: NodeInfo.Kind) extends
TypeCalculator(srcType, srcType) {
- override def inputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = Right(x)
- override def outputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = Right(x)
-}
-
-/*
- * Since we can inherit the restriction from xsd facets, we also need to be
able to support an
arbitrary subset of: minInclusive, minExclusive, maxInclusive, and
maxExclusive
- */
-class RepValueSet(
- val valueSet: HashSet[DataValuePrimitive],
- val valueRanges: Set[(RangeBound, RangeBound)],
-) extends Serializable {
-
- def merge(other: RepValueSet): RepValueSet = {
- val valueSet_ = valueSet ++ other.valueSet
- val valueRanges_ = valueRanges ++ other.valueRanges
- new RepValueSet(valueSet_, valueRanges_)
- }
-
- lazy val isEmpty: Boolean = valueSet.isEmpty && valueRanges.isEmpty
-
-}
-
-//TODO, many of the key/values we receive will be BigInt.
-//We should check if we can safely convert them to Long
-
-object RepValueSetCompiler {
- def compile(
- valueSet: Seq[DataValuePrimitive],
- valuesRanges: Seq[(RangeBound, RangeBound)],
- ): RepValueSet = {
- val hashSet = HashSet.empty ++ valueSet
- val rangeSet = Set.empty ++ valuesRanges.filter(x => x._1.isDefined ||
x._2.isDefined)
- new RepValueSet(hashSet, rangeSet)
- }
- def empty: RepValueSet = compile(Seq.empty, Seq.empty)
-}
-
-object TypeCalculatorCompiler {
-
- // mappings: [(keySet, canonicalKey, value)]
- def compileKeysetValue(
- mappings: Seq[(RepValueSet, DataValuePrimitive, DataValuePrimitive)],
- srcType: NodeInfo.Kind,
- dstType: NodeInfo.Kind,
- ): TypeCalculator = {
- Assert.invariant(!mappings.isEmpty)
-
- /*
- * We need to cast to HashMap, because the type of HashMap.++ returns a
generic Map
- * HashMap.+ returns a HashMap, so we can avoid the cast by doing the fold
ourselves
- */
- val valueMap: HashMap[DataValuePrimitive, DataValuePrimitive] =
- (HashMap.empty ++ mappings.flatMap(x => {
- val (keySet, _, value) = x
- keySet.valueSet.map((_, value))
- })).asInstanceOf[HashMap[DataValuePrimitive, DataValuePrimitive]]
- val rangeTable: Seq[(RangeBound, RangeBound, DataValuePrimitive)] =
mappings.flatMap(x => {
- val (keySet, _, value) = x
- keySet.valueRanges.map({ case (min, max) => (min, max, value) })
- })
- val unparseMap: HashMap[DataValuePrimitive, DataValuePrimitive] =
- (HashMap.empty ++ mappings.map(x => {
- val (_, canonicalKey, value) = x
- (value, canonicalKey)
- })).asInstanceOf[HashMap[DataValuePrimitive, DataValuePrimitive]]
-
- /*
- * Type erasure makes dispatching based on if we have an ordered keyset or
not difficult
- * Really, the problem is that we should not be using AnyRef at all,
- * but since the DPath library is based around AnyRef, we are stuck with it
- */
- rangeTable match {
- case Seq() =>
- new KeysetValueTypeCalculatorUnordered(valueMap, unparseMap, srcType,
dstType)
- case _ => {
- new KeysetValueTypeCalculatorOrdered(valueMap, rangeTable, unparseMap,
srcType, dstType)
- }
- }
- }
-
- def compileIdentity(srcType: NodeInfo.Kind): TypeCalculator = new
IdentityTypeCalculator(
- srcType,
- )
-
-}
object Range {
Review Comment:
Should we rename TypeCalculator.scala to Range.scala or RangeBound.scala
since it defines only those types now?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]