tuxji commented on code in PR #1085:
URL: https://github.com/apache/daffodil/pull/1085#discussion_r1325007515
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/TypeCalculator.scala:
##########
@@ -17,288 +17,9 @@
package org.apache.daffodil.runtime1.processors
-import scala.collection.immutable.HashMap
-import scala.collection.immutable.HashSet
-
-import org.apache.daffodil.lib.exceptions.Assert
-import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Numbers
-import org.apache.daffodil.runtime1.dpath.NodeInfo
-import org.apache.daffodil.runtime1.infoset.DataValue
import org.apache.daffodil.runtime1.infoset.DataValue.DataValuePrimitive
import
org.apache.daffodil.runtime1.infoset.DataValue.DataValuePrimitiveNullable
-import org.apache.daffodil.runtime1.processors.parsers.PState
-import org.apache.daffodil.runtime1.processors.parsers.ParseError
-import org.apache.daffodil.runtime1.processors.unparsers.UState
-
-/**
- * A TypeCalculator is a class that performs some kind of conversion from one
DataValuePrimitive
- * to another DataValuePrimitive. In general, this is done to convert a parsed
physical
- * representation from the data to a logical representation that goes in the
infoset, and
- * reverse on unparse. The former is done in the inputTypeCalc functions, and
the latter in the
- * outputTypeCalc functions. While these TypeCalculators are generic and can in
theory support
- * converting between any types, their only use is converting from an int to
a string for
- * parse, and the reverse for unparse.
- *
- * The different TypeCalculator implementations allow for different
conversions based on
- * different properties provided in the schema (e.g. repType, enumerations,
unions, repValues,
- * repValueRanges).
- */
-abstract class TypeCalculator(val srcType: NodeInfo.Kind, val dstType:
NodeInfo.Kind)
- extends Serializable {
- type Error = String
-
- /**
- * The inputTypeCalc function provides only the conversion logic, returning
either the new
- * value or an error string. This allows it to be agnostic about how to
handle the resulting
- * value or error, assuming the caller knows best. If a caller needs
normalization and
- * errors to become ParseErrors, they should instead call inputTypeCalcParse.
- */
- def inputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable]
-
- /**
- * The outputTypeCalc function provides only the conversion logic, returning
either the new
- * value or an error string. This allows it to be agnostic about how to
handle the resulting
- * value or error, assuming the caller knows best. If a caller needs
normalization and
- * errors to become UnparseErrors, they should instead call
outputTypeCalcUnparse.
- */
- def outputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable]
-
- final def inputTypeCalcParse(
- pstate: PState,
- context: RuntimeData,
- x_in: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): DataValuePrimitiveNullable = {
- val x = normalizeArg(x_in, xType)
- val res = inputTypeCalc(x, xType) match {
- case Left(err) => {
- val diag = new ParseError(
- Maybe(context.schemaFileLocation),
- Maybe(pstate.currentLocation),
- err,
- )
- pstate.setFailed(diag)
- DataValue.NoValue
- }
- case Right(ans) => ans
- }
- res
- }
-
- final def outputTypeCalcUnparse(
- ustate: UState,
- context: RuntimeData,
- x_in: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): DataValuePrimitiveNullable = {
- val x = normalizeArg(x_in, xType)
- val res = outputTypeCalc(x, xType) match {
- case Left(err) => {
- val diag = new ParseError(
- Maybe(context.schemaFileLocation),
- Maybe(ustate.currentLocation),
- err,
- )
- ustate.setFailed(diag)
- DataValue.NoValue
- }
- case Right(ans) => ans
- }
- res
- }
-
- /*
- * In theory, this normalizeArg method should not be necessary. We know at
compile time what
- * types a given calculator is defined in terms of, so the compiler should
insert any conversion
- * necessary so that the type being passed in is what the calculator was
defined in terms of.
- * In practice, we are not doing that. Instead, we convert all numeric types
to JBigInt.
- */
- protected def normalizeArg(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): DataValuePrimitive = {
- if (xType == NodeInfo.String) {
- x
- } else {
- Numbers.asBigInt(x.getAnyRef)
- }
- }
-}
-
-/*
- * We define valueMap in terms of AnyRef instead of DataValuePrimitive,
because HashMap is polymorphic,
- * so using DataValuePrimitive would trigger boxing
- */
-class KeysetValueTypeCalculatorOrdered(
- valueMap: HashMap[DataValuePrimitive, DataValuePrimitive],
- rangeTable: Seq[(RangeBound, RangeBound, DataValuePrimitive)],
- unparseMap: HashMap[DataValuePrimitive, DataValuePrimitive],
- srcType: NodeInfo.Kind,
- dstType: NodeInfo.Kind,
-) extends TypeCalculator(srcType, dstType) {
-
- override def inputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = {
- if (valueMap.contains(x)) {
- Right(valueMap.get(x).get)
- } else {
- val ans1: Option[(RangeBound, RangeBound, DataValuePrimitiveNullable)] =
rangeTable.find({
- case (min, max, _) => {
- min.testAsLower(x) && max.testAsUpper(x)
- }
- })
- ans1 match {
- case None => {
- Left(s"Value ${x} not found in enumeration dfdlx:repValues")
- }
- case Some((_, _, v)) => Right(v)
- }
- }
- }
-
- override def outputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = {
- unparseMap.get(x) match {
- case Some(v) => Right(v)
- case None => Left(s"Value ${x} not found in enumeration")
- }
- }
-
-}
-
-class KeysetValueTypeCalculatorUnordered(
- valueMap: HashMap[DataValuePrimitive, DataValuePrimitive],
- unparseMap: HashMap[DataValuePrimitive, DataValuePrimitive],
- srcType: NodeInfo.Kind,
- dstType: NodeInfo.Kind,
-) extends TypeCalculator(srcType, dstType) {
-
- override def inputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = {
- valueMap.get(x) match {
- case Some(a) => Right(a)
- case None => Left(s"Value ${x} not found in enumeration dfdlx:repValues")
- }
- }
-
- override def outputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = {
- unparseMap.get(x) match {
- case Some(v) => Right(v)
- case None => Left(s"Value ${x} not found in enumeration")
- }
- }
-
-}
-
-class IdentityTypeCalculator(srcType: NodeInfo.Kind) extends
TypeCalculator(srcType, srcType) {
- override def inputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = Right(x)
- override def outputTypeCalc(
- x: DataValuePrimitive,
- xType: NodeInfo.Kind,
- ): Either[Error, DataValuePrimitiveNullable] = Right(x)
-}
-
-/*
- * Since we can inherit the restriction from xsd facets, we also need to be
able to support an
arbitrary subset of: minInclusive, minExclusive, maxInclusive, and
maxExclusive
- */
-class RepValueSet(
- val valueSet: HashSet[DataValuePrimitive],
- val valueRanges: Set[(RangeBound, RangeBound)],
-) extends Serializable {
-
- def merge(other: RepValueSet): RepValueSet = {
- val valueSet_ = valueSet ++ other.valueSet
- val valueRanges_ = valueRanges ++ other.valueRanges
- new RepValueSet(valueSet_, valueRanges_)
- }
-
- lazy val isEmpty: Boolean = valueSet.isEmpty && valueRanges.isEmpty
-
-}
-
-//TODO, many of the key/values we receive will be BigInt.
-//We should check if we can safely convert them to Long
-
-object RepValueSetCompiler {
- def compile(
- valueSet: Seq[DataValuePrimitive],
- valuesRanges: Seq[(RangeBound, RangeBound)],
- ): RepValueSet = {
- val hashSet = HashSet.empty ++ valueSet
- val rangeSet = Set.empty ++ valuesRanges.filter(x => x._1.isDefined ||
x._2.isDefined)
- new RepValueSet(hashSet, rangeSet)
- }
- def empty: RepValueSet = compile(Seq.empty, Seq.empty)
-}
-
-object TypeCalculatorCompiler {
-
- // mappings: [(keySet, canonicalKey, value)]
- def compileKeysetValue(
- mappings: Seq[(RepValueSet, DataValuePrimitive, DataValuePrimitive)],
- srcType: NodeInfo.Kind,
- dstType: NodeInfo.Kind,
- ): TypeCalculator = {
- Assert.invariant(!mappings.isEmpty)
-
- /*
- * We need to cast to HashMap, because the type of HashMap.++ returns a
generic Map
- * HashMap.+ returns a HashMap, so we can avoid the cast by doing the fold
ourselves
- */
- val valueMap: HashMap[DataValuePrimitive, DataValuePrimitive] =
- (HashMap.empty ++ mappings.flatMap(x => {
- val (keySet, _, value) = x
- keySet.valueSet.map((_, value))
- })).asInstanceOf[HashMap[DataValuePrimitive, DataValuePrimitive]]
- val rangeTable: Seq[(RangeBound, RangeBound, DataValuePrimitive)] =
mappings.flatMap(x => {
- val (keySet, _, value) = x
- keySet.valueRanges.map({ case (min, max) => (min, max, value) })
- })
- val unparseMap: HashMap[DataValuePrimitive, DataValuePrimitive] =
- (HashMap.empty ++ mappings.map(x => {
- val (_, canonicalKey, value) = x
- (value, canonicalKey)
- })).asInstanceOf[HashMap[DataValuePrimitive, DataValuePrimitive]]
-
- /*
- * Type erasure makes dispatching based on if we have an ordered keyset or
not difficult
- * Really, the problem is that we should not be using AnyRef at all,
- * but since the DPath library is based around AnyRef, we are stuck with it
- */
- rangeTable match {
- case Seq() =>
- new KeysetValueTypeCalculatorUnordered(valueMap, unparseMap, srcType,
dstType)
- case _ => {
- new KeysetValueTypeCalculatorOrdered(valueMap, rangeTable, unparseMap,
srcType, dstType)
- }
- }
- }
-
- def compileIdentity(srcType: NodeInfo.Kind): TypeCalculator = new
IdentityTypeCalculator(
- srcType,
- )
-
-}
object Range {
Review Comment:
Should we rename TypeCalculator.scala to Range.scala or RangeBound.scala
since it defines only those types now?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]