mbeckerle commented on code in PR #1165: URL: https://github.com/apache/daffodil/pull/1165#discussion_r1501166268
########## daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/layers/LayerException.scala: ########## @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.runtime1.layers + +abstract class LayerException(msg: String, cause: Throwable) extends Exception(msg, cause) { + def this(msg: String) = this(msg, null) + def this(cause: Throwable) = this(null, cause) +} + +class LayerCompilerException(msg: String, cause: Throwable) extends LayerException(msg, cause) { Review Comment: Some of these belong in the api package because layer authors may want to throw them? Or maybe we let the user throw whatever they want, we catch and encapsulate as either compile or runtime layer exceptions? ########## daffodil-runtime1-layers/src/main/scala/org/apache/daffodil/layers/runtime1/LineFoldedTransformer.scala: ########## @@ -75,157 +73,62 @@ import org.apache.daffodil.runtime1.processors.ParseOrUnparseState * For MIME, the maximum line length is 76. */ -sealed abstract class LineFoldedLayerCompiler(mode: LineFoldMode) - extends LayerCompiler(mode.transformName) { - - override def compileLayer( - layerCompileInfo: LayerCompileInfo, - ): LineFoldedTransformerFactory = { - - layerCompileInfo.optLayerLengthKind match { - case Some(LayerLengthKind.BoundaryMark) => - layerCompileInfo.SDEUnless( - layerCompileInfo.optLayerBoundaryMarkOptConstantValue.isDefined, - "Property dfdlx:layerBoundaryMark was not defined.", - ) - case Some(LayerLengthKind.Implicit) => // ok - case Some(other) => - layerCompileInfo.SDE( - s"Property dfdlx:layerLengthKind can only be 'implicit' or 'boundaryMark', but was '$other'.", - ) - case None => - layerCompileInfo.SDE( - s"Property dfdlx:layerLengthKind must be 'implicit' or 'boundaryMark'.", - ) +sealed abstract class LineFoldedLayerBase(mode: LineFoldMode) + extends Layer( + layerName = mode.dfdlName, + supportedLayerLengthKinds = + Seq(JLayerLengthKind.BoundaryMark, JLayerLengthKind.Implicit).asJava, + supportedLayerLengthUnits = Seq().asJava, + isRequiredLayerEncoding = false, + optLayerVariables = Optional.empty(), + ) { + + /** + * The layer limiter needs to be different for boundaryMark and implicit cases. + * @return a LayerLimiter or Optional.empty to indicate the standard layer limiter implementation should be used. + */ + override def layerLimiter(layerPropertyInfo: LayerPropertyInfo): Optional[LayerLimiter] = + layerPropertyInfo.layerLengthKind() match { + case JLayerLengthKind.Implicit => + Optional.empty() // use the default implicit layer length implementation. + case JLayerLengthKind.BoundaryMark => + Optional.of(new LineFoldedLayerBoundaryMarkLimiter(this)) + case JLayerLengthKind.Explicit => + Assert.invariantFailed("layerLengthKind 'explicit' not allowed.") } - // - // This layer assumes an ascii-family encoding. - // + override def wrapLayerDecoder(jis: InputStream, lr: LayerRuntime): InputStream = + new LineFoldedInputStream(mode, jis) - val xformer = - new LineFoldedTransformerFactory(mode, layerCompileInfo.optLayerLengthKind.get) - xformer - } + override def wrapLayerEncoder(jos: OutputStream, lr: LayerRuntime): OutputStream = + new LineFoldedOutputStream(mode, jos) } -final class LineFoldedIMFLayerCompiler extends LineFoldedLayerCompiler(LineFoldMode.IMF) -final class LineFoldedICalendarLayerCompiler - extends LineFoldedLayerCompiler(LineFoldMode.iCalendar) +class LineFoldedIMFLayer extends LineFoldedLayerBase(LineFoldMode.IMF) -final class LineFoldedTransformerFactory(mode: LineFoldMode, layerLengthKind: LayerLengthKind) - extends LayerTransformerFactory(mode.transformName) { +class LineFoldedICalendarLayer extends LineFoldedLayerBase(LineFoldMode.iCalendar) - override def newInstance(layerRuntimeInfo: LayerRuntimeInfo): LayerTransformer = { - val xformer = - layerLengthKind match { - case LayerLengthKind.BoundaryMark => { - new LineFoldedTransformerDelimited(mode, layerRuntimeInfo) - } - case LayerLengthKind.Implicit => { - new LineFoldedTransformerImplicit(mode, layerRuntimeInfo) - } - case _ => - Assert.invariantFailed( - "Should already have checked that it is only one of BoundaryMark or Implicit", - ) - } - xformer - } -} +class LineFoldedLayerBoundaryMarkLimiter(layer: LineFoldedLayerBase) + extends BoundaryMarkRegexLimiter { -sealed trait LineFoldMode extends LineFoldMode.Value { - def transformName: String = s"lineFolded_${this.toString}" -} - -object LineFoldMode extends Enum[LineFoldMode] { - - case object IMF extends LineFoldMode - case object iCalendar extends LineFoldMode - override lazy val values = Array(IMF, iCalendar) - - override def apply(name: String, context: ThrowsSDE): LineFoldMode = - stringToEnum("lineFoldMode", name, context) -} - -/** - * For line folded, the notion of "delimited" means that the element is a "line" - * that ends with CRLF, except that if it is long, it will be folded, which involves - * inserting/removing CRLF+Space (or CRLF+TAB). A CRLF not followed by space or tab - * is ALWAYS the actual "delimiter". There's no means of supplying a specific delimiter. - */ -final class LineFoldedTransformerDelimited( - mode: LineFoldMode, - layerRuntimeInfo: LayerRuntimeInfo, -) extends LayerTransformer(mode.transformName, layerRuntimeInfo) { - - override protected def wrapLimitingStream( - state: ParseOrUnparseState, - jis: java.io.InputStream, - ) = { - // regex means CRLF not followed by space or tab. - // NOTE: this regex cannot contain ANY capturing groups (per scaladoc on RegexLimitingStream) - val s = - new RegexLimitingStream(jis, "\\r\\n(?!(?:\\t|\\ ))", "\r\n", StandardCharsets.ISO_8859_1) - s - } - - override protected def wrapLimitingStream( - state: ParseOrUnparseState, - jos: java.io.OutputStream, - ) = { - // - // Q: How do we insert a CRLF "not followed by tab or space" when we don't - // control what follows? - // A: We don't. This is nature of the format. If what follows could begin - // with a space or tab, then the format can't use a line-folded layer. - // - val newJOS = - new LayerBoundaryMarkInsertingJavaOutputStream(jos, "\r\n", StandardCharsets.ISO_8859_1) - newJOS - } - - override protected def wrapLayerDecoder(jis: java.io.InputStream): java.io.InputStream = { - val s = new LineFoldedInputStream(mode, jis) - s - } - override protected def wrapLayerEncoder(jos: java.io.OutputStream): java.io.OutputStream = { - val s = new LineFoldedOutputStream(mode, jos) - s - } -} + /** + * This is CRLF not followed by space or tab. + * Note that if the CR is at end-of-data that is NOT a match. (See the $ in the negative lookahead) + * + * Note: one must not use capturing groups in these regex. + */ + override protected def regexForBoundaryMarkMatch: String = "\\r\\n(?!(?:\\t|\\ |$))" Review Comment: Remove "$" from the regex. Left-over from an experiment I was doing. ########## daffodil-test/src/test/scala/org/apache/daffodil/runtime1/layers/TestLayers.scala: ########## @@ -37,7 +37,7 @@ class TestLayers { @Test def test_gzipLayer1(): Unit = { runner.runOneTest("gzipLayer1") } @Test def test_foldedIMFBase64Layers1(): Unit = { - runner.runOneTest("foldedIMFBase64Layers1") + runner.trace.runOneTest("foldedIMFBase64Layers1") Review Comment: Revert ########## daffodil-test/src/test/scala/org/apache/daffodil/runtime1/layers/IPv4ChecksumLayer.scala: ########## @@ -18,48 +18,77 @@ package org.apache.daffodil.runtime1.layers import java.nio.ByteBuffer +import java.util.Optional +import scala.collection.JavaConverters._ import org.apache.daffodil.lib.exceptions.Assert -import org.apache.daffodil.runtime1.processors.ParseOrUnparseState -import org.apache.daffodil.runtime1.processors.VariableRuntimeData +import org.apache.daffodil.runtime1.api.DFDLPrimType +import org.apache.daffodil.runtime1.layers.api.JLayerLengthKind +import org.apache.daffodil.runtime1.layers.api.Layer +import org.apache.daffodil.runtime1.layers.api.LayerChecksumMixin +import org.apache.daffodil.runtime1.layers.api.LayerRuntime +import org.apache.daffodil.runtime1.layers.api.LayerVariables import passera.unsigned.UShort +object IPv4ChecksumLayer { + val name: String = "IPv4Checksum" + val vars: LayerVariables = + LayerVariables( + prefix = "chksum", + namespace = "urn:org.apache.daffodil.layers.IPv4Checksum", + variables = Seq( + (name, DFDLPrimType.UnsignedShort), + ).asJava, + ) +} + /** * The layer transform computes the checksum of the header data. * per IETF RFC 791. * - * The data has a well-known fixed length, so layerLengthKind is always 'implicit'. + * The data has a well-known fixed length, so layerLengthKind is 'implicit' or not specified. */ -final class IPv4Checksum( - name: String, - layerRuntimeInfo: LayerRuntimeInfo, - outputVar: VariableRuntimeData, -) extends ByteBufferExplicitLengthLayerTransform[Int]( - layerRuntimeInfo, - name, - inputVars = Seq(), - outputVar: VariableRuntimeData, - ) { +final class IPv4ChecksumLayer() + extends Layer( + IPv4ChecksumLayer.name, + supportedLayerLengthKinds = + Seq(JLayerLengthKind.Implicit).asJava, // Implicit implies you cannot specify layerLength. + supportedLayerLengthUnits = Seq().asJava, + isRequiredLayerEncoding = false, + optLayerVariables = Optional.of(IPv4ChecksumLayer.vars), + ) + with LayerChecksumMixin // Checksums are easily expressed as a compute method on a byte buffer. + { /** * This layer is always exactly 20 bytes long. */ - override protected def layerBuiltInConstantLength = Some(20L) + private def lenInBytes = 20 // bytes + + override def optLayerBuiltInConstantLength: Optional[Int] = Optional.of(lenInBytes) Review Comment: Could just punt on this built in fixed length stuff and just use layerLengthKind explicit for these. Or, could just use layerLengthKind 'implicit' and implement 'explicit' using an element of specified length that surrounds the part that needs to have the fixed length. ########## daffodil-runtime1-layers/src/main/scala/org/apache/daffodil/layers/runtime1/GZipLayer.java: ########## Review Comment: I wrote this layer in Java, by way of making sure the API is usable from Java. ########## daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/layers/api/JLayerLengthKind.java: ########## @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.runtime1.layers.api; + +public enum JLayerLengthKind { Review Comment: I don't like the name JLayerLengthKind. ########## daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/layers/LayerImplicits.scala: ########## Review Comment: I had hoped that Scala implicits would let scala code ignore the difference between Scala Option and Java Optional, and scala code wouldn't have the clutter. I didn't achieve that, however. You still have to invoke ".asJava" or ".asScala" in lots of places. ########## daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/layers/api/JLayerLengthUnits.java: ########## Review Comment: The whole concept of LayerLengthUnits can go away. It's not needed. It was put in by analogy to dfdl:lengthUnits, but for layers, they're byte centric, so this property is either ignored, or must be 'bytes'. Should just drop the whole concept. ########## daffodil-test/src/test/scala/org/apache/daffodil/runtime1/layers/AISTransformer.scala: ########## @@ -20,130 +20,78 @@ package org.apache.daffodil.runtime1.layers import java.io._ import java.nio._ import java.nio.charset._ +import java.util.Optional +import scala.collection.JavaConverters._ -import org.apache.daffodil.io.BoundaryMarkLimitingStream import org.apache.daffodil.io.FormatInfo import org.apache.daffodil.io.InputSourceDataInputStream -import org.apache.daffodil.io.LayerBoundaryMarkInsertingJavaOutputStream import org.apache.daffodil.io.processors.charset.BitsCharsetAISPayloadArmoring import org.apache.daffodil.io.processors.charset.BitsCharsetDecoder import org.apache.daffodil.io.processors.charset.BitsCharsetEncoder +import org.apache.daffodil.layers.runtime1.BoundaryMarkRegexLimiter import org.apache.daffodil.lib.api.DaffodilTunables import org.apache.daffodil.lib.exceptions.Assert import org.apache.daffodil.lib.schema.annotation.props.gen.BinaryFloatRep import org.apache.daffodil.lib.schema.annotation.props.gen.BitOrder import org.apache.daffodil.lib.schema.annotation.props.gen.ByteOrder import org.apache.daffodil.lib.schema.annotation.props.gen.EncodingErrorPolicy -import org.apache.daffodil.lib.schema.annotation.props.gen.LayerLengthKind import org.apache.daffodil.lib.schema.annotation.props.gen.UTF16Width import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.MaybeInt -import org.apache.daffodil.runtime1.processors.ParseOrUnparseState +import org.apache.daffodil.runtime1.layers.api.JLayerLengthKind +import org.apache.daffodil.runtime1.layers.api.Layer +import org.apache.daffodil.runtime1.layers.api.LayerLimiter +import org.apache.daffodil.runtime1.layers.api.LayerPropertyInfo +import org.apache.daffodil.runtime1.layers.api.LayerRuntime import org.apache.commons.io.IOUtils -final class AISPayloadArmoringLayerCompiler extends LayerCompiler("aisPayloadArmor") { - - override def compileLayer( - layerCompileInfo: LayerCompileInfo, - ): AISPayloadArmoringTransformerFactory = { - - layerCompileInfo.optLayerBoundaryMarkOptConstantValue match { - case Some(Some(",")) => // ok - case None => // ok - case Some(Some(nonComma)) => - layerCompileInfo.SDE( - "Property dfdlx:layerBoundaryMark was defined as '$nonComma'. It must be ',' or left undefined.", - ) - case Some(None) => - layerCompileInfo.SDE( - "Property dfdlx:layerBoundaryMark was defined as an expression. It must be omitted, or defined to be ','.", - ) - } - layerCompileInfo.optLayerLengthKind match { - case Some(LayerLengthKind.BoundaryMark) => // ok - case Some(other) => - layerCompileInfo.SDE( - s"Only dfdlx:layerLengthKind 'boundaryMark' is supported, but '$other' was specified", - ) - case None => // ok - } - layerCompileInfo.optLayerJavaCharsetOptConstantValue match { - case Some(Some(_)) => // ok - case None => // ok - case _ => - layerCompileInfo.SDE( - "Property dfdlx:layerEncoding must be defined, and must be an ordinary 8-bit wide encoding. ", - ) - } - val xformer = new AISPayloadArmoringTransformerFactory(name) - xformer - } -} - -class AISPayloadArmoringTransformerFactory(name: String) extends LayerTransformerFactory(name) { - - override def newInstance(layerRuntimeInfo: LayerRuntimeInfo) = { - val xformer = new AISPayloadArmoringTransformer(name, layerRuntimeInfo) - xformer - } -} - -object AISPayloadArmoringTransformer { - def iso8859 = StandardCharsets.ISO_8859_1 -} - -class AISPayloadArmoringTransformer(name: String, layerRuntimeInfo: LayerRuntimeInfo) - extends LayerTransformer(name, layerRuntimeInfo) { - - import AISPayloadArmoringTransformer._ +final class AISPayloadArmoringLayer + extends Layer( + layerName = "aisPayloadArmor", + supportedLayerLengthKinds = Seq(JLayerLengthKind.BoundaryMark).asJava, + supportedLayerLengthUnits = Seq().asJava, + isRequiredLayerEncoding = true, + optLayerVariables = Optional.empty(), + ) { /** * Decoding AIS payload armoring is encoding the ASCII text into the * underlying binary data. */ - override def wrapLayerDecoder(jis: java.io.InputStream) = { + override def wrapLayerDecoder(jis: InputStream, lr: LayerRuntime): InputStream = new AISPayloadArmoringInputStream(jis) - } - - override def wrapLimitingStream(state: ParseOrUnparseState, jis: java.io.InputStream) = { - val layerBoundaryMark = "," - val s = BoundaryMarkLimitingStream(jis, layerBoundaryMark, iso8859) - s - } - override protected def wrapLayerEncoder(jos: java.io.OutputStream): java.io.OutputStream = { + override def wrapLayerEncoder(jos: OutputStream, lr: LayerRuntime): OutputStream = new AISPayloadArmoringOutputStream(jos) - } - override protected def wrapLimitingStream( - state: ParseOrUnparseState, - jos: java.io.OutputStream, - ) = { - val layerBoundaryMark = "," - val newJOS = new LayerBoundaryMarkInsertingJavaOutputStream(jos, layerBoundaryMark, iso8859) - newJOS - } + override def layerLimiter(layerPropertyInfo: LayerPropertyInfo): Optional[LayerLimiter] = + Optional.of(new BoundaryMarkRegexLimiter { + override protected def regexForBoundaryMarkMatch: String = "," + + override protected def boundaryMarkToInsert: String = "," + + override protected def maximumLengthBoundaryMark: String = "," + }) Review Comment: The above is just unnecessary. This is just regular boundaryMark behavior, where the boundary mark is a constant built into the layer code. Why bother. Just in the AIS layer DFDL file, define dfdlx:boundaryMark="," and then drop this stuff. ########## daffodil-test/src/test/scala/org/apache/daffodil/runtime1/layers/CheckDigitLayer.scala: ########## Review Comment: CheckDigit as a layer example should go away. Really check digits computation should be a UDF. For now it's useful as just another layer example. ########## daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/layers/api/LayerPropertyInfo.java: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.runtime1.layers.api; + +import java.nio.charset.Charset; +import java.util.Optional; + +/** + * Provides contextual information for the layer's own static checking. + */ +public interface LayerPropertyInfo { + + // TODO: BitsCharset needs to be a trait that is part of the supported API + + /** + * Retrieves the layerEncoding property as a java.nio.charset.Charset + * + * @return an Optional containing another Optional of type BitsCharset. + * The outer Optional will be empty if the layerEncoding property is not defined. + * The inner Optional will be empty if the layerEncoding property is not constant. + */ + Optional<Optional<Charset>> optOptConstLayerCharset(); + + /** + * @return the layerLength property value if defined and a constant. + */ + Optional<Optional<Long>> optOptConstLayerLength(); + + /** + * @return the layerLengthKind property value. This property must always be defined for layers so + * it is a schema definition error if this is requested but the property is not defined. + */ + JLayerLengthKind layerLengthKind(); + + /** + * @return the layerLengthUnits property value if defined. + */ + Optional<JLayerLengthUnits> optLayerLengthUnits(); + + /** + * @return the layerBoundaryMark property value if defined and a constant. + */ + Optional<Optional<String>> optOptConstLayerBoundaryMark(); + + // TODO: decide whether to provide these more mundane aliases +// boolean isDefinedLayerBoundaryMark(); +// boolean isConstLayerBoundaryMary(); +// String constLayerBoundaryMark(); + + /** + * Causes a Schema Definition Error when compiling the DFDL schema. + * + * @param msg A string which is included with the diagnostic information. + * @param args any number of arguments which are substituted into the msg via the same + * mechanism as String.format + * @return This method does not return. It throws to the Daffodil schema compiler context. + */ + void schemaDefinitionError(String msg, Object... args); + + /** + * Causes a Schema Definition Warning when compiling the DFDL schema. + * + * These can be suppressed using the warn ID "layerCompileWarning" + * + * @param msg A string which is included with the diagnostic information. + * @param args any number of arguments which are substituted into the msg via the same + * mechanism as String.format + */ + // TODO: Remove if not needed. Hard to implement, as the LayerRuntimeInfo object + // Doesn't have compiler context for issuing warnings. + // void schemaDefinitionWarning(String msg, Object... args); +} Review Comment: Remove SDW. Haven't needed it yet. ########## daffodil-runtime1-layers/src/main/scala/org/apache/daffodil/layers/runtime1/LineFoldedTransformer.scala: ########## @@ -393,11 +298,25 @@ class LineFoldedOutputStream(mode: LineFoldMode, jos: OutputStream) extends Outp } } + // TODO: This is broken because an isolated \r or \n is output as \r\n, but without verifying that the next character + // is space or tab. If that ends up as a \r\n not followed by a space or tab it has broken the data, as on a reparse + // that \r\n will be interpreted as an end-of-layer marker. Review Comment: That's true only if the \r\n is being used as a boundaryMark. If the layerLengthKind is 'implicit' then this insertion of \r\n doesn't harm anything. Maybe line folded layers don't need boundaryMark support at all? Maybe implicit is enough? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
