Re: [PR] Add schema validation code to C generator [daffodil]

via GitHub Fri, 20 Oct 2023 20:24:48 -0700


tuxji commented on code in PR #1098:
URL: https://github.com/apache/daffodil/pull/1098#discussion_r1367603917



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/libruntime/parsers.c:
##########
@@ -563,36 +535,36 @@ void
 parse_hexBinary(HexBinary *hexBinary, PState *pstate)
 {
     read_bits(hexBinary->array, hexBinary->lengthInBytes * BYTE_WIDTH, pstate);
-    if (pstate->error) return;
-    pstate->bitPos0b += hexBinary->lengthInBytes * BYTE_WIDTH;
+    if (pstate->pu.error) return;
+    pstate->pu.bitPos0b += hexBinary->lengthInBytes * BYTE_WIDTH;
 }
 
-// Validate element's value matches its fixed attribute
+// Parse fill bits up to alignmentInBits or end_bitPos0b
 
 void
-parse_validate_fixed(bool same, const char *element, PState *pstate)
+parse_align(size_t alignmentInBits, PState *pstate)
 {
-    if (!same)
-    {
-        Diagnostics *diagnostics = get_diagnostics();
-        const Error  error = {ERR_FIXED_VALUE, {.s = element}};
-
-        add_diagnostic(diagnostics, &error);
-        pstate->diagnostics = diagnostics;
-    }
+    size_t end_bitPos0b = ((pstate->pu.bitPos0b + alignmentInBits - 1) / 
alignmentInBits) * alignmentInBits;
+    parse_fill_bits(end_bitPos0b, pstate);
 }
 
-// Check array count is within bounds
-
 void
-parse_check_bounds(const char *name, size_t count, size_t minOccurs, size_t 
maxOccurs, PState *pstate)
+parse_fill_bits(size_t end_bitPos0b, PState *pstate)

Review Comment:
   Sure, I've changed the name.



##########
daffodil-codegen-c/src/test/resources/org/apache/daffodil/codegen/c/ex_nums.dfdl.xsd:
##########
@@ -17,184 +17,197 @@
 -->
 
 <xs:schema
-        elementFormDefault="qualified"
-        targetNamespace="http://example.com";
-        xmlns="http://example.com";
-        xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/";
-        xmlns:xs="http://www.w3.org/2001/XMLSchema";>
+  elementFormDefault="qualified"
+  targetNamespace="http://example.com";
+  version="1.2.3"
+  xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/";
+  xmlns:net="urn:network/format"
+  xmlns:xs="http://www.w3.org/2001/XMLSchema";
+  xmlns="http://example.com";>
 
-    <!-- Representation property bindings -->
+  <!-- Network order binary format (net:format) -->

Review Comment:
   OK, saying `Network order big endian format (net:format)`.



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/libruntime/infoset.h:
##########
@@ -102,52 +102,56 @@ typedef struct ERD
 
 typedef struct HexBinary
 {
-    uint8_t *array;         // pointer to data in byte array
-    size_t   lengthInBytes; // length of data in bytes
-    bool     dynamic;       // true if byte array was malloc'ed
+    uint8_t *array;       // pointer to data in byte array
+    size_t lengthInBytes; // length of data in bytes
+    bool dynamic;         // true if byte array was malloc'ed
 } HexBinary;
 
 // InfosetBase - metadata of an infoset element
 
 typedef struct InfosetBase
 {
-    const ERD *               erd;
+    const ERD *erd;
     const struct InfosetBase *parent;
 } InfosetBase;
 
+// ParserOrUnparserState - common mutable state while parsing or unparsing

Review Comment:
   OK, done.



##########
daffodil-codegen-c/src/main/scala/org/apache/daffodil/codegen/c/generators/BinaryValueCodeGenerator.scala:
##########
@@ -50,6 +51,53 @@ trait BinaryValueCodeGenerator {
     if (e.hasFixedValue && e.fixedValueAsString.nonEmpty) {
       validateFixed(deref)
     }
+
+    // Check if the element's value is restricted to a set of enumerations
+    if (e.typeDef.optRestriction.exists(_.hasEnumeration)) {
+      // Split the enumeration values into an array of strings ready to be 
inserted into the C code
+      val enumerationValues = 
e.typeDef.optRestriction.flatMap(_.enumerationValues).get
+      val enums = enumerationValues.split('|').map(_.replaceAll("\\\\(\\W)", 
"$1"))
+      // Call another function which can be redefined differently if necessary

Review Comment:
   This is a place where I was not keeping things as simple as possible. I 
wrote the comment while imagining some possible future need for polymorphic 
behavior, but it turned out I didn't need such behavior. I've removed the 
comment.



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/libcli/daffodil_main.c:
##########
@@ -21,8 +21,8 @@
 #include <string.h>           // for strcmp
 #include "cli_errors.h"       // for CLI_DIAGNOSTICS, CLI_FILE_CLOSE, 
CLI_FILE_OPEN
 #include "daffodil_getopt.h"  // for daffodil_cli, daffodil_parse, 
daffodil_parse_cli, parse_daffodil_cli, daffodil_unparse, daffodil_unparse_cli, 
DAFFODIL_PARSE, DAFFODIL_UNPARSE
-#include "errors.h"           // for continue_or_exit, print_diagnostics, 
Error, Diagnostics, Error::(anonymous)
-#include "infoset.h"          // for get_infoset, walk_infoset, PState, 
UState, parse_data, unparse_infoset, InfosetBase, VisitEventHandler
+#include "errors.h"           // for continue_or_exit, Diagnostics, 
print_diagnostics, Error
+#include "infoset.h"          // for ParserOrUnparserState, PState, UState, 
get_infoset, walk_infoset, parse_data, unparse_infoset, InfosetBase, 
VisitEventHandler

Review Comment:
   Good idea, I've added a `// auto-maintained by iwyu` comment to each 
iwyu-maintained part.



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/libruntime/validators.c:
##########
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// clang-format off
+#include "validators.h"
+#include <stdbool.h>  // for bool, false, true
+#include <string.h>   // for memcmp
+#include "errors.h"   // for add_diagnostic, get_diagnostics, Error, 
Diagnostics, ERR_ENUM_MATCH, ERR_ARRAY_BOUNDS, ERR_FIXED_VALUE, 
ERR_OUTSIDE_RANGE, Error::(anonymous)
+// clang-format on
+
+// Validate element's array count is within its array bounds
+
+void
+validate_array_bounds(const char *name, size_t count, size_t minOccurs, size_t 
maxOccurs,
+                      ParserOrUnparserState *pustate)
+{
+    if (count < minOccurs || count > maxOccurs)
+    {
+        // Array count is not within bounds, so report error
+        static Error error = {ERR_ARRAY_BOUNDS, {0}};
+        error.arg.s = name;
+        pustate->error = &error;
+    }
+}
+
+// Validate element's value is same as its fixed attribute
+
+void
+validate_fixed_attribute(bool same, const char *element, ParserOrUnparserState 
*pustate)
+{
+    if (!same)
+    {
+        // Element is not same as its fixed attribute, so diagnose problem
+        Diagnostics *diagnostics = get_diagnostics();
+        const Error error = {ERR_FIXED_VALUE, {.s = element}};
+
+        add_diagnostic(diagnostics, &error);
+        pustate->diagnostics = diagnostics;
+    }
+}
+
+// Validate element's value matches an enumeration (either floating point or 
integer)

Review Comment:
   > A specialized validator for the integer case I can see. But floating 
point? What's the point (heh)?
   
   Just keeping daffodil and daffodilC as compatible as possible in the places 
where they both overlap.



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/libruntime/parsers.c:
##########
@@ -21,7 +21,7 @@
 #include <stdbool.h>  // for bool, false, true
 #include <stdio.h>    // for fread, fgetc, EOF
 #include <stdlib.h>   // for free, malloc
-#include "errors.h"   // for Error, eof_or_error, Error::(anonymous), 
ERR_LEFTOVER_DATA, add_diagnostic, get_diagnostics, ERR_ARRAY_BOUNDS, 
ERR_FIXED_VALUE, ERR_HEXBINARY_ALLOC, ERR_PARSE_BOOL, Diagnostics
+#include "errors.h"   // for Error, eof_or_error, ERR_LEFTOVER_DATA, 
Error::(anonymous), ERR_HEXBINARY_ALLOC, ERR_PARSE_BOOL

Review Comment:
   What you're noticing is that iwyu never puts a comment on a C source file's 
own header file.  That is, parsers.c "uses" a few symbols from errors.h, but 
errors.c "implements" nearly all of errors.h's symbols.  Even iwyu put a 
comment anyway, the comment would be too long trying to list all the header 
symbols implemented by the source file.



##########
BUILD.md:
##########
@@ -50,9 +50,9 @@ following its website's instructions. This is necessary to 
install the
 [libmxml-devel][Mini-XML] package.
 
 You can use the `dnf` package manager to install most of the tools
-needed to build Daffodil:
+used to develop Daffodil:
 
-    sudo dnf install clang gcc git java-11-openjdk-devel llvm make mxml-devel 
pkgconf
+    sudo dnf install clang gcc git iwyu java-11-openjdk-devel llvm make 
mxml-devel pkgconf
 
 If you want to use clang instead of gcc, you'll have to set your

Review Comment:
   If you're talking about the paragraph `If you want to use clang instead of 
gcc, ...`, I originally added that paragraph because you told me you didn't 
want to require GNU-licensed tools to develop or use any part of Daffodil.  I 
even made sure that the GitHub Action workflow (main.yml) would set these 
environment variables to build Daffodil with clang and lvvm-ar instead of gcc 
and ar.  I can reword the paragraph to say you can set these environment 
variables to use clang and llvm-ar instead of gcc and ar if you have a policy 
forbidding GNU-licensed tools.



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/libruntime/unparsers.c:
##########
@@ -401,69 +401,41 @@ unparse_le_uint8(uint8_t number, size_t num_bits, UState 
*ustate)
     unparse_endian_uint64(LITTLE_ENDIAN_DATA, number, num_bits, ustate);
 }
 
+// Unparse opaque bytes from hexBinary field
+
+void
+unparse_hexBinary(HexBinary hexBinary, UState *ustate)
+{
+    write_bits(hexBinary.array, hexBinary.lengthInBytes * BYTE_WIDTH, ustate);
+    if (ustate->pu.error) return;
+    ustate->pu.bitPos0b += hexBinary.lengthInBytes * BYTE_WIDTH;
+}
+
 // Unparse fill bits up to alignmentInBits or end_bitPos0b
 
 void
 unparse_align(size_t alignmentInBits, const uint8_t fill_byte, UState *ustate)
 {
-    size_t end_bitPos0b = ((ustate->bitPos0b + alignmentInBits - 1) / 
alignmentInBits) * alignmentInBits;
+    size_t end_bitPos0b = ((ustate->pu.bitPos0b + alignmentInBits - 1) / 
alignmentInBits) * alignmentInBits;
     unparse_fill_bits(end_bitPos0b, fill_byte, ustate);
 }
 
 void
 unparse_fill_bits(size_t end_bitPos0b, const uint8_t fill_byte, UState *ustate)

Review Comment:
   I've changed the name too.



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/tests/bits.c:
##########


Review Comment:
   Both C tooling limitations and keeping things as simple as possible (it's 
more work to exclude the tests than to simply include them in generated c 
directory).



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/libruntime/validators.c:
##########
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// clang-format off
+#include "validators.h"
+#include <stdbool.h>  // for bool, false, true
+#include <string.h>   // for memcmp
+#include "errors.h"   // for add_diagnostic, get_diagnostics, Error, 
Diagnostics, ERR_ENUM_MATCH, ERR_ARRAY_BOUNDS, ERR_FIXED_VALUE, 
ERR_OUTSIDE_RANGE, Error::(anonymous)
+// clang-format on
+
+// Validate element's array count is within its array bounds
+
+void
+validate_array_bounds(const char *name, size_t count, size_t minOccurs, size_t 
maxOccurs,
+                      ParserOrUnparserState *pustate)
+{
+    if (count < minOccurs || count > maxOccurs)
+    {
+        // Array count is not within bounds, so report error
+        static Error error = {ERR_ARRAY_BOUNDS, {0}};
+        error.arg.s = name;
+        pustate->error = &error;
+    }
+}
+
+// Validate element's value is same as its fixed attribute
+
+void
+validate_fixed_attribute(bool same, const char *element, ParserOrUnparserState 
*pustate)
+{
+    if (!same)
+    {
+        // Element is not same as its fixed attribute, so diagnose problem
+        Diagnostics *diagnostics = get_diagnostics();
+        const Error error = {ERR_FIXED_VALUE, {.s = element}};
+
+        add_diagnostic(diagnostics, &error);
+        pustate->diagnostics = diagnostics;
+    }
+}
+
+// Validate element's value matches an enumeration (either floating point or 
integer)
+
+void
+validate_floatpt_enumeration(double number, size_t num_enums, double enums[], 
const char *element,
+                             ParserOrUnparserState *pustate)
+{
+    bool match_found = false;
+    for (size_t i = 0; !match_found && i < num_enums; i++)
+    {
+        if (number == enums[i])
+        {
+            match_found = true;
+        }
+    }
+    if (!match_found)
+    {
+        // Number does not match any enumeration, so diagnose problem
+        Diagnostics *diagnostics = get_diagnostics();
+        const Error error = {ERR_ENUM_MATCH, {.s = element}};
+
+        add_diagnostic(diagnostics, &error);
+        pustate->diagnostics = diagnostics;
+    }
+}
+
+void
+validate_hexbinary_enumeration(const HexBinary *hexBinary, size_t num_enums, 
HexBinary enums[],

Review Comment:
   Added comment.



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/libruntime/validators.c:
##########
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// clang-format off
+#include "validators.h"
+#include <stdbool.h>  // for bool, false, true
+#include <string.h>   // for memcmp
+#include "errors.h"   // for add_diagnostic, get_diagnostics, Error, 
Diagnostics, ERR_ENUM_MATCH, ERR_ARRAY_BOUNDS, ERR_FIXED_VALUE, 
ERR_OUTSIDE_RANGE, Error::(anonymous)
+// clang-format on
+
+// Validate element's array count is within its array bounds
+
+void
+validate_array_bounds(const char *name, size_t count, size_t minOccurs, size_t 
maxOccurs,
+                      ParserOrUnparserState *pustate)

Review Comment:
   > These validation functions are the only thing that I find here that 
actually uses states polymorphically via ParserOrUnparserState as a type. Is 
that correct?
   
   Yes, that's correct.



##########
daffodil-codegen-c/src/main/resources/org/apache/daffodil/codegen/c/files/tests/bits.c:
##########
@@ -20,29 +20,29 @@
 #include <criterion/new/assert.h>  // for CRI_ASSERT_OP_VAR_TAGGED, 
CRI_ASSERT_OP_TYPE_TAGGED, CRI_ASSERT_OP_VAL_TAGGED, CRI_ASSERT_TEST_TAG_u8, 
cr_user_u8_tostr, CRI_ASSERT_OP_MKNODE_TAGGED, CRI_ASSERT_OP_NAME_TAGGED, 
CRI_ASSERT_TEST_TAG_sz, cr_user_sz_tostr, CRI_ASSERT_TYPE_TAG_ID_u8, 
cr_user_u8_eq, CRI_ASSERT_TEST_TAG_ptr, cr_user_ptr_tostr, CRI_ASSERT_MKLIST_2, 
CRI_ASSERT_SPECIFIER_OP1, CRI_ASSERT_SPECIFIER_eq, CRI_ASSERT_SPEC_OPLEN_2, 
CRI_ASSERT_TEST_SPECIFIER_eq, cr_expect, CRI_ASSERT_TYPE_TAG_u8, 
CRI_ASSERT_TYPE_TAG_ID_sz, cr_user_sz_eq, CRI_ASSERT_TYPE_TAG_sz, 
CRI_ASSERT_TYPE_TAG_ID_ptr, cr_user_ptr_eq, CRI_ASSERT_TEST_TAGC_u8, 
CRI_ASSERT_TEST_TAG_int, cr_user_int_tostr, CRI_ASSERT_TYPE_TAG_ptr, 
CRI_ASSERT_TEST_TAGC_sz, CRI_ASSERT_TYPE_TAG_ID_int, cr_user_int_eq, 
CRI_ASSERT_TEST_TAGC_ptr, CRI_ASSERT_TYPE_TAG_int, CRI_ASSERT_TEST_TAG_i16, 
CRI_ASSERT_TEST_TAG_i32, CRI_ASSERT_TEST_TAG_i64, CRI_ASSERT_TEST_TAG_i8, 
CRI_ASSERT_TEST_TAG_u16, CRI_ASSERT_TEST_TAG_u32, CRI_ASSERT_TE..
 .

Review Comment:
   Changed Makefile to tell iwyu to put 110 characters max (same as 
.clang-format's column limit).



##########
daffodil-codegen-c/src/main/scala/org/apache/daffodil/codegen/c/generators/AlignmentFillCodeGenerator.scala:
##########
@@ -31,11 +31,11 @@ trait AlignmentFillCodeGenerator {
     val parseStatement =
       s"""$indent1$indent2    // Fill to closest alignment
          |$indent1$indent2    parse_align($alignmentInBits, pstate);
-         |$indent1$indent2    if (pstate->error) return;""".stripMargin
+         |$indent1$indent2    if (pstate->pu.error) return;""".stripMargin
     val unparseStatement =
       s"""$indent1$indent2    // Fill to closest alignment

Review Comment:
   I've considered implementing an indentStripMargin(indent) function which 
would let me remove the $indent1$indent2 and let the function generate the 
required indentation instead.



##########
daffodil-codegen-c/src/main/scala/org/apache/daffodil/codegen/c/generators/BinaryValueCodeGenerator.scala:
##########
@@ -50,6 +51,53 @@ trait BinaryValueCodeGenerator {
     if (e.hasFixedValue && e.fixedValueAsString.nonEmpty) {
       validateFixed(deref)
     }
+
+    // Check if the element's value is restricted to a set of enumerations
+    if (e.typeDef.optRestriction.exists(_.hasEnumeration)) {
+      // Split the enumeration values into an array of strings ready to be 
inserted into the C code
+      val enumerationValues = 
e.typeDef.optRestriction.flatMap(_.enumerationValues).get
+      val enums = enumerationValues.split('|').map(_.replaceAll("\\\\(\\W)", 
"$1"))
+      // Call another function which can be redefined differently if necessary
+      valueValidateEnumeration(e, deref, enums, cgState)
+    }
+
+    // Check if the element's value is restricted to a range (we will need to 
handle any
+    // combination of inclusive and exclusive endpoints when generating our C 
expression)
+    val hasMinExclusive = e.typeDef.optRestriction.exists(_.hasMinExclusive)
+    val hasMinInclusive = e.typeDef.optRestriction.exists(_.hasMinInclusive)
+    val hasMaxExclusive = e.typeDef.optRestriction.exists(_.hasMaxExclusive)
+    val hasMaxInclusive = e.typeDef.optRestriction.exists(_.hasMaxInclusive)
+    if (hasMinExclusive || hasMinInclusive || hasMaxExclusive || 
hasMaxInclusive) {
+      // Generate the minimum endpoint comparison
+      val minEndpoint = if (hasMinExclusive) {
+        val endpoint = e.typeDef.optRestriction.map(_.minExclusiveValue).get
+        s"""> $endpoint"""
+      } else if (hasMinInclusive) {
+        val endpoint = e.typeDef.optRestriction.map(_.minInclusiveValue).get
+        // Avoid unsigned >= 0 comparisons (causes gcc warnings)
+        e.optPrimType.get match {
+          case PrimType.UnsignedByte | PrimType.UnsignedShort | 
PrimType.UnsignedInt |
+              PrimType.UnsignedLong | PrimType.NonNegativeInteger if 
endpoint.toString == "0" =>
+            ""

Review Comment:
   I've moved a comment (`// Avoid unsigned >= 0 comparisons (causes gcc 
warnings)`) down two lines so the reason for the "" will be more visible.  I 
won't bother using Option at this time.



##########
daffodil-codegen-c/src/main/scala/org/apache/daffodil/codegen/c/generators/BinaryValueCodeGenerator.scala:
##########
@@ -93,11 +141,94 @@ trait BinaryValueCodeGenerator {
 
     val initERDStatement = ""
     val parseStatement =
-      s"""$indent1$indent2    parse_validate_fixed($field == $fixed, 
"$localName", pstate);
-         |$indent1$indent2    if (pstate->error) return;""".stripMargin
+      s"""$indent1$indent2    validate_fixed_attribute($field == $fixed, 
"$localName", &pstate->pu);
+         |$indent1$indent2    if (pstate->pu.error) return;""".stripMargin
+    val unparseStatement =
+      s"""$indent1$indent2    validate_fixed_attribute($field == $fixed, 
"$localName", &ustate->pu);
+         |$indent1$indent2    if (ustate->pu.error) return;""".stripMargin
+    cgState.addSimpleTypeStatements(initERDStatement, parseStatement, 
unparseStatement)
+  }
+
+  // Generate C code to validate a primitive element matches one of its 
enumeration values.
+  // May be replaced by a more specialized function in another trait for some 
types.

Review Comment:
   I deleted this `"May be replaced by` comment as well since I ended up doing 
all the necessary implementation for all types in the same function anyway (no 
mixin trait needed).  



##########
daffodil-codegen-c/src/main/scala/org/apache/daffodil/codegen/c/generators/BinaryValueCodeGenerator.scala:
##########
@@ -50,6 +51,53 @@ trait BinaryValueCodeGenerator {
     if (e.hasFixedValue && e.fixedValueAsString.nonEmpty) {
       validateFixed(deref)
     }
+
+    // Check if the element's value is restricted to a set of enumerations
+    if (e.typeDef.optRestriction.exists(_.hasEnumeration)) {
+      // Split the enumeration values into an array of strings ready to be 
inserted into the C code
+      val enumerationValues = 
e.typeDef.optRestriction.flatMap(_.enumerationValues).get
+      val enums = enumerationValues.split('|').map(_.replaceAll("\\\\(\\W)", 
"$1"))
+      // Call another function which can be redefined differently if necessary
+      valueValidateEnumeration(e, deref, enums, cgState)
+    }
+
+    // Check if the element's value is restricted to a range (we will need to 
handle any
+    // combination of inclusive and exclusive endpoints when generating our C 
expression)
+    val hasMinExclusive = e.typeDef.optRestriction.exists(_.hasMinExclusive)
+    val hasMinInclusive = e.typeDef.optRestriction.exists(_.hasMinInclusive)
+    val hasMaxExclusive = e.typeDef.optRestriction.exists(_.hasMaxExclusive)
+    val hasMaxInclusive = e.typeDef.optRestriction.exists(_.hasMaxInclusive)
+    if (hasMinExclusive || hasMinInclusive || hasMaxExclusive || 
hasMaxInclusive) {
+      // Generate the minimum endpoint comparison
+      val minEndpoint = if (hasMinExclusive) {
+        val endpoint = e.typeDef.optRestriction.map(_.minExclusiveValue).get
+        s"""> $endpoint"""
+      } else if (hasMinInclusive) {
+        val endpoint = e.typeDef.optRestriction.map(_.minInclusiveValue).get
+        // Avoid unsigned >= 0 comparisons (causes gcc warnings)
+        e.optPrimType.get match {
+          case PrimType.UnsignedByte | PrimType.UnsignedShort | 
PrimType.UnsignedInt |
+              PrimType.UnsignedLong | PrimType.NonNegativeInteger if 
endpoint.toString == "0" =>
+            ""
+          case _ =>
+            s""">= $endpoint"""
+        }
+      } else {
+        ""
+      }
+      // Generate the maximum endpoint comparison
+      val maxEndpoint = if (hasMaxExclusive) {
+        val endpoint = e.typeDef.optRestriction.map(_.maxExclusiveValue).get
+        s"""< $endpoint"""
+      } else if (hasMaxInclusive) {
+        val endpoint = e.typeDef.optRestriction.map(_.maxInclusiveValue).get
+        s"""<= $endpoint"""
+      } else {
+        ""
+      }
+      // Call another function which can be redefined differently if necessary

Review Comment:
   Agreed, I just deleted the comment to keep things as simple as possible.



##########
daffodil-codegen-c/src/main/scala/org/apache/daffodil/codegen/c/generators/BinaryValueCodeGenerator.scala:
##########
@@ -93,11 +141,94 @@ trait BinaryValueCodeGenerator {
 
     val initERDStatement = ""
     val parseStatement =
-      s"""$indent1$indent2    parse_validate_fixed($field == $fixed, 
"$localName", pstate);
-         |$indent1$indent2    if (pstate->error) return;""".stripMargin
+      s"""$indent1$indent2    validate_fixed_attribute($field == $fixed, 
"$localName", &pstate->pu);
+         |$indent1$indent2    if (pstate->pu.error) return;""".stripMargin
+    val unparseStatement =
+      s"""$indent1$indent2    validate_fixed_attribute($field == $fixed, 
"$localName", &ustate->pu);
+         |$indent1$indent2    if (ustate->pu.error) return;""".stripMargin
+    cgState.addSimpleTypeStatements(initERDStatement, parseStatement, 
unparseStatement)
+  }
+
+  // Generate C code to validate a primitive element matches one of its 
enumeration values.
+  // May be replaced by a more specialized function in another trait for some 
types.
+  private def valueValidateEnumeration(
+    e: ElementBase,
+    deref: String,
+    enums: Array[String],
+    cgState: CodeGeneratorState,
+  ): Unit = {
+    val indent1 = if (cgState.hasChoice) INDENT else NO_INDENT
+    val indent2 = if (deref.nonEmpty) INDENT else NO_INDENT
+    val localName = cgState.cName(e)
+    val field = s"instance->$localName$deref"
+
+    // Extra initialization only for hexBinary enumerations
+    val arraysName = s"arrays_$localName"
+    val hexEnums = enums.zipWithIndex.map { case (s, index) =>
+      s"{$arraysName[$index], ${s.length / 2}, false}"
+    }
+    val hexEnumsInit = enums.map(_.grouped(2).map("0x" + _).mkString("{", ", 
", "}"))
+    val enumsLenMax = enums.map(_.length / 2).max
+    val arraysInit =
+      s"""$indent1$indent2    uint8_t $arraysName[][$enumsLenMax] = 
${hexEnumsInit.mkString(
+          "{",
+          ", ",
+          "}",
+        )};\n"""
+
+    val (enumsInit, extraInit, primType, valType, fieldArg) = 
e.optPrimType.get match {
+      case PrimType.Double | PrimType.Float => (enums, "", "double", 
"floatpt", field)
+      case PrimType.HexBinary => (hexEnums, arraysInit, "HexBinary", 
"hexbinary", s"&$field")
+      case _ => (enums, "", "int64_t", "integer", field)
+    }
+
+    // Initialization for all enumerations
+    val enumsArray = s"enums_$localName"
+    val varsInit = extraInit +
+      s"""$indent1$indent2    $primType $enumsArray[] = ${enumsInit.mkString(

Review Comment:
   C compilers are pretty robust programs, so I doubt giant enums would blow 
them up.  But we very well might need to refactor the C implementation to be 
more readable when let loose on 4096 enums.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Add schema validation code to C generator [daffodil]

Reply via email to