This is an automated email from the ASF dual-hosted git repository.

mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
     new 3654973d9  chore: Improve process for generating dynamic content into documentation (#2017)
3654973d9 is described below

commit 3654973d910c442d97791cc5e031524acf8a1cc0
Author: Andy Grove <agr...@apache.org>
AuthorDate: Fri Jul 11 13:11:12 2025 -0600

    chore: Improve process for generating dynamic content into documentation (#2017)
---
 docs/source/user-guide/compatibility.md            | 174 +++++++++++----------
 docs/source/user-guide/configs.md                  |   4 +
 docs/templates/compatibility-template.md           | 149 ------------------
 docs/templates/configs-template.md                 |  30 ----
 .../main/scala/org/apache/comet/GenerateDocs.scala |  54 +++++--
 5 files changed, 132 insertions(+), 279 deletions(-)

diff --git a/docs/source/user-guide/compatibility.md b/docs/source/user-guide/compatibility.md
index 84c4aab0e..ab911474e 100644
--- a/docs/source/user-guide/compatibility.md
+++ b/docs/source/user-guide/compatibility.md
@@ -131,94 +131,102 @@ Cast operations in Comet fall into three levels of support:
 
 The following cast operations are generally compatible with Spark except for the differences noted here.
 
-| From Type | To Type | Notes |
-| --------- | ------- | --------------------------------------------------------------------------------------------------------------- |
-| boolean | byte | |
-| boolean | short | |
-| boolean | integer | |
-| boolean | long | |
-| boolean | float | |
-| boolean | double | |
-| boolean | string | |
-| byte | boolean | |
-| byte | short | |
-| byte | integer | |
-| byte | long | |
-| byte | float | |
-| byte | double | |
-| byte | decimal | |
-| byte | string | |
-| short | boolean | |
-| short | byte | |
-| short | integer | |
-| short | long | |
-| short | float | |
-| short | double | |
-| short | decimal | |
-| short | string | |
-| integer | boolean | |
-| integer | byte | |
-| integer | short | |
-| integer | long | |
-| integer | float | |
-| integer | double | |
-| integer | string | |
-| long | boolean | |
-| long | byte | |
-| long | short | |
-| long | integer | |
-| long | float | |
-| long | double | |
-| long | string | |
-| float | boolean | |
-| float | byte | |
-| float | short | |
-| float | integer | |
-| float | long | |
-| float | double | |
-| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
-| double | boolean | |
-| double | byte | |
-| double | short | |
-| double | integer | |
-| double | long | |
-| double | float | |
-| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
-| decimal | byte | |
-| decimal | short | |
-| decimal | integer | |
-| decimal | long | |
-| decimal | float | |
-| decimal | double | |
-| decimal | decimal | |
-| decimal | string | There can be formatting differences in some case due to Spark using scientific notation where Comet does not |
-| string | boolean | |
-| string | byte | |
-| string | short | |
-| string | integer | |
-| string | long | |
-| string | binary | |
-| string | date | Only supports years between 262143 BC and 262142 AD |
-| date | string | |
-| timestamp | long | |
-| timestamp | string | |
-| timestamp | date | |
+<!-- WARNING! DO NOT MANUALLY MODIFY CONTENT BETWEEN THE BEGIN AND END TAGS -->
+
+<!--BEGIN:COMPAT_CAST_TABLE-->
+| From Type | To Type | Notes |
+|-|-|-|
+| boolean | byte | |
+| boolean | short | |
+| boolean | integer | |
+| boolean | long | |
+| boolean | float | |
+| boolean | double | |
+| boolean | string | |
+| byte | boolean | |
+| byte | short | |
+| byte | integer | |
+| byte | long | |
+| byte | float | |
+| byte | double | |
+| byte | decimal | |
+| byte | string | |
+| short | boolean | |
+| short | byte | |
+| short | integer | |
+| short | long | |
+| short | float | |
+| short | double | |
+| short | decimal | |
+| short | string | |
+| integer | boolean | |
+| integer | byte | |
+| integer | short | |
+| integer | long | |
+| integer | float | |
+| integer | double | |
+| integer | string | |
+| long | boolean | |
+| long | byte | |
+| long | short | |
+| long | integer | |
+| long | float | |
+| long | double | |
+| long | string | |
+| float | boolean | |
+| float | byte | |
+| float | short | |
+| float | integer | |
+| float | long | |
+| float | double | |
+| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
+| double | boolean | |
+| double | byte | |
+| double | short | |
+| double | integer | |
+| double | long | |
+| double | float | |
+| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
+| decimal | byte | |
+| decimal | short | |
+| decimal | integer | |
+| decimal | long | |
+| decimal | float | |
+| decimal | double | |
+| decimal | decimal | |
+| decimal | string | There can be formatting differences in some case due to Spark using scientific notation where Comet does not |
+| string | boolean | |
+| string | byte | |
+| string | short | |
+| string | integer | |
+| string | long | |
+| string | binary | |
+| string | date | Only supports years between 262143 BC and 262142 AD |
+| date | string | |
+| timestamp | long | |
+| timestamp | string | |
+| timestamp | date | |
+<!--END:COMPAT_CAST_TABLE-->
 
 ### Incompatible Casts
 
 The following cast operations are not compatible with Spark for all inputs and are disabled by default.
 
-| From Type | To Type | Notes |
-| --------- | --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| integer | decimal | No overflow check |
-| long | decimal | No overflow check |
-| float | decimal | There can be rounding differences |
-| double | decimal | There can be rounding differences |
-| string | float | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
-| string | double | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
-| string | decimal | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. Returns 0.0 instead of null if input contains no digits |
-| string | timestamp | Not all valid formats are supported |
-| binary | string | Only works for binary data representing valid UTF-8 strings |
+<!-- WARNING! DO NOT MANUALLY MODIFY CONTENT BETWEEN THE BEGIN AND END TAGS -->
+
+<!--BEGIN:INCOMPAT_CAST_TABLE-->
+| From Type | To Type | Notes |
+|-|-|-|
+| integer | decimal | No overflow check |
+| long | decimal | No overflow check |
+| float | decimal | There can be rounding differences |
+| double | decimal | There can be rounding differences |
+| string | float | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
+| string | double | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. |
+| string | decimal | Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. Does not support ANSI mode. Returns 0.0 instead of null if input contains no digits |
+| string | timestamp | Not all valid formats are supported |
+| binary | string | Only works for binary data representing valid UTF-8 strings |
+<!--END:INCOMPAT_CAST_TABLE-->
 
 ### Unsupported Casts
 
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
index 6544909aa..00adc5173 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -27,6 +27,9 @@ TO MODIFY THIS CONTENT MAKE SURE THAT YOU MAKE YOUR CHANGES TO THE TEMPLATE FILE
 
 Comet provides the following configuration settings.
 
+<!-- WARNING! DO NOT MANUALLY MODIFY CONTENT BETWEEN THE BEGIN AND END TAGS -->
+
+<!--BEGIN:CONFIG_TABLE-->
 | Config | Description | Default Value |
 |--------|-------------|---------------|
 | spark.comet.batchSize | The columnar batch size, i.e., the maximum number of rows that a batch can contain. | 8192 |
@@ -93,3 +96,4 @@ Comet provides the following configuration settings.
 | spark.comet.shuffle.preferDictionary.ratio | The ratio of total values to distinct values in a string column to decide whether to prefer dictionary encoding when shuffling the column. If the ratio is higher than this config, dictionary encoding will be used on shuffling string column. This config is effective if it is higher than 1.0. Note that this config is only used when `spark.comet.exec.shuffle.mode` is `jvm`. | 10.0 |
 | spark.comet.shuffle.sizeInBytesMultiplier | Comet reports smaller sizes for shuffle due to using Arrow's columnar memory format and this can result in Spark choosing a different join strategy due to the estimated size of the exchange being smaller. Comet will multiple sizeInBytes by this amount to avoid regressions in join strategy. | 1.0 |
 | spark.comet.sparkToColumnar.supportedOperatorList | A comma-separated list of operators that will be converted to Arrow columnar format when 'spark.comet.sparkToColumnar.enabled' is true | Range,InMemoryTableScan |
+<!--END:CONFIG_TABLE-->
diff --git a/docs/templates/compatibility-template.md b/docs/templates/compatibility-template.md
deleted file mode 100644
index d26874820..000000000
--- a/docs/templates/compatibility-template.md
+++ /dev/null
@@ -1,149 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements. See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership. The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License. You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied. See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<!--
-  TO MODIFY THIS CONTENT MAKE SURE THAT YOU MAKE YOUR CHANGES TO THE TEMPLATE FILE
-  (docs/templates/compatibility-template.md) AND NOT THE GENERATED FILE
-  (docs/source/user-guide/compatibility.md) OTHERWISE YOUR CHANGES MAY BE LOST
--->
-
-# Compatibility Guide
-
-Comet aims to provide consistent results with the version of Apache Spark that is being used.
-
-This guide offers information about areas of functionality where there are known differences.
-
-## Parquet
-
-### Data Type Support
-
-Comet does not support reading decimals encoded in binary format.
-
-### Parquet Scans
-
-Comet currently has three distinct implementations of the Parquet scan operator. The configuration property
-`spark.comet.scan.impl` is used to select an implementation.
-
-| Implementation | Description |
-| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `native_comet` | This is the default implementation. It provides strong compatibility with Spark but does not support complex types. |
-| `native_datafusion` | This implementation delegates to DataFusion's `DataSourceExec`. |
-| `native_iceberg_compat` | This implementation also delegates to DataFusion's `DataSourceExec` but uses a hybrid approach of JVM and native code. This scan is designed to be integrated with Iceberg in the future. |
-
-The new (and currently experimental) `native_datafusion` and `native_iceberg_compat` scans provide the following benefits over the `native_comet`
-implementation:
-
-- Leverages the DataFusion community's ongoing improvements to `DataSourceExec`
-- Provides support for reading complex types (structs, arrays, and maps)
-- Removes the use of reusable mutable-buffers in Comet, which is complex to maintain
-- Improves performance
-
-The new scans currently have the following limitations:
-
-Issues common to both `native_datafusion` and `native_iceberg_compat`:
-
-- When reading Parquet files written by systems other than Spark that contain columns with the logical types `UINT_8`
-  or `UINT_16`, Comet will produce different results than Spark because Spark does not preserve or understand these
-  logical types. Arrow-based readers, such as DataFusion and Comet do respect these types and read the data as unsigned
-  rather than signed. By default, Comet will fall back to Spark when scanning Parquet files containing `byte` or `short`
-  types (regardless of the logical type). This behavior can be disabled by setting
-  `spark.comet.scan.allowIncompatible=true`.
-- There is a known performance issue when pushing filters down to Parquet. See the [Comet Tuning Guide] for more
-  information.
-- Reading maps containing complex types can result in errors or incorrect results [#1754]
-- `PARQUET_FIELD_ID_READ_ENABLED` is not respected [#1758]
-- There are failures in the Spark SQL test suite when enabling these new scans (tracking issues: [#1542] and [#1545]).
-- No support for default values that are nested types (e.g., maps, arrays, structs).
-  Literal default values are supported.
-- Setting Spark configs `ignoreMissingFiles` or `ignoreCorruptFiles` to `true` is not compatible with `native_datafusion` scan.
-
-Issues specific to `native_datafusion`:
-
-- Bucketed scans are not supported
-- No support for row indexes
-
-[#1545]: https://github.com/apache/datafusion-comet/issues/1545
-[#1542]: https://github.com/apache/datafusion-comet/issues/1542
-[#1754]: https://github.com/apache/datafusion-comet/issues/1754
-[#1758]: https://github.com/apache/datafusion-comet/issues/1758
-[Comet Tuning Guide]: tuning.md
-
-## ANSI mode
-
-Comet currently ignores ANSI mode in most cases, and therefore can produce different results than Spark. By default,
-Comet will fall back to Spark if ANSI mode is enabled. To enable Comet to accelerate queries when ANSI mode is enabled,
-specify `spark.comet.ansi.enabled=true` in the Spark configuration. Comet's ANSI support is experimental and should not
-be used in production.
-
-There is an [epic](https://github.com/apache/datafusion-comet/issues/313) where we are tracking the work to fully implement ANSI support.
-
-## Floating number comparison
-
-Spark normalizes NaN and zero for floating point numbers for several cases. See `NormalizeFloatingNumbers` optimization rule in Spark.
-However, one exception is comparison. Spark does not normalize NaN and zero when comparing values
-because they are handled well in Spark (e.g., `SQLOrderingUtil.compareFloats`). But the comparison
-functions of arrow-rs used by DataFusion do not normalize NaN and zero (e.g., [arrow::compute::kernels::cmp::eq](https://docs.rs/arrow/latest/arrow/compute/kernels/cmp/fn.eq.html#)).
-So Comet will add additional normalization expression of NaN and zero for comparison.
-
-There is a known bug with using count(distinct) within aggregate queries, where each NaN value will be counted
-separately [#1824](https://github.com/apache/datafusion-comet/issues/1824).
-
-## Incompatible Expressions
-
-Some Comet native expressions are not 100% compatible with Spark and are disabled by default. These expressions
-will fall back to Spark but can be enabled by setting `spark.comet.expression.allowIncompatible=true`.
-
-## Array Expressions
-
-Comet has experimental support for a number of array expressions. These are experimental and currently marked
-as incompatible and can be enabled by setting `spark.comet.expression.allowIncompatible=true`.
-
-## Regular Expressions
-
-Comet uses the Rust regexp crate for evaluating regular expressions, and this has different behavior from Java's
-regular expression engine. Comet will fall back to Spark for patterns that are known to produce different results, but
-this can be overridden by setting `spark.comet.regexp.allowIncompatible=true`.
-
-## Cast
-
-Cast operations in Comet fall into three levels of support:
-
-- **Compatible**: The results match Apache Spark
-- **Incompatible**: The results may match Apache Spark for some inputs, but there are known issues where some inputs
-  will result in incorrect results or exceptions. The query stage will fall back to Spark by default. Setting
-  `spark.comet.cast.allowIncompatible=true` will allow all incompatible casts to run natively in Comet, but this is not
-  recommended for production use.
-- **Unsupported**: Comet does not provide a native version of this cast expression and the query stage will fall back to
-  Spark.
-
-### Compatible Casts
-
-The following cast operations are generally compatible with Spark except for the differences noted here.
-
-<!--COMPAT_CAST_TABLE-->
-
-### Incompatible Casts
-
-The following cast operations are not compatible with Spark for all inputs and are disabled by default.
-
-<!--INCOMPAT_CAST_TABLE-->
-
-### Unsupported Casts
-
-Any cast not listed in the previous tables is currently unsupported. We are working on adding more. See the
-[tracking issue](https://github.com/apache/datafusion-comet/issues/286) for more details.
diff --git a/docs/templates/configs-template.md b/docs/templates/configs-template.md
deleted file mode 100644
index c6076afd7..000000000
--- a/docs/templates/configs-template.md
+++ /dev/null
@@ -1,30 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements. See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership. The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License. You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied. See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<!--
-  TO MODIFY THIS CONTENT MAKE SURE THAT YOU MAKE YOUR CHANGES TO THE TEMPLATE FILE
-  (docs/templates/configs-template.md) AND NOT THE GENERATED FILE
-  (docs/source/user-guide/configs.md) OTHERWISE YOUR CHANGES MAY BE LOST
--->
-
-# Comet Configuration Settings
-
-Comet provides the following configuration settings.
-
-<!--CONFIG_TABLE-->
diff --git a/spark/src/main/scala/org/apache/comet/GenerateDocs.scala b/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
index d69eecfa0..d8cc62cf9 100644
--- a/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
+++ b/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
@@ -19,9 +19,9 @@
 
 package org.apache.comet
 
-import java.io.{BufferedOutputStream, FileOutputStream}
+import java.io.{BufferedOutputStream, BufferedReader, FileOutputStream, FileReader}
 
-import scala.io.Source
+import scala.collection.mutable.ListBuffer
 
 import org.apache.spark.sql.catalyst.expressions.Cast
 
@@ -40,11 +40,12 @@ object GenerateDocs {
   }
 
   private def generateConfigReference(): Unit = {
-    val templateFilename = "docs/templates/configs-template.md"
-    val outputFilename = "docs/source/user-guide/configs.md"
-    val w = new BufferedOutputStream(new FileOutputStream(outputFilename))
-    for (line <- Source.fromFile(templateFilename).getLines()) {
-      if (line.trim == "<!--CONFIG_TABLE-->") {
+    val filename = "docs/source/user-guide/configs.md"
+    val lines = readFile(filename)
+    val w = new BufferedOutputStream(new FileOutputStream(filename))
+    for (line <- lines) {
+      w.write(s"${line.stripTrailing()}\n".getBytes)
+      if (line.trim == "<!--BEGIN:CONFIG_TABLE-->") {
         val publicConfigs = CometConf.allConfs.filter(_.isPublic)
         val confs = publicConfigs.sortBy(_.key)
         w.write("| Config | Description | Default Value |\n".getBytes)
@@ -56,19 +57,18 @@ object GenerateDocs {
           w.write(s"| ${conf.key} | ${conf.doc.trim} | ${conf.defaultValueString} |\n".getBytes)
         }
       }
-      } else {
-        w.write(s"${line.trim}\n".getBytes)
       }
     }
     w.close()
   }
 
   private def generateCompatibilityGuide(): Unit = {
-    val templateFilename = "docs/templates/compatibility-template.md"
-    val outputFilename = "docs/source/user-guide/compatibility.md"
-    val w = new BufferedOutputStream(new FileOutputStream(outputFilename))
-    for (line <- Source.fromFile(templateFilename).getLines()) {
-      if (line.trim == "<!--COMPAT_CAST_TABLE-->") {
+    val filename = "docs/source/user-guide/compatibility.md"
+    val lines = readFile(filename)
+    val w = new BufferedOutputStream(new FileOutputStream(filename))
+    for (line <- lines) {
+      w.write(s"${line.stripTrailing()}\n".getBytes)
+      if (line.trim == "<!--BEGIN:COMPAT_CAST_TABLE-->") {
         w.write("| From Type | To Type | Notes |\n".getBytes)
         w.write("|-|-|-|\n".getBytes)
         for (fromType <- CometCast.supportedTypes) {
@@ -86,7 +86,7 @@ object GenerateDocs {
           }
         }
       }
-      } else if (line.trim == "<!--INCOMPAT_CAST_TABLE-->") {
+      } else if (line.trim == "<!--BEGIN:INCOMPAT_CAST_TABLE-->") {
         w.write("| From Type | To Type | Notes |\n".getBytes)
         w.write("|-|-|-|\n".getBytes)
         for (fromType <- CometCast.supportedTypes) {
@@ -103,10 +103,30 @@ object GenerateDocs {
           }
         }
       }
-      } else {
-        w.write(s"${line.trim}\n".getBytes)
       }
     }
     w.close()
   }
+
+  /** Read file into memory */
+  private def readFile(filename: String): Seq[String] = {
+    val r = new BufferedReader(new FileReader(filename))
+    val buffer = new ListBuffer[String]()
+    var line = r.readLine()
+    var skipping = false
+    while (line != null) {
+      if (line.startsWith("<!--BEGIN:")) {
+        buffer += line
+        skipping = true
+      } else if (line.startsWith("<!--END:")) {
+        buffer += line
+        skipping = false
+      } else if (!skipping) {
+        buffer += line
+      }
+      line = r.readLine()
+    }
+    r.close()
+    buffer.toSeq
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org