This is an automated email from the ASF dual-hosted git repository.
jadams-tresys pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil-sbt.git
The following commit(s) were added to refs/heads/main by this push:
new 20ef87e Add flowerpress functionality to daffodil-sbt
20ef87e is described below
commit 20ef87eada59df2e05774edf0c0d0166e2070dc3
Author: Josh Adams <[email protected]>
AuthorDate: Mon Apr 27 08:34:36 2026 -0400
Add flowerpress functionality to daffodil-sbt
Notable improvements from the current released flowerpress:
- Works on XSLT files as well
- Only packages files that are referenced by the projects files
---
README.md | 66 +++++
.../scala/org/apache/daffodil/DaffodilPlugin.scala | 271 ++++++++++++++++++++-
2 files changed, 336 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 969c65f..f71e118 100644
--- a/README.md
+++ b/README.md
@@ -323,6 +323,72 @@ root `src/` directory, and all test source and resource
files to be in a root
`test/` directory. Source files are those that end with `*.scala` or `*.java`,
and resource files are anything else.
+### Flatten Schemas
+
+Many non-Daffodil XML/XSD programs (such as XML validators) do not resolve
+schemaLocations in the same way that Daffodil does and will often struggle to
+find schemas that aren't in the same directory as the root schema. By flattening
+the directory structure at a common root (i.e. the directory containing 'com/'
+and/or 'org/') we can rename all of the schemas while avoiding any conflicts in
+schema name for generically named schemas, like 'baseFormat.dfdl.xsd'. Having
+all of the schemas and schemaLocations renamed should allow tools with less
+robust schemaLocation resolvers to just work.
+
+This plugin has functionality to flatten the directory structure of one or more
+schema projects, renaming the schema files and updating schemaLocations as
+necessary.
+
+```bash
+sbt daffodilFlattenSchemas
+```
+
+The renaming works as follows:
+
+`org/apache/daffodil/xsd/main.dfdl.xsd`
+
+will be renamed to:
+
+`org__apache__daffodil__xsd__main.dfdl.xsd`
+
+Note: Original files are not modified; they are simply copied with the new
+name and updated schemaLocations, then packaged into the output zip file.
+
+#### Flatten Schemas Settings
+
+
+##### daffodilFlattenTarget
+
+Sets the target path for outputting the flattened schema zip file.
+
+Default location: `<name>-<version>-flat.zip` in the `target/` directory.
+
+##### daffodilFlattenResourceReferencePatterns
+
+Defines the regular expression patterns used for matching references in schema
+documents. Each regular expression must capture the reference string in its
+first capture group.
+
+By default it supports XSD include/import `schemaLocation` attributes (but not
+`xsi:schemaLocation`), `href` attributes, and XSLT `document('...')` calls.
+
+##### daffodilFlattenSchemas / includeFilter
+
+Defines a filter for detecting which files in the resource directories of the
+current project should be included in the flattened schema package. Files from
+the current project's dependencies will be added if they are referenced by the
+current project's schema files.
+
+By default only files with the following extensions are included:
+`*.xsd | *.xsl | *.xslt | *.xml`
+
+##### daffodilFlattenSchemas / excludeFilter
+
+Defines a filter for which files to exclude from the flattened schema package.
+Again, this setting only affects files from the current project. It will not
+affect files from dependencies of the current project.
+
+Hidden files/directories are excluded by default.
+
### Cross-Building
In some cases it is helpful to have a single SBT project that supports the
diff --git a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala
b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala
index 4592b40..ef4e123 100644
--- a/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala
+++ b/src/main/scala/org/apache/daffodil/DaffodilPlugin.scala
@@ -18,8 +18,12 @@
package org.apache.daffodil
import java.io.File
+import java.net.{ URI, URL, URLClassLoader }
+import java.nio.charset.Charset
+import java.nio.file.{ FileSystem, FileSystems, Files, Paths }
import scala.language.implicitConversions
import scala.util.Properties
+import scala.util.matching.Regex
import sbt.Keys._
import sbt._
@@ -63,6 +67,15 @@ object DaffodilPlugin extends AutoPlugin {
val daffodilTdmlUsesPackageBin = settingKey[Boolean](
"Whether or not TDML files use the saved parsers created by
daffodilPackageBin"
)
+ val daffodilFlattenTarget = settingKey[File](
+ "File to write the flattened schemas package to"
+ )
+ val daffodilFlattenSchemas = taskKey[Unit](
+ "flatten the directory structure of all schemas and schema dependencies
to a single common directory and update 'schemaLocation' paths to match"
+ )
+ val daffodilFlattenResourceReferencePatterns = settingKey[Seq[Regex]](
+ """Sequence of patterns that will match resource references, ie
'schemaLocation="/org/apache/..."'. Note that the pattern must capture the
reference in the first capture group"""
+ )
/**
* Class to define daffodilPackageBinInfos, auto-imported to simplify sbt
configs
@@ -593,6 +606,259 @@ object DaffodilPlugin extends AutoPlugin {
} else {
existingMappings
}
+ },
+
+ /**
+ * Everything below here is for daffodilFlattenSchemas
+ */
+ daffodilFlattenTarget := target.value /
s"${name.value}-${version.value}-flat.zip",
+
+ /**
+  * Whether or not to publish the flattened schemas zip. Defaults to false.
+  *
+  * If projects want to publish flattened schemas then they must explicitly
+  * enable it by setting 'daffodilFlattenSchemas / publishArtifact := true'.
+  *
+  * If false, flattened schemas will not be created unless you explicitly run
+  * the daffodilFlattenSchemas task.
+  */
+ daffodilFlattenSchemas / publishArtifact := false,
+
+ daffodilFlattenSchemas / artifact := Artifact(
+ name.value,
+ "flat",
+ "zip",
+ Some("flat"),
+ Vector(),
+ None
+ ),
+
+ /**
+ * Only grab the file types that need to be flattened. These files are only
+ * grabbed from the root project. Any referenced files will be pulled in
+ * from the classpath. Note that XML files are commonly used in XSLT for
+ * settings.
+ */
+ daffodilFlattenSchemas / includeFilter := "*.xsd" | "*.xsl" | "*.xslt" |
"*.xml",
+ daffodilFlattenSchemas / excludeFilter := HiddenFileFilter,
+
+ // Default patterns used to find references to other files inside a flattened
+ // file. Each pattern captures the referenced path in its first capture group.
+ daffodilFlattenResourceReferencePatterns := List(
+ // schemaLocation="..." attributes, skipping any that are immediately
+ // preceded by "xsi:" (negative lookbehind)
+ """(?<!xsi:)schemaLocation=\"([^\"]*)\"""".r,
+ // href="..." attributes
+ "href=\"([^\"]*)\"".r,
+ // document('...') calls whose argument is a single-quoted string
+ "document[(]'([^']*)'[)]".r
+ ),
+
+ daffodilFlattenSchemas / products := {
+
+ val logger = streams.value.log
+ val filter =
+ (daffodilFlattenSchemas / includeFilter).value --
(daffodilFlattenSchemas / excludeFilter).value
+
+ val flatDir = target.value / "flatDir"
+ if (flatDir.exists())
+ IO.delete(flatDir)
+ IO.createDirectory(flatDir)
+
+ val projectURIs = (Compile / resourceDirectories).value.map { root =>
+ (root ** filter).get.map(_.toURI).toList
+ }.flatten
+
+ val resourceDirURLs = (Compile / resourceDirectories).value
+ .filter(_.exists())
+ .map(_.toURI.toURL)
+
+ /**
+ * Create a URLClassLoader object with URLs to all resources used by the
+ * project. The class loader will be used to resolve references made
+ * within the flattened files.
+ */
+ val allClasspathURLs = (Test /
externalDependencyClasspath).value.map(_.data.toURI.toURL)
+ val classLoader = new URLClassLoader((resourceDirURLs ++
allClasspathURLs).toArray)
+
+ val referenceRegexes = daffodilFlattenResourceReferencePatterns.value
+ val jarRegex = "jar:file:(.*/(.*).jar)!(.*)".r
+ val jarFileSystems = scala.collection.mutable.Map[String, FileSystem]()
+
+ /**
+  * Finds the Compile resource directory that contains the file referenced
+  * by the given URI and returns it as a Path.
+  *
+  * @param path a "file" scheme URI
+  * @return the first resource directory whose path is a prefix of the file
+  * @throws IllegalArgumentException if the URI is not a "file" URI or the
+  *         file is not inside any of the Compile resource directories
+  */
+ def getRootPath(path: URI) = {
+   path.getScheme match {
+     case "file" => {
+       val filePath = Paths.get(path)
+       (Compile / resourceDirectories).value
+         .find(dir => filePath.startsWith(dir.getPath))
+         .map(_.toPath)
+         .getOrElse(
+           // Fail with context instead of a bare NoSuchElementException
+           throw new IllegalArgumentException(
+             s"File is not inside any resource directory: $path"
+           )
+         )
+     }
+     case _ =>
+       // Previously a MatchError; also covers a null scheme
+       throw new IllegalArgumentException(s"Unrecognized URI scheme: $path")
+   }
+ }
+
+ val seen = scala.collection.mutable.Set[URL]()
+ val unprocessed = scala.collection.mutable.Stack[URI]()
+ val unresolved = scala.collection.mutable.ArrayBuffer[(URI, String)]()
+ unprocessed.pushAll(projectURIs)
+ seen ++= projectURIs.map(_.toURL)
+ while (!unprocessed.isEmpty) {
+ val contextURI = unprocessed.pop
+ // Read the whole file into memory, making sure the stream is closed
+ // afterwards even if the read fails
+ val bytes = {
+   val in = contextURI.toURL.openStream()
+   try in.readAllBytes()
+   finally in.close()
+ }
+
+ // Get a Java Path for the current file (whether it is in a JAR or
+ // just a regular file) and a Path for where the current file will be
+ // moved to in the flattened directory structure. Note that using a
+ // Java Path on a regular file will use the default FileSystem while
for
+ // JARs a FileSystem is created for interacting with the files inside
+ // the JAR. This allows all Path functions like resolveSibling/exists
to
+ // work with both regular files or files contained inside a JAR.
+ val (contextPath, flatPath) = contextURI.getScheme match {
+ case "jar" =>
+ contextURI.toString match {
+ case jarRegex(jarPath, jarName, path) => {
+ val fs = jarFileSystems.getOrElseUpdate(
+ jarPath.replaceAll("/+", "/"),
+ FileSystems.newFileSystem(contextURI, new
java.util.HashMap[String, Any]())
+ )
+ val cPath = fs.getPath(path)
+ (
+ cPath,
+ Paths.get(
+ flatDir.toString,
+ s"${jarName}__${cPath.toString.tail.replaceAll("/", "__")}"
+ )
+ )
+ }
+ case _ =>
+ throw new IllegalArgumentException(s"Unable to parse JAR URI:
$contextURI")
+ }
+ case "file" => {
+ val path = Paths.get(contextURI)
+ val root = getRootPath(contextURI)
+ (
+ path,
+ Paths.get(
+ flatDir.toString,
+
root.relativize(path).toString.replaceAllLiterally(File.separator, "__")
+ )
+ )
+ }
+ case _ =>
+ throw new IllegalArgumentException(s"Unrecognized URI scheme:
$contextURI")
+ }
+
+ assert(
+ !Files.exists(flatPath),
+ s"File $flatPath already exists and would be overwritten, aborting!"
+ )
+ val bw = Files.newBufferedWriter(flatPath)
+ val fileAsString = new String(bytes, Charset.defaultCharset())
+
+ val references = referenceRegexes.flatMap { re =>
+ re.findAllIn(fileAsString).matchData.map(_.group(1))
+ }
+
+ val resolvedRefs = references.flatMap { ref =>
+ val optResolved = if (ref.startsWith("/")) {
+ // Dealing with an absolute path
+ Option(classLoader.findResource(ref.tail))
+ } else {
+ // Relative path
+ val refPath = contextPath.resolveSibling(ref).normalize()
+ if (Files.exists(refPath)) {
+ // Referenced path exists in either the same root resource
+ // directory or jar file as the context schema
+ Some(refPath.toUri.toURL)
+ } else {
+ val optResolvedRelative = contextURI.getScheme match {
+ case "file" => {
+ // Need to check other resource directories
+ val contextRoot = getRootPath(contextURI)
+ val relPath = contextRoot.relativize(refPath)
+ val resolvedRoot = (Compile /
resourceDirectories).value.find(root =>
+ Files.exists(root.toPath.resolve(relPath))
+ )
+ resolvedRoot.map(_.toPath.resolve(relPath).toUri.toURL)
+ }
+ case "jar" =>
+ // Nothing else to check, resolveSibling should have found
it in the same jar
+ None
+ }
+ // The orElse call is to support the deprecated behavior of
+ // resolving relative paths as if they were absolute
+ optResolvedRelative.orElse(Option(classLoader.findResource(ref)))
+ }
+ }
+ if (optResolved.isEmpty)
+ unresolved.append((contextURI, ref))
+ optResolved.map(resolved => ref -> resolved)
+ }.toMap
+
+ // The file contents with every resolved reference rewritten to the
+ // flattened name of the file it points to. Flattened names are the
+ // path relative to its root (prefixed with the jar name for files
+ // inside a jar) with path separators changed to "__".
+ val updatedFileAsString = {
+   resolvedRefs.foldLeft(fileAsString) {
+     case (input, (ref, resolvedURL)) => {
+       val resolvedURI = resolvedURL.toURI
+       val relativized = resolvedURI.getScheme match {
+         case "jar" =>
+           resolvedURI.toString match {
+             case jarRegex(_, jarName, path) =>
+               s"$jarName/${path.tail}"
+             case _ =>
+               // Same error as when the context URI itself cannot be parsed
+               throw new IllegalArgumentException(
+                 s"Unable to parse JAR URI: $resolvedURI"
+               )
+           }
+         case _ => {
+           val resolvedRoot = getRootPath(resolvedURI)
+           resolvedRoot.relativize(Paths.get(resolvedURI)).toString
+         }
+       }
+       // Relative file paths use the platform separator while jar entry
+       // paths always use "/", so convert both to match the names the
+       // flattened files are written with
+       val flattened = relativized.replace(File.separator, "__").replace("/", "__")
+       // Literal replacement: the reference is plain text, not a regex,
+       // and the new name must not be read as a replacement template
+       input.replace(ref, flattened)
+     }
+   }
+ }
+
+ bw.write(updatedFileAsString)
+ bw.close()
+ val unseen = resolvedRefs.values.filter(seen.add(_))
+ unprocessed.pushAll(unseen.map(_.toURI))
+ }
+
+ // Close any open JAR FileSystems
+ jarFileSystems.values.foreach(_.close())
+
+ // Error unresolved references
+ unresolved.foreach { case (context, ref) =>
+ logger.error(s"Unable to resolve reference to $ref from source file
$context")
+ }
+
+ // Error out if we have any unresolved references
+ if (!unresolved.isEmpty)
+ throw new MessageOnlyException(
+ "Unable to resolve one or more references while flattening"
+ )
+
+ /* Create zip file containing all flattened schemas */
+ val flattenedFiles = IO.listFiles(flatDir).sorted
+ val sources = flattenedFiles.map(file => file -> file.getName())
+ IO.zip(sources, daffodilFlattenTarget.value, Package.defaultTimestamp)
+ logger.info(
+ s"Generated flattened schema package at
${daffodilFlattenTarget.value.toString}"
+ )
+ Seq(daffodilFlattenTarget.value)
+ },
+
+ // Running the task just forces the flattened package to be built. The
+ // result is discarded rather than read with .head, because products is
+ // set to an empty Seq for the cross-version subprojects and .head would
+ // throw there.
+ daffodilFlattenSchemas := {
+   val _ = (daffodilFlattenSchemas / products).value
+   ()
+ },
+
+ artifacts ++= {
+ if ((daffodilFlattenSchemas / publishArtifact).value) {
+ Seq((daffodilFlattenSchemas / artifact).value)
+ } else {
+ Seq.empty
+ }
+ },
+ packagedArtifacts ++= {
+ if ((daffodilFlattenSchemas / publishArtifact).value) {
+ Map(
+ (daffodilFlattenSchemas / artifact).value -> (daffodilFlattenSchemas
/ products).value.head
+ )
+ } else {
+ Map.empty[Artifact, File]
+ }
}
) ++
inConfig(Compile)(packageDaffodilBinSettings) ++
@@ -950,7 +1216,10 @@ object DaffodilPlugin extends AutoPlugin {
publish := {},
publishLocal := {},
publishM2 := {},
- publish / skip := true
+ publish / skip := true,
+ // Only the root project should create/publish flattened schemas
+ daffodilFlattenSchemas / products := Seq(),
+ daffodilFlattenSchemas / publishArtifact := false
)
// create a subproject for each of the daffodil cross versions. Each
subproject is a clone