GavinRay97 edited a comment on issue #12570:
URL: https://github.com/apache/arrow/issues/12570#issuecomment-1059852761


   Here is a Node.js script that downloads the assets of a nightly release and 
arranges them in a Maven repository structure:
   
   ```json
   {
       "name": "arrow-download-nightly-as-maven-repo",
       "scripts": {
           "start": "node index.mjs"
       },
       "dependencies": {
           "cross-fetch": "^3.1.5",
           "jsdom": "^19.0.0"
       }
   }
   ```
   ```js
   // index.mjs
   // Run with: $ node index.mjs
   import fetch from "cross-fetch"
   import fs from "fs"
   import asyncFS from "fs/promises"
   import { JSDOM } from "jsdom"
   import path from "path"
   import { fileURLToPath } from "url"
   
   // Polyfill "__dirname" for Node.js ECMAScript Module filetype
   const __dirname = path.dirname(fileURLToPath(import.meta.url))
   
   // GitHub release page of the crossbow nightly tag. main() passes this URL to
   // extractArrowNightlyJarsToLocalM2Repo(), which scrapes the page for asset links.
   const ARROW_NIGHTLY_TAG_URL =
       
"https://github.com/ursacomputing/crossbow/releases/tag/nightly-2022-03-03-0-github-java-jars";
   
   /**
    * Script entry point: downloads every asset of the nightly release at
    * ARROW_NIGHTLY_TAG_URL into the local Maven-style directory tree.
    *
    * @returns {Promise<void>} Settles once all assets are downloaded; rejects
    *   with the first error raised while scraping or downloading.
    */
   async function main() {
       // Await the work so that failures reject main()'s promise and reach the
       // caller's error handler instead of becoming unhandled rejections.
       await extractArrowNightlyJarsToLocalM2Repo(ARROW_NIGHTLY_TAG_URL)
   }
   
   // Kick off the script; on any failure print the error and exit with status 1.
   main().then(undefined, (failure) => {
       console.error(failure)
       process.exit(1)
   })
   
   /**
    * Scrapes a GitHub release page for its assets and downloads each one into
    * `<__dirname>/../org/apache/arrow/<library>/<version>/<assetFilename>`.
    *
    * Assets are downloaded sequentially, grouped by library in the order the
    * libraries first appear on the page. "Source code" archives, rows without
    * a link, and assets whose filename does not contain a
    * `<library>-<version>` pair are skipped.
    *
    * @param {string} arrowNightlyTagUrl - URL of the release (tag) page to scrape.
    * @returns {Promise<void>} Resolves when every recognized asset is downloaded.
    */
   async function extractArrowNightlyJarsToLocalM2Repo(arrowNightlyTagUrl) {
       // Parse HTML to DOM
       const dom = await JSDOM.fromURL(arrowNightlyTagUrl)
       const document = dom.window.document

       // Group assets by library name. A Map keeps insertion order and avoids
       // abusing an array as a string-keyed dictionary.
       const assetsByLibrary = new Map()

       // Each <li class="Box-row"> holds one asset name and its download URL
       for (const el of document.querySelectorAll("li.Box-row")) {
           const anchorTag = el.querySelector("a")
           if (!anchorTag) continue

           const assetFilename = anchorTag.textContent.trim()
           if (assetFilename.includes("Source code")) continue

           // The parser returns undefined for names it cannot split; skip those
           // instead of crashing on the destructuring below.
           const parsed = getLibraryAndVersionFromAssetFilename(assetFilename)
           if (!parsed) {
               console.warn("Skipping asset with unrecognized filename: " + assetFilename)
               continue
           }

           const { library, version } = parsed
           const asset = { version, link: anchorTag.href, assetFilename }
           if (assetsByLibrary.has(library)) {
               assetsByLibrary.get(library).push(asset)
           } else {
               assetsByLibrary.set(library, [asset])
           }
       }

       const basePath = "org/apache/arrow"
       for (const [library, versions] of assetsByLibrary) {
           for (const { version, link, assetFilename } of versions) {
               const libPath = `${library}/${version}`
               const fullPath = path.join(__dirname, "../", basePath, libPath)
               // The directory must exist before the write stream is opened,
               // so wait for mkdir to finish.
               await asyncFS.mkdir(fullPath, { recursive: true })
               console.log("Downloading " + assetFilename + " to " + fullPath)
               await downloadUrlAssetToPath(link, path.join(fullPath, assetFilename))
           }
       }
   }
   
   /**
    * Downloads `url` and streams the response body into the file at `filepath`.
    *
    * @param {string} url - Address of the asset to fetch.
    * @param {string} filepath - Destination file; its directory must already exist.
    * @returns {Promise<void>} Resolves once the file has been fully written.
    * @throws {Error} If the server answers with a non-2xx status, or if reading
    *   the body or writing the file fails.
    */
   async function downloadUrlAssetToPath(url, filepath) {
       const response = await fetch(url)
       // Without this check an HTML error page would be saved as the asset.
       if (!response.ok) {
           throw new Error(`Failed to download ${url}: ${response.status} ${response.statusText}`)
       }

       const fileStream = fs.createWriteStream(filepath)
       return new Promise((resolve, reject) => {
           // pipe() does not forward errors, so both ends are watched explicitly.
           response.body.on("error", (err) => {
               fileStream.destroy()
               reject(err)
           })
           // Without this handler a write failure (missing directory,
           // permissions, ...) would leave the promise pending forever.
           fileStream.on("error", reject)
           fileStream.on("finish", resolve)
           response.body.pipe(fileStream)
       })
   }
   
   /**
    * Splits a nightly asset filename into its library name and version.
    *
    * M2 repo folder format:
    * org/apache/arrow/<lib-name>/<version>/<lib-name>-<version>.(ext)
    *
    * @param {string} filename - Asset filename, e.g. "arrow-avro-8.0.0.dev165-tests.jar".
    * @returns {{library: string, version: string} | undefined} The named regex
    *   groups, or undefined when the filename has no
    *   "<library>-<major>.<minor>.<patch>.dev<N>" part.
    */
   function getLibraryAndVersionFromAssetFilename(filename) {
       // Each version component may have several digits (e.g. 10.0.0.dev12),
       // and the dot before "dev" is a literal dot.
       const libraryAndVersionRegex = /(?<library>.+)-(?<version>\d+\.\d+\.\d+\.dev\d+)/
       return filename.match(libraryAndVersionRegex)?.groups
   }
   ```
   
   ```sh
   user@MSI:~/projects/arrow-download-nightly-as-maven-repo$ tree org/
   org/
   └── apache
       └── arrow
           ├── arrow-algorithm
           │   └── 8.0.0.dev165
           │       ├── arrow-algorithm-8.0.0.dev165-javadoc.jar
           │       ├── arrow-algorithm-8.0.0.dev165-sources.jar
           │       ├── arrow-algorithm-8.0.0.dev165-tests.jar
           │       ├── arrow-algorithm-8.0.0.dev165.jar
           │       └── arrow-algorithm-8.0.0.dev165.pom
           ├── arrow-avro
           │   └── 8.0.0.dev165
           │       ├── arrow-avro-8.0.0.dev165-javadoc.jar
           │       ├── arrow-avro-8.0.0.dev165-sources.jar
           │       ├── arrow-avro-8.0.0.dev165-tests.jar
           │       ├── arrow-avro-8.0.0.dev165.jar
           │       └── arrow-avro-8.0.0.dev165.pom
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to