pgwhalen commented on code in PR #77:
URL: https://github.com/apache/datafusion-java/pull/77#discussion_r3298498467
##########
core/src/main/java/org/apache/datafusion/NativeLibraryLoader.java:
##########
@@ -19,16 +19,150 @@
package org.apache.datafusion;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.FileAlreadyExistsException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+/**
+ * Loads the {@code datafusion_jni} native library on demand.
+ *
+ * <p>The loader first tries {@link System#loadLibrary(String)} so that
+ * operators can override the bundled library by placing a build on
+ * {@code java.library.path} (for example via
+ * {@code -Djava.library.path=...} or {@code LD_LIBRARY_PATH}). If that
+ * fails the loader extracts the platform-specific library from the JAR
+ * resource tree and loads it via {@link System#load(String)}.
+ *
+ * <p>The bundled libraries live at the conventional path
+ * {@code org/apache/datafusion/<os>/<arch>/<libfile>}.
+ * Extracted files are written under
+ * {@code $TMPDIR/datafusion-java/<sha256>/} so that concurrent JVMs
+ * sharing a temp directory converge on the same file rather than each
+ * extracting their own copy.
+ */
public final class NativeLibraryLoader {
- private static final String LIBRARY_NAME = "datafusion_jni";
- private static boolean loaded = false;
+
+ private static final String TMP_DIR_NAME = "datafusion-java";
+
+ private static volatile boolean loaded;
private NativeLibraryLoader() {}
public static synchronized void loadLibrary() {
- if (!loaded) {
- System.loadLibrary(LIBRARY_NAME);
+ if (loaded) {
+ return;
+ }
+ if (tryLoadFromLibraryPath()) {
loaded = true;
+ return;
+ }
+ loadFromClasspath();
+ loaded = true;
+ }
+
+ private static boolean tryLoadFromLibraryPath() {
+ try {
+ System.loadLibrary(Platform.LIBRARY_NAME);
+ return true;
+ } catch (UnsatisfiedLinkError ignored) {
+ return false;
+ }
+ }
+
+ private static void loadFromClasspath() {
+ Platform platform = Platform.current();
+ String resource = platform.resourcePath();
+ try (InputStream check =
NativeLibraryLoader.class.getResourceAsStream(resource)) {
+ if (check == null) {
+ throw new UnsatisfiedLinkError(
+ "No bundled datafusion_jni library for " + platform
+ + " (expected classpath:" + resource + ")."
+ + " Build the native crate and add it to java.library.path,"
+ + " or depend on a JAR built for this platform.");
+ }
+ } catch (IOException e) {
+ throw linkError("Failed to probe " + resource, e);
+ }
+
+ try {
+ Path extracted = extractToTempDir(resource, platform.libFileName());
+ System.load(extracted.toAbsolutePath().toString());
+ } catch (IOException e) {
+ throw linkError("Failed to extract " + resource, e);
+ }
+ }
+
+ private static Path extractToTempDir(String resource, String fileName)
throws IOException {
+ Path tmpRoot = Files.createDirectories(
+ Paths.get(System.getProperty("java.io.tmpdir"), TMP_DIR_NAME));
+ Path staging = Files.createTempFile(tmpRoot, fileName + ".", ".part");
+
+ String hash;
+ try (InputStream raw =
NativeLibraryLoader.class.getResourceAsStream(resource);
+ DigestInputStream in = new DigestInputStream(raw, sha256());
+ OutputStream out = Files.newOutputStream(staging)) {
+ in.transferTo(out);
+ hash = toHex(in.getMessageDigest().digest());
+ } catch (IOException e) {
+ Files.deleteIfExists(staging);
+ throw e;
+ }
+
+ Path versionedDir = Files.createDirectories(tmpRoot.resolve(hash));
+ Path target = versionedDir.resolve(fileName);
+
+ if (Files.exists(target) && Files.size(target) == Files.size(staging)) {
+ Files.deleteIfExists(staging);
+ return target;
+ }
+
+ try {
+ Files.move(staging, target, StandardCopyOption.ATOMIC_MOVE);
+ } catch (FileAlreadyExistsException e) {
+ // Another JVM extracted the same content while we were writing.
+ // Their copy is identical (same SHA-256), so discard ours.
+ Files.deleteIfExists(staging);
+ } catch (IOException e) {
+ // Atomic move not supported on this filesystem. Fall back to a
+ // replacement move; the hash directory guarantees content equality.
+ try {
+ Files.move(staging, target, StandardCopyOption.REPLACE_EXISTING);
+ } catch (IOException retry) {
+ Files.deleteIfExists(staging);
+ throw retry;
+ }
}
+ return target;
Review Comment:
I am _far_ from a security expert, but I think this allows for an attack
where the JVM loads a malicious library rather than the intended one. If the
attacker knows the hash and the size of the native library, they can create the
temp dir based on the hash and the malicious lib with the right size, and this
code will load it. I believe checking the hash of the actual library too would
prevent that.
Maybe it's not an attack surface we care about, but since this library might
be used pretty generally at some point, I figured I'd mentioned with approval.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]