pitrou commented on code in PR #40520:
URL: https://github.com/apache/arrow/pull/40520#discussion_r1555899249


##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, symbol_info):
+        return [line for line in symbol_info if not re.search(r'\s[Ww]\s', 
line)]
+
+    def _capture_symbols(self, remove_symbol_versions, symbol_info):
+        if remove_symbol_versions:
+            symbol_info = [re.split('@@', line)[0] for line in symbol_info]
+        return symbol_info
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        if dependency == 'linux-vdso.so.1':
+            return []
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        lines = self._capture_symbols(remove_symbol_versions, lines)
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        lines = self._capture_symbols(remove_symbol_versions, lines)
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def extract_library_paths(self, file_path):
+        system = platform.system()
+        paths = {}
+        if system == 'Linux':
+            result = _ldd.run(file_path, stdout=subprocess.PIPE)
+            lines = result.stdout.decode('utf-8').splitlines()
+            for line in lines:
+                # Input:
+                #    librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 
(0x00007f8c9dd90000)
+                # Match:
+                #   group(1): librt.so.1
+                #   group(2): /lib/x86_64-linux-gnu/librt.so.1
+                match = re.search(r'(\S*) => (\S*)', line)
+                if match:
+                    paths[match.group(1)] = match.group(2)
+                else:
+                    match = re.search(r'(\S*) \(.*\)', line)
+                    # Input:
+                    #    /lib64/ld-linux-x86-64.so.2 (0x00007c1af3a26000)
+                    # Match:
+                    #  group(1): /lib64/ld-linux-x86-64.so.2
+                    if match:
+                        paths[match.group(1)] = match.group(1)
+        else:
+            raise ValueError(f"{system} is not supported")
+        return paths
+
+
+def _check_undefined_symbols(dylib, allowed):

Review Comment:
   What is "allowed"? It does not seem used here.



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, symbol_info):
+        return [line for line in symbol_info if not re.search(r'\s[Ww]\s', 
line)]
+
+    def _capture_symbols(self, remove_symbol_versions, symbol_info):
+        if remove_symbol_versions:
+            symbol_info = [re.split('@@', line)[0] for line in symbol_info]
+        return symbol_info
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        if dependency == 'linux-vdso.so.1':
+            return []
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        lines = self._capture_symbols(remove_symbol_versions, lines)
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        lines = self._capture_symbols(remove_symbol_versions, lines)
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def extract_library_paths(self, file_path):
+        system = platform.system()
+        paths = {}
+        if system == 'Linux':
+            result = _ldd.run(file_path, stdout=subprocess.PIPE)
+            lines = result.stdout.decode('utf-8').splitlines()
+            for line in lines:
+                # Input:
+                #    librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 
(0x00007f8c9dd90000)
+                # Match:
+                #   group(1): librt.so.1
+                #   group(2): /lib/x86_64-linux-gnu/librt.so.1
+                match = re.search(r'(\S*) => (\S*)', line)
+                if match:
+                    paths[match.group(1)] = match.group(2)
+                else:
+                    match = re.search(r'(\S*) \(.*\)', line)
+                    # Input:
+                    #    /lib64/ld-linux-x86-64.so.2 (0x00007c1af3a26000)
+                    # Match:
+                    #  group(1): /lib64/ld-linux-x86-64.so.2
+                    if match:
+                        paths[match.group(1)] = match.group(1)
+        else:
+            raise ValueError(f"{system} is not supported")

Review Comment:
   `NotImplementedError`



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, symbol_info):
+        return [line for line in symbol_info if not re.search(r'\s[Ww]\s', 
line)]
+
+    def _capture_symbols(self, remove_symbol_versions, symbol_info):
+        if remove_symbol_versions:
+            symbol_info = [re.split('@@', line)[0] for line in symbol_info]

Review Comment:
   1. You don't need a regular expression for this, do you?
   2. Why the double '@@'? Shouldn't it be '@'?



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, symbol_info):
+        return [line for line in symbol_info if not re.search(r'\s[Ww]\s', 
line)]
+
+    def _capture_symbols(self, remove_symbol_versions, symbol_info):
+        if remove_symbol_versions:

Review Comment:
   With GNU `nm` you can probably use the `--without-symbol-versions` option.



##########
dev/archery/archery/linking.py:
##########
@@ -73,3 +150,18 @@ def check_dynamic_library_dependencies(path, allowed, 
disallowed):
             raise DependencyError(
                 f"Disallowed shared dependency found in {dylib.path}: `{dep}`"
             )
+
+    system = platform.system()
+
+    if system == 'Linux':
+        _check_undefined_symbols(dylib, allowed)
+    elif system == 'Darwin':
+        # TODO: Implement undefined symbol checking for macOS
+        # https://github.com/apache/arrow/issues/40965
+        pass
+    elif system == 'Windows':
+        # TODO: Implement undefined symbol checking for Windows
+        # https://github.com/apache/arrow/issues/40966
+        pass
+    else:
+        raise ValueError(f"{system} is not supported for checking undefined 
symbols.")

Review Comment:
   Please make this `NotImplementedError`.



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, symbol_info):
+        return [line for line in symbol_info if not re.search(r'\s[Ww]\s', 
line)]
+
+    def _capture_symbols(self, remove_symbol_versions, symbol_info):
+        if remove_symbol_versions:
+            symbol_info = [re.split('@@', line)[0] for line in symbol_info]
+        return symbol_info
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        if dependency == 'linux-vdso.so.1':
+            return []
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)

Review Comment:
   If you use `nm -P`, you will get the POSIX portability format as described 
in https://pubs.opengroup.org/onlinepubs/9699919799/utilities/nm.html, which 
should also be easier to parse (you can just split the output on whitespace).



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, symbol_info):
+        return [line for line in symbol_info if not re.search(r'\s[Ww]\s', 
line)]
+
+    def _capture_symbols(self, remove_symbol_versions, symbol_info):
+        if remove_symbol_versions:
+            symbol_info = [re.split('@@', line)[0] for line in symbol_info]
+        return symbol_info
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        if dependency == 'linux-vdso.so.1':
+            return []
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        lines = self._capture_symbols(remove_symbol_versions, lines)
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        lines = self._capture_symbols(remove_symbol_versions, lines)
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def extract_library_paths(self, file_path):
+        system = platform.system()
+        paths = {}
+        if system == 'Linux':
+            result = _ldd.run(file_path, stdout=subprocess.PIPE)
+            lines = result.stdout.decode('utf-8').splitlines()
+            for line in lines:
+                # Input:
+                #    librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 
(0x00007f8c9dd90000)
+                # Match:
+                #   group(1): librt.so.1
+                #   group(2): /lib/x86_64-linux-gnu/librt.so.1
+                match = re.search(r'(\S*) => (\S*)', line)
+                if match:
+                    paths[match.group(1)] = match.group(2)
+                else:
+                    match = re.search(r'(\S*) \(.*\)', line)
+                    # Input:
+                    #    /lib64/ld-linux-x86-64.so.2 (0x00007c1af3a26000)
+                    # Match:
+                    #  group(1): /lib64/ld-linux-x86-64.so.2
+                    if match:
+                        paths[match.group(1)] = match.group(1)
+        else:
+            raise ValueError(f"{system} is not supported")
+        return paths
+
+
+def _check_undefined_symbols(dylib, allowed):
+    # Check for undefined symbols
+    undefined_symbols = 
dylib.list_undefined_symbols_for_dependency(dylib.path, True)

Review Comment:
   Can you pass boolean options by name?
   ```suggestion
       undefined_symbols = 
dylib.list_undefined_symbols_for_dependency(dylib.path, xxx=True)
   ```



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, symbol_info):
+        return [line for line in symbol_info if not re.search(r'\s[Ww]\s', 
line)]
+
+    def _capture_symbols(self, remove_symbol_versions, symbol_info):
+        if remove_symbol_versions:
+            symbol_info = [re.split('@@', line)[0] for line in symbol_info]
+        return symbol_info
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        if dependency == 'linux-vdso.so.1':
+            return []
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        lines = self._capture_symbols(remove_symbol_versions, lines)
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        lines = self._capture_symbols(remove_symbol_versions, lines)
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def extract_library_paths(self, file_path):
+        system = platform.system()
+        paths = {}
+        if system == 'Linux':
+            result = _ldd.run(file_path, stdout=subprocess.PIPE)
+            lines = result.stdout.decode('utf-8').splitlines()
+            for line in lines:
+                # Input:
+                #    librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 
(0x00007f8c9dd90000)
+                # Match:
+                #   group(1): librt.so.1
+                #   group(2): /lib/x86_64-linux-gnu/librt.so.1
+                match = re.search(r'(\S*) => (\S*)', line)
+                if match:
+                    paths[match.group(1)] = match.group(2)
+                else:
+                    match = re.search(r'(\S*) \(.*\)', line)
+                    # Input:
+                    #    /lib64/ld-linux-x86-64.so.2 (0x00007c1af3a26000)
+                    # Match:
+                    #  group(1): /lib64/ld-linux-x86-64.so.2
+                    if match:
+                        paths[match.group(1)] = match.group(1)
+        else:
+            raise ValueError(f"{system} is not supported")
+        return paths
+
+
+def _check_undefined_symbols(dylib, allowed):
+    # Check for undefined symbols
+    undefined_symbols = 
dylib.list_undefined_symbols_for_dependency(dylib.path, True)
+    expected_lib_paths = dylib.extract_library_paths(dylib.path)
+    all_paths = list(expected_lib_paths.values())
+
+    for lib_path in all_paths:
+        if lib_path:
+            expected_symbols = dylib.list_symbols_for_dependency(lib_path, 
True)

Review Comment:
   Same here.



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, symbol_info):
+        return [line for line in symbol_info if not re.search(r'\s[Ww]\s', 
line)]
+
+    def _capture_symbols(self, remove_symbol_versions, symbol_info):
+        if remove_symbol_versions:
+            symbol_info = [re.split('@@', line)[0] for line in symbol_info]
+        return symbol_info
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        if dependency == 'linux-vdso.so.1':
+            return []

Review Comment:
   Why? Can you add a comment?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to