pitrou commented on code in PR #40520:
URL: https://github.com/apache/arrow/pull/40520#discussion_r1624489668
##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
names.append(name)
return names
+ def _remove_weak_symbols(self, symbol_info):
+ return [line for line in symbol_info if not line.endswith(
+ (" v", " V", " w", " W"))]
+
+ def _remove_symbol_versions(self, symbol_info):
+ return [line.split('@')[0].strip() for line in symbol_info]
+
+ def list_symbols_for_dependency(self, dependency, remove_symbol_versions=False):
+ if dependency == 'linux-vdso.so.1':
+ # this is a virtual library, thus symbols cannot be listed
+ return []
+ result = _nm.run('-D', '-P', dependency, stdout=subprocess.PIPE)
+ lines = result.stdout.decode('utf-8').splitlines()
+ if remove_symbol_versions:
+ lines = self._remove_symbol_versions(lines)
+ return self._remove_weak_symbols(lines)
+
+ def list_undefined_symbols_for_dependency(self, dependency,
+ remove_symbol_versions=False):
+ result = _nm.run('-u', '-P', dependency, stdout=subprocess.PIPE)
+ lines = result.stdout.decode('utf-8').splitlines()
+ if remove_symbol_versions:
+ lines = self._remove_symbol_versions(lines)
+ return self._remove_weak_symbols(lines)
+
+ def extract_library_paths(self, file_path):
+ system = platform.system()
+ paths = {}
+ if system == 'Linux':
+ result = _ldd.run(file_path, stdout=subprocess.PIPE)
+ lines = result.stdout.decode('utf-8').splitlines()
+ for line in lines:
+ # Input:
+ # librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 (0x00007f8c9dd90000)
+ # Match:
+ # group(1): librt.so.1
+ # group(2): /lib/x86_64-linux-gnu/librt.so.1
+ match = re.search(r'(\S*) => (\S*)', line)
+ if match:
+ paths[match.group(1)] = match.group(2)
+ else:
+ match = re.search(r'(\S*) \(0x[0-9a-fA-F]*\)', line)
+ # Input:
+ # /lib64/ld-linux-x86-64.so.2 (0x00007c1af3a26000)
+ # Match:
+ # group(1): /lib64/ld-linux-x86-64.so.2
+ if match:
+ paths[match.group(1)] = match.group(1)
+ else:
+ raise NotImplementedError(f"{system} is not supported")
+ return paths
+
+
+def _check_undefined_symbols(dylib):
+ # Check for undefined symbols
+ undefined_symbols = dylib.list_undefined_symbols_for_dependency(
+ dylib.path, remove_symbol_versions=True)
+ expected_lib_paths = dylib.extract_library_paths(dylib.path)
+ all_paths = list(expected_lib_paths.values())
+
+ for lib_path in all_paths:
+ if lib_path:
+ expected_symbols = dylib.list_symbols_for_dependency(
+ lib_path, remove_symbol_versions=True)
Review Comment:
You can probably make this a set for faster lookup in case the list of
exported library symbols is large:
```suggestion
expected_symbols = set(dylib.list_symbols_for_dependency(
lib_path, remove_symbol_versions=True))
```
##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
names.append(name)
return names
+ def _remove_weak_symbols(self, symbol_info):
+ return [line for line in symbol_info if not line.endswith(
+ (" v", " V", " w", " W"))]
+
+ def _remove_symbol_versions(self, symbol_info):
+ return [line.split('@')[0].strip() for line in symbol_info]
+
+ def list_symbols_for_dependency(self, dependency, remove_symbol_versions=False):
Review Comment:
Rather than take `dependency` as an argument, why not just access
`self.path`?
##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
names.append(name)
return names
+ def _remove_weak_symbols(self, symbol_info):
+ return [line for line in symbol_info if not line.endswith(
+ (" v", " V", " w", " W"))]
+
+ def _remove_symbol_versions(self, symbol_info):
+ return [line.split('@')[0].strip() for line in symbol_info]
+
+ def list_symbols_for_dependency(self, dependency, remove_symbol_versions=False):
+ if dependency == 'linux-vdso.so.1':
+ # this is a virtual library, thus symbols cannot be listed
+ return []
+ result = _nm.run('-D', '-P', dependency, stdout=subprocess.PIPE)
+ lines = result.stdout.decode('utf-8').splitlines()
+ if remove_symbol_versions:
+ lines = self._remove_symbol_versions(lines)
+ return self._remove_weak_symbols(lines)
+
+ def list_undefined_symbols_for_dependency(self, dependency,
+ remove_symbol_versions=False):
+ result = _nm.run('-u', '-P', dependency, stdout=subprocess.PIPE)
+ lines = result.stdout.decode('utf-8').splitlines()
+ if remove_symbol_versions:
+ lines = self._remove_symbol_versions(lines)
+ return self._remove_weak_symbols(lines)
+
+ def extract_library_paths(self, file_path):
+ system = platform.system()
+ paths = {}
+ if system == 'Linux':
+ result = _ldd.run(file_path, stdout=subprocess.PIPE)
+ lines = result.stdout.decode('utf-8').splitlines()
+ for line in lines:
+ # Input:
+ # librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 (0x00007f8c9dd90000)
+ # Match:
+ # group(1): librt.so.1
+ # group(2): /lib/x86_64-linux-gnu/librt.so.1
+ match = re.search(r'(\S*) => (\S*)', line)
+ if match:
+ paths[match.group(1)] = match.group(2)
+ else:
+ match = re.search(r'(\S*) \(0x[0-9a-fA-F]*\)', line)
+ # Input:
+ # /lib64/ld-linux-x86-64.so.2 (0x00007c1af3a26000)
+ # Match:
+ # group(1): /lib64/ld-linux-x86-64.so.2
+ if match:
+ paths[match.group(1)] = match.group(1)
+ else:
+ raise NotImplementedError(f"{system} is not supported")
+ return paths
+
+
+def _check_undefined_symbols(dylib):
Review Comment:
I'm curious: why is this a function and not a `DynamicLibrary` method?
##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
names.append(name)
return names
+ def _remove_weak_symbols(self, symbol_info):
+ return [line for line in symbol_info if not line.endswith(
+ (" v", " V", " w", " W"))]
+
+ def _remove_symbol_versions(self, symbol_info):
+ return [line.split('@')[0].strip() for line in symbol_info]
Review Comment:
These could be `classmethod`s as they are not accessing any instance
variables.
##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +63,84 @@ def list_dependency_names(self):
names.append(name)
return names
+ def _remove_weak_symbols(self, symbol_info):
+ return [line for line in symbol_info if not line.endswith(
+ (" v", " V", " w", " W"))]
+
+ def _remove_symbol_versions(self, symbol_info):
+ return [line.split('@')[0].strip() for line in symbol_info]
+
+ def list_symbols_for_dependency(self, dependency, remove_symbol_versions=False):
Review Comment:
Same question for the other methods below: could they access `self.path`
instead of taking `dependency` (or `file_path`) as an argument?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]