[
https://issues.apache.org/jira/browse/FLINK-30232?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Martijn Visser reassigned FLINK-30232:
--------------------------------------
Assignee: Martijn Visser
> shading of netty epoll shared library does not account for ARM64 platform
> -------------------------------------------------------------------------
>
> Key: FLINK-30232
> URL: https://issues.apache.org/jira/browse/FLINK-30232
> Project: Flink
> Issue Type: Bug
> Components: BuildSystem / Shaded
> Affects Versions: 1.15.2
> Environment: Kubernetes 1.23 provided by AWS managed Kubernetes
> service (EKS) with Graviton 2 based EC2 instances (ARM64) using Flink 1.15.2,
> native epoll enabled (taskmanager.network.netty.transport: epoll)
> Reporter: Chris Thomson
> Assignee: Martijn Visser
> Priority: Major
>
> While evaluating migration of a Flink application to Graviton 2 based EC2
> instances in an AWS managed Kubernetes service (EKS) using Kubernetes 1.23,
> I found that the shaded Netty library renames the AMD64 version of the shared
> library as part of relocation of the Netty library but does not rename the
> matching ARM64 shared library. This results in the following error when
> `taskmanager.network.netty.transport: epoll` is used:
>
>
> {{Suppressed: java.lang.UnsatisfiedLinkError: no
> org_apache_flink_shaded_netty4_netty_transport_native_epoll in
> java.library.path}}
> {{at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1860) ~[?:1.8.0_352]}}
> {{at java.lang.Runtime.loadLibrary0(Runtime.java:843) ~[?:1.8.0_352]}}
> {{at java.lang.System.loadLibrary(System.java:1136) ~[?:1.8.0_352]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryUtil.loadLibrary(NativeLibraryUtil.java:38)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> ~[?:1.8.0_352]}}
> {{at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> ~[?:1.8.0_352]}}
> {{at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> ~[?:1.8.0_352]}}
> {{at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_352]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader$1.run(NativeLibraryLoader.java:335)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at java.security.AccessController.doPrivileged(Native Method)
> ~[?:1.8.0_352]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader.loadLibraryByHelper(NativeLibraryLoader.java:327)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader.loadLibrary(NativeLibraryLoader.java:293)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader.load(NativeLibraryLoader.java:136)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Native.loadNativeLibrary(Native.java:309)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Native.<clinit>(Native.java:85)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Epoll.<clinit>(Epoll.java:40)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.<clinit>(EpollEventLoop.java:51)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.newChild(EpollEventLoopGroup.java:185)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.newChild(EpollEventLoopGroup.java:36)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.concurrent.MultithreadEventExecutorGroup.<init>(MultithreadEventExecutorGroup.java:84)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.concurrent.MultithreadEventExecutorGroup.<init>(MultithreadEventExecutorGroup.java:60)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.concurrent.MultithreadEventExecutorGroup.<init>(MultithreadEventExecutorGroup.java:49)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.MultithreadEventLoopGroup.<init>(MultithreadEventLoopGroup.java:59)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.<init>(EpollEventLoopGroup.java:113)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.<init>(EpollEventLoopGroup.java:100)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.<init>(EpollEventLoopGroup.java:77)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.io.network.netty.NettyClient.initEpollBootstrap(NettyClient.java:164)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.io.network.netty.NettyClient.init(NettyClient.java:79)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.start(NettyConnectionManager.java:87)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.io.network.NettyShuffleEnvironment.start(NettyShuffleEnvironment.java:329)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerServices.fromConfiguration(TaskManagerServices.java:293)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.startTaskManager(TaskManagerRunner.java:623)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.createTaskExecutorService(TaskManagerRunner.java:559)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.startTaskManagerRunnerServices(TaskManagerRunner.java:245)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.start(TaskManagerRunner.java:288)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.runTaskManager(TaskManagerRunner.java:481)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.lambda$runTaskManagerProcessSecurely$5(TaskManagerRunner.java:525)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.security.contexts.NoOpSecurityContext.runSecured(NoOpSecurityContext.java:28)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.runTaskManagerProcessSecurely(TaskManagerRunner.java:525)
> [flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.runTaskManagerProcessSecurely(TaskManagerRunner.java:505)
> [flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.kubernetes.taskmanager.KubernetesTaskExecutorRunner.main(KubernetesTaskExecutorRunner.java:39)
> [flink-dist-1.15.2.jar:1.15.2]}}
> {{Caused by: java.io.FileNotFoundException:
> META-INF/native/liborg_apache_flink_shaded_netty4_netty_transport_native_epoll_aarch_64.so}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader.load(NativeLibraryLoader.java:170)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Native.loadNativeLibrary(Native.java:306)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Native.<clinit>(Native.java:85)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Epoll.<clinit>(Epoll.java:40)
> ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{... 25 more}}
>
> [https://github.com/apache/flink-shaded/blob/3082afc952e68366e9fefe4d1181c4666969ee67/flink-shaded-netty-4/pom.xml#L97]
> appears to be where the problem is: it only renames the x86_64 shared
> library and does not account for the aarch_64 shared library.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)