[ 
https://issues.apache.org/jira/browse/FLINK-30232?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Martijn Visser reassigned FLINK-30232:
--------------------------------------

    Assignee: Martijn Visser

> shading of netty epoll shared library does not account for ARM64 platform
> -------------------------------------------------------------------------
>
>                 Key: FLINK-30232
>                 URL: https://issues.apache.org/jira/browse/FLINK-30232
>             Project: Flink
>          Issue Type: Bug
>          Components: BuildSystem / Shaded
>    Affects Versions: 1.15.2
>         Environment: Kubernetes 1.23 provided by AWS managed Kubernetes 
> service (EKS) with Graviton 2 based EC2 instances (ARM64) using Flink 1.15.2, 
> native epoll enabled (taskmanager.network.netty.transport: epoll)
>            Reporter: Chris Thomson
>            Assignee: Martijn Visser
>            Priority: Major
>
> While evaluating the migration of a Flink application to Graviton 2 based EC2 
> instances in an AWS managed Kubernetes service (EKS) using Kubernetes 1.23, 
> we found that the shaded Netty library renames the AMD64 version of the shared 
> library as part of the relocation of the Netty library but does not rename the 
> matching ARM64 shared library. This results in the following error when 
> `taskmanager.network.netty.transport: epoll` is used:
>  
>  
> {{Suppressed: java.lang.UnsatisfiedLinkError: no 
> org_apache_flink_shaded_netty4_netty_transport_native_epoll in 
> java.library.path}}
> {{at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1860) ~[?:1.8.0_352]}}
> {{at java.lang.Runtime.loadLibrary0(Runtime.java:843) ~[?:1.8.0_352]}}
> {{at java.lang.System.loadLibrary(System.java:1136) ~[?:1.8.0_352]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryUtil.loadLibrary(NativeLibraryUtil.java:38)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
> ~[?:1.8.0_352]}}
> {{at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
> ~[?:1.8.0_352]}}
> {{at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  ~[?:1.8.0_352]}}
> {{at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_352]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader$1.run(NativeLibraryLoader.java:335)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at java.security.AccessController.doPrivileged(Native Method) 
> ~[?:1.8.0_352]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader.loadLibraryByHelper(NativeLibraryLoader.java:327)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader.loadLibrary(NativeLibraryLoader.java:293)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader.load(NativeLibraryLoader.java:136)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Native.loadNativeLibrary(Native.java:309)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Native.<clinit>(Native.java:85)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Epoll.<clinit>(Epoll.java:40)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.<clinit>(EpollEventLoop.java:51)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.newChild(EpollEventLoopGroup.java:185)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.newChild(EpollEventLoopGroup.java:36)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.concurrent.MultithreadEventExecutorGroup.<init>(MultithreadEventExecutorGroup.java:84)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.concurrent.MultithreadEventExecutorGroup.<init>(MultithreadEventExecutorGroup.java:60)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.concurrent.MultithreadEventExecutorGroup.<init>(MultithreadEventExecutorGroup.java:49)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.MultithreadEventLoopGroup.<init>(MultithreadEventLoopGroup.java:59)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.<init>(EpollEventLoopGroup.java:113)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.<init>(EpollEventLoopGroup.java:100)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoopGroup.<init>(EpollEventLoopGroup.java:77)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.io.network.netty.NettyClient.initEpollBootstrap(NettyClient.java:164)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.io.network.netty.NettyClient.init(NettyClient.java:79)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.io.network.netty.NettyConnectionManager.start(NettyConnectionManager.java:87)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.io.network.NettyShuffleEnvironment.start(NettyShuffleEnvironment.java:329)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerServices.fromConfiguration(TaskManagerServices.java:293)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.startTaskManager(TaskManagerRunner.java:623)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.createTaskExecutorService(TaskManagerRunner.java:559)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.startTaskManagerRunnerServices(TaskManagerRunner.java:245)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.start(TaskManagerRunner.java:288)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.runTaskManager(TaskManagerRunner.java:481)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.lambda$runTaskManagerProcessSecurely$5(TaskManagerRunner.java:525)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.security.contexts.NoOpSecurityContext.runSecured(NoOpSecurityContext.java:28)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.runTaskManagerProcessSecurely(TaskManagerRunner.java:525)
>  [flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.runtime.taskexecutor.TaskManagerRunner.runTaskManagerProcessSecurely(TaskManagerRunner.java:505)
>  [flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.kubernetes.taskmanager.KubernetesTaskExecutorRunner.main(KubernetesTaskExecutorRunner.java:39)
>  [flink-dist-1.15.2.jar:1.15.2]}}
> {{Caused by: java.io.FileNotFoundException: 
> META-INF/native/liborg_apache_flink_shaded_netty4_netty_transport_native_epoll_aarch_64.so}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.util.internal.NativeLibraryLoader.load(NativeLibraryLoader.java:170)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Native.loadNativeLibrary(Native.java:306)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Native.<clinit>(Native.java:85)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{at 
> org.apache.flink.shaded.netty4.io.netty.channel.epoll.Epoll.<clinit>(Epoll.java:40)
>  ~[flink-dist-1.15.2.jar:1.15.2]}}
> {{... 25 more}}
>  
> [https://github.com/apache/flink-shaded/blob/3082afc952e68366e9fefe4d1181c4666969ee67/flink-shaded-netty-4/pom.xml#L97]
>  appears to be where the problem is: it only renames the x86_64 shared 
> library and does not account for the aarch_64 shared library.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to