[ 
https://issues.apache.org/jira/browse/HUDI-5865?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ethan Guo updated HUDI-5865:
----------------------------
    Description: 
On 0.13.0 and latest master, the table service client 
`BaseHoodieTableServiceClient` is instantiated without any timeline server 
instance, even if the regular write client has one.  This causes the table 
service client to start a new embedded timeline server and overwrite the write 
config passed in from the constructor so that the write config points to the 
newly started timeline server.

Because the regular write client, such as `SparkRDDWriteClient`, passes the 
same writeConfig instance directly to the table service client, the regular 
write client's write config is also modified.  The regular write client 
therefore always uses the newly started embedded timeline server, instead of 
the timeline server instance passed in from the constructor or the one 
instantiated by the regular write client itself.

As a result, the Deltastreamer's long-lived timeline server is never used.
{code:java}
BaseHoodieTableServiceClient:

protected BaseHoodieTableServiceClient(HoodieEngineContext context, 
HoodieWriteConfig clientConfig) {
  super(context, clientConfig, Option.empty());
}
->
BaseHoodieClient:

protected BaseHoodieClient(HoodieEngineContext context, HoodieWriteConfig 
clientConfig,
    Option<EmbeddedTimelineService> timelineServer) {
  ...
  startEmbeddedServerView();
}

private synchronized void startEmbeddedServerView() {
  if (config.isEmbeddedTimelineServerEnabled()) {
    if (!timelineServer.isPresent()) {
      // Run Embedded Timeline Server
      try {
        timelineServer = 
EmbeddedTimelineServerHelper.createEmbeddedTimelineService(context, config);
      } catch (IOException e) {
        LOG.warn("Unable to start timeline service. Proceeding as if embedded 
server is disabled", e);
        stopEmbeddedServerView(false);
      }
    } else {
      LOG.info("Timeline Server already running. Not restarting the service");
    }
  } else {
    LOG.info("Embedded Timeline Server is disabled. Not starting timeline 
service");
  }
}

public static synchronized Option<EmbeddedTimelineService> 
createEmbeddedTimelineService(
    HoodieEngineContext context, HoodieWriteConfig config) throws IOException {
  if (config.isEmbeddedTimelineServerReuseEnabled()) {
    if (!TIMELINE_SERVER.isPresent() || 
!TIMELINE_SERVER.get().canReuseFor(config.getBasePath())) {
      TIMELINE_SERVER = Option.of(startTimelineService(context, config));
    } else {
      updateWriteConfigWithTimelineServer(TIMELINE_SERVER.get(), config);
    }
    return TIMELINE_SERVER;
  }
  if (config.isEmbeddedTimelineServerEnabled()) {
    return Option.of(startTimelineService(context, config));
  } else {
    return Option.empty();
  }
}

public static void updateWriteConfigWithTimelineServer(EmbeddedTimelineService 
timelineServer,
    HoodieWriteConfig config) {
  // Allow executor to find this newly instantiated timeline service
  if (config.isEmbeddedTimelineServerEnabled()) {
    config.setViewStorageConfig(timelineServer.getRemoteFileSystemViewConfig());
  }
}{code}
SparkRDDWriteClient:
{code:java}
public SparkRDDWriteClient(HoodieEngineContext context, HoodieWriteConfig 
writeConfig,
                           Option<EmbeddedTimelineService> timelineService) {
  super(context, writeConfig, timelineService, 
SparkUpgradeDowngradeHelper.getInstance());
  this.tableServiceClient = new SparkRDDTableServiceClient<>(context, 
writeConfig);
} {code}
 

> Table service client overrides the timeline service and write config of 
> regular write client
> --------------------------------------------------------------------------------------------
>
>                 Key: HUDI-5865
>                 URL: https://issues.apache.org/jira/browse/HUDI-5865
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: Ethan Guo
>            Assignee: Ethan Guo
>            Priority: Blocker
>             Fix For: 0.13.1
>
>
> On 0.13.0 and latest master, the table service client 
> `BaseHoodieTableServiceClient` is instantiated without any timeline server 
> instance, even if the regular write client has one.  This causes the table 
> service client to start a new embedded timeline server and overwrite the 
> write config passed in from the constructor so that the write config points 
> to the newly started timeline server.
> Because the regular write client, such as `SparkRDDWriteClient`, passes the 
> same writeConfig instance directly to the table service client, the regular 
> write client's write config is also modified.  The regular write client 
> therefore always uses the newly started embedded timeline server, instead of 
> the timeline server instance passed in from the constructor or the one 
> instantiated by the regular write client itself.
> As a result, the Deltastreamer's long-lived timeline server is never used.
> {code:java}
> BaseHoodieTableServiceClient:
> protected BaseHoodieTableServiceClient(HoodieEngineContext context, 
> HoodieWriteConfig clientConfig) {
>   super(context, clientConfig, Option.empty());
> }
> ->
> BaseHoodieClient:
> protected BaseHoodieClient(HoodieEngineContext context, HoodieWriteConfig 
> clientConfig,
>     Option<EmbeddedTimelineService> timelineServer) {
>   ...
>   startEmbeddedServerView();
> }
> private synchronized void startEmbeddedServerView() {
>   if (config.isEmbeddedTimelineServerEnabled()) {
>     if (!timelineServer.isPresent()) {
>       // Run Embedded Timeline Server
>       try {
>         timelineServer = 
> EmbeddedTimelineServerHelper.createEmbeddedTimelineService(context, config);
>       } catch (IOException e) {
>         LOG.warn("Unable to start timeline service. Proceeding as if embedded 
> server is disabled", e);
>         stopEmbeddedServerView(false);
>       }
>     } else {
>       LOG.info("Timeline Server already running. Not restarting the service");
>     }
>   } else {
>     LOG.info("Embedded Timeline Server is disabled. Not starting timeline 
> service");
>   }
> }
> public static synchronized Option<EmbeddedTimelineService> 
> createEmbeddedTimelineService(
>     HoodieEngineContext context, HoodieWriteConfig config) throws IOException 
> {
>   if (config.isEmbeddedTimelineServerReuseEnabled()) {
>     if (!TIMELINE_SERVER.isPresent() || 
> !TIMELINE_SERVER.get().canReuseFor(config.getBasePath())) {
>       TIMELINE_SERVER = Option.of(startTimelineService(context, config));
>     } else {
>       updateWriteConfigWithTimelineServer(TIMELINE_SERVER.get(), config);
>     }
>     return TIMELINE_SERVER;
>   }
>   if (config.isEmbeddedTimelineServerEnabled()) {
>     return Option.of(startTimelineService(context, config));
>   } else {
>     return Option.empty();
>   }
> }
> public static void 
> updateWriteConfigWithTimelineServer(EmbeddedTimelineService timelineServer,
>     HoodieWriteConfig config) {
>   // Allow executor to find this newly instantiated timeline service
>   if (config.isEmbeddedTimelineServerEnabled()) {
>     
> config.setViewStorageConfig(timelineServer.getRemoteFileSystemViewConfig());
>   }
> }{code}
> SparkRDDWriteClient:
> {code:java}
> public SparkRDDWriteClient(HoodieEngineContext context, HoodieWriteConfig 
> writeConfig,
>                            Option<EmbeddedTimelineService> timelineService) {
>   super(context, writeConfig, timelineService, 
> SparkUpgradeDowngradeHelper.getInstance());
>   this.tableServiceClient = new SparkRDDTableServiceClient<>(context, 
> writeConfig);
> } {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to