[
https://issues.apache.org/jira/browse/HUDI-5865?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Ethan Guo updated HUDI-5865:
----------------------------
Description:
On 0.13.0 and latest master, the table service client
`BaseHoodieTableServiceClient` is instantiated without any timeline server
instance, even if the regular write client has one. This causes the table
service client to start a new embedded timeline server and overwrite the write
config passed in from the constructor so that the write config points to the
newly started timeline server.
Because a regular write client such as `SparkRDDWriteClient` passes the same
writeConfig instance directly to the table service client, the regular write
client's write config is modified as well. As a result, the regular write
client always uses the newly started embedded timeline server instead of the
timeline server instance passed in through its constructor or the one it
instantiated itself.
This means that the Deltastreamer's long-lived timeline server is never going
to be used because of this issue.
{code:java}
BaseHoodieTableServiceClient:
protected BaseHoodieTableServiceClient(HoodieEngineContext context,
HoodieWriteConfig clientConfig) {
super(context, clientConfig, Option.empty());
}
->
BaseHoodieClient:
protected BaseHoodieClient(HoodieEngineContext context, HoodieWriteConfig
clientConfig,
Option<EmbeddedTimelineService> timelineServer) {
...
startEmbeddedServerView();
}
private synchronized void startEmbeddedServerView() {
if (config.isEmbeddedTimelineServerEnabled()) {
if (!timelineServer.isPresent()) {
// Run Embedded Timeline Server
try {
timelineServer =
EmbeddedTimelineServerHelper.createEmbeddedTimelineService(context, config);
} catch (IOException e) {
LOG.warn("Unable to start timeline service. Proceeding as if embedded
server is disabled", e);
stopEmbeddedServerView(false);
}
} else {
LOG.info("Timeline Server already running. Not restarting the service");
}
} else {
LOG.info("Embedded Timeline Server is disabled. Not starting timeline
service");
}
}
public static synchronized Option<EmbeddedTimelineService>
createEmbeddedTimelineService(
HoodieEngineContext context, HoodieWriteConfig config) throws IOException {
if (config.isEmbeddedTimelineServerReuseEnabled()) {
if (!TIMELINE_SERVER.isPresent() ||
!TIMELINE_SERVER.get().canReuseFor(config.getBasePath())) {
TIMELINE_SERVER = Option.of(startTimelineService(context, config));
} else {
updateWriteConfigWithTimelineServer(TIMELINE_SERVER.get(), config);
}
return TIMELINE_SERVER;
}
if (config.isEmbeddedTimelineServerEnabled()) {
return Option.of(startTimelineService(context, config));
} else {
return Option.empty();
}
}
public static void updateWriteConfigWithTimelineServer(EmbeddedTimelineService
timelineServer,
HoodieWriteConfig config) {
// Allow executor to find this newly instantiated timeline service
if (config.isEmbeddedTimelineServerEnabled()) {
config.setViewStorageConfig(timelineServer.getRemoteFileSystemViewConfig());
}
}{code}
SparkRDDWriteClient:
{code:java}
public SparkRDDWriteClient(HoodieEngineContext context, HoodieWriteConfig
writeConfig,
Option<EmbeddedTimelineService> timelineService) {
super(context, writeConfig, timelineService,
SparkUpgradeDowngradeHelper.getInstance());
this.tableServiceClient = new SparkRDDTableServiceClient<>(context,
writeConfig);
} {code}
> Table service client overrides the timeline service and write config of
> regular write client
> --------------------------------------------------------------------------------------------
>
> Key: HUDI-5865
> URL: https://issues.apache.org/jira/browse/HUDI-5865
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Ethan Guo
> Assignee: Ethan Guo
> Priority: Blocker
> Fix For: 0.13.1
>
>
> On 0.13.0 and latest master, the table service client
> `BaseHoodieTableServiceClient` is instantiated without any timeline server
> instance, even if the regular write client has one. This causes the table
> service client to start a new embedded timeline server and overwrite the
> write config passed in from the constructor so that the write config points
> to the newly started timeline server.
> Because a regular write client such as `SparkRDDWriteClient` passes the same
> writeConfig instance directly to the table service client, the regular write
> client's write config is modified as well. As a result, the regular write
> client always uses the newly started embedded timeline server instead of the
> timeline server instance passed in through its constructor or the one it
> instantiated itself.
> This means that the Deltastreamer's long-lived timeline server is never going
> to be used because of this issue.
> {code:java}
> BaseHoodieTableServiceClient:
> protected BaseHoodieTableServiceClient(HoodieEngineContext context,
> HoodieWriteConfig clientConfig) {
> super(context, clientConfig, Option.empty());
> }
> ->
> BaseHoodieClient:
> protected BaseHoodieClient(HoodieEngineContext context, HoodieWriteConfig
> clientConfig,
> Option<EmbeddedTimelineService> timelineServer) {
> ...
> startEmbeddedServerView();
> }
> private synchronized void startEmbeddedServerView() {
> if (config.isEmbeddedTimelineServerEnabled()) {
> if (!timelineServer.isPresent()) {
> // Run Embedded Timeline Server
> try {
> timelineServer =
> EmbeddedTimelineServerHelper.createEmbeddedTimelineService(context, config);
> } catch (IOException e) {
> LOG.warn("Unable to start timeline service. Proceeding as if embedded
> server is disabled", e);
> stopEmbeddedServerView(false);
> }
> } else {
> LOG.info("Timeline Server already running. Not restarting the service");
> }
> } else {
> LOG.info("Embedded Timeline Server is disabled. Not starting timeline
> service");
> }
> }
> public static synchronized Option<EmbeddedTimelineService>
> createEmbeddedTimelineService(
> HoodieEngineContext context, HoodieWriteConfig config) throws IOException
> {
> if (config.isEmbeddedTimelineServerReuseEnabled()) {
> if (!TIMELINE_SERVER.isPresent() ||
> !TIMELINE_SERVER.get().canReuseFor(config.getBasePath())) {
> TIMELINE_SERVER = Option.of(startTimelineService(context, config));
> } else {
> updateWriteConfigWithTimelineServer(TIMELINE_SERVER.get(), config);
> }
> return TIMELINE_SERVER;
> }
> if (config.isEmbeddedTimelineServerEnabled()) {
> return Option.of(startTimelineService(context, config));
> } else {
> return Option.empty();
> }
> }
> public static void
> updateWriteConfigWithTimelineServer(EmbeddedTimelineService timelineServer,
> HoodieWriteConfig config) {
> // Allow executor to find this newly instantiated timeline service
> if (config.isEmbeddedTimelineServerEnabled()) {
>
> config.setViewStorageConfig(timelineServer.getRemoteFileSystemViewConfig());
> }
> }{code}
> SparkRDDWriteClient:
> {code:java}
> public SparkRDDWriteClient(HoodieEngineContext context, HoodieWriteConfig
> writeConfig,
> Option<EmbeddedTimelineService> timelineService) {
> super(context, writeConfig, timelineService,
> SparkUpgradeDowngradeHelper.getInstance());
> this.tableServiceClient = new SparkRDDTableServiceClient<>(context,
> writeConfig);
> } {code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)