Copilot commented on code in PR #17291: URL: https://github.com/apache/pinot/pull/17291#discussion_r2659005435
########## pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/SystemTableBrokerRequestHandler.java: ########## @@ -0,0 +1,550 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.broker.requesthandler; + +import com.fasterxml.jackson.databind.JsonNode; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.OptionalLong; +import java.util.Set; +import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import javax.annotation.Nullable; +import javax.ws.rs.core.HttpHeaders; +import javax.ws.rs.core.MediaType; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hc.client5.http.classic.methods.HttpPost; +import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; +import org.apache.hc.client5.http.impl.classic.HttpClients; +import 
org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager; +import org.apache.hc.client5.http.io.HttpClientConnectionManager; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.http.io.entity.StringEntity; +import org.apache.hc.core5.util.Timeout; +import org.apache.helix.HelixDataAccessor; +import org.apache.helix.HelixManager; +import org.apache.helix.model.InstanceConfig; +import org.apache.pinot.broker.api.AccessControl; +import org.apache.pinot.broker.broker.AccessControlFactory; +import org.apache.pinot.broker.querylog.QueryLogger; +import org.apache.pinot.broker.queryquota.QueryQuotaManager; +import org.apache.pinot.common.config.provider.TableCache; +import org.apache.pinot.common.datatable.DataTable; +import org.apache.pinot.common.datatable.DataTableFactory; +import org.apache.pinot.common.datatable.DataTableImplV4; +import org.apache.pinot.common.http.PoolingHttpClientConnectionManagerHelper; +import org.apache.pinot.common.metrics.BrokerMeter; +import org.apache.pinot.common.request.BrokerRequest; +import org.apache.pinot.common.request.PinotQuery; +import org.apache.pinot.common.request.QuerySource; +import org.apache.pinot.common.response.BrokerResponse; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.systemtable.SystemTableProvider; +import org.apache.pinot.common.systemtable.SystemTableRegistry; +import org.apache.pinot.common.utils.NamedThreadFactory; +import org.apache.pinot.common.utils.config.InstanceUtils; +import org.apache.pinot.common.utils.config.QueryOptionsUtils; +import org.apache.pinot.common.utils.request.RequestUtils; +import org.apache.pinot.core.operator.blocks.InstanceResponseBlock; +import org.apache.pinot.core.plan.Plan; +import org.apache.pinot.core.plan.maker.InstancePlanMakerImplV2; +import org.apache.pinot.core.plan.maker.PlanMaker; +import org.apache.pinot.core.query.reduce.BrokerReduceService; +import 
org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils; +import org.apache.pinot.core.routing.MultiClusterRoutingContext; +import org.apache.pinot.core.routing.RoutingManager; +import org.apache.pinot.core.transport.ServerRoutingInstance; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.SegmentContext; +import org.apache.pinot.spi.accounting.ThreadAccountant; +import org.apache.pinot.spi.auth.AuthorizationResult; +import org.apache.pinot.spi.auth.broker.RequesterIdentity; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.env.PinotConfiguration; +import org.apache.pinot.spi.exception.BadQueryRequestException; +import org.apache.pinot.spi.exception.QueryErrorCode; +import org.apache.pinot.spi.query.QueryExecutionContext; +import org.apache.pinot.spi.query.QueryThreadContext; +import org.apache.pinot.spi.trace.RequestContext; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.InstanceTypeUtils; +import org.apache.pinot.spi.utils.builder.TableNameBuilder; +import org.apache.pinot.sql.parsers.CalciteSqlParser; +import org.apache.pinot.sql.parsers.SqlNodeAndOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Broker request handler for system tables (handled entirely on the broker). 
+ */ +public class SystemTableBrokerRequestHandler extends BaseBrokerRequestHandler { + private static final Logger LOGGER = LoggerFactory.getLogger(SystemTableBrokerRequestHandler.class); + private static final String SYSTEM_TABLE_PSEUDO_HOST = "localhost"; + private static final int SYSTEM_TABLE_PSEUDO_PORT = 0; + private static final String SYSTEM_TABLE_DATATABLE_API_PATH = "/query/systemTable/datatable"; + private static final Set<String> HOP_BY_HOP_HEADERS_TO_SKIP = + Set.of("connection", "content-length", "transfer-encoding", "host"); + + private final BrokerReduceService _brokerReduceService; + private final PlanMaker _planMaker; + private final ExecutorService _executorService; + private final ExecutorService _scatterGatherExecutorService; + private final PoolingHttpClientConnectionManager _scatterGatherConnMgr; + private final CloseableHttpClient _scatterGatherHttpClient; + @Nullable + private final HelixManager _helixManager; + + public SystemTableBrokerRequestHandler(PinotConfiguration config, String brokerId, + BrokerRequestIdGenerator requestIdGenerator, RoutingManager routingManager, + AccessControlFactory accessControlFactory, QueryQuotaManager queryQuotaManager, TableCache tableCache, + ThreadAccountant threadAccountant, @Nullable MultiClusterRoutingContext multiClusterRoutingContext, + @Nullable HelixManager helixManager) { + super(config, brokerId, requestIdGenerator, routingManager, accessControlFactory, queryQuotaManager, tableCache, + threadAccountant, multiClusterRoutingContext); + _brokerReduceService = new BrokerReduceService(_config); + _planMaker = new InstancePlanMakerImplV2(); + _planMaker.init(_config); + _helixManager = helixManager; + int executorPoolSize = config.getProperty(CommonConstants.Broker.CONFIG_OF_SYSTEM_TABLE_EXECUTOR_POOL_SIZE, + CommonConstants.Broker.DEFAULT_SYSTEM_TABLE_EXECUTOR_POOL_SIZE); + executorPoolSize = Math.max(1, executorPoolSize); + _executorService = 
QueryThreadContext.contextAwareExecutorService(Executors.newFixedThreadPool(executorPoolSize, + new NamedThreadFactory("system-table-query-executor"))); + _scatterGatherExecutorService = + QueryThreadContext.contextAwareExecutorService(Executors.newFixedThreadPool(executorPoolSize, + new NamedThreadFactory("system-table-scatter-gather-executor"))); + _scatterGatherConnMgr = PoolingHttpClientConnectionManagerHelper.createWithSocketFactory(); + Timeout timeout = Timeout.of(_brokerTimeoutMs, TimeUnit.MILLISECONDS); + RequestConfig defaultRequestConfig = + RequestConfig.custom().setConnectionRequestTimeout(timeout).setResponseTimeout(timeout).build(); + _scatterGatherHttpClient = + HttpClients.custom().setConnectionManager(_scatterGatherConnMgr).setDefaultRequestConfig(defaultRequestConfig) + .build(); + } + + @Override + public void start() { + } + + @Override + public void shutDown() { + _executorService.shutdownNow(); + _scatterGatherExecutorService.shutdownNow(); + try { + _scatterGatherHttpClient.close(); + } catch (Exception e) { + LOGGER.debug("Failed to close system table scatter-gather http client: {}", e.toString()); + } + try { + _scatterGatherConnMgr.close(); + } catch (Exception e) { + LOGGER.debug("Failed to close system table scatter-gather connection manager: {}", e.toString()); + } + _brokerReduceService.shutDown(); + } + + public boolean canHandle(String tableName) { + return isSystemTable(tableName) && SystemTableRegistry.isRegistered(tableName); + } + + @Override + protected BrokerResponse handleRequest(long requestId, String query, SqlNodeAndOptions sqlNodeAndOptions, + JsonNode request, @Nullable RequesterIdentity requesterIdentity, RequestContext requestContext, + @Nullable HttpHeaders httpHeaders, AccessControl accessControl) + throws Exception { + long startTimeMs = requestContext.getRequestArrivalTimeMillis(); + long deadlineMs = startTimeMs + _brokerTimeoutMs; + QueryExecutionContext executionContext = + new 
QueryExecutionContext(QueryExecutionContext.QueryType.STE, requestId, Long.toString(requestId), + QueryOptionsUtils.getWorkloadName(sqlNodeAndOptions.getOptions()), startTimeMs, deadlineMs, deadlineMs, + _brokerId, _brokerId, org.apache.pinot.spi.utils.CommonConstants.Broker.DEFAULT_QUERY_HASH); + try (QueryThreadContext ignore = QueryThreadContext.open(executionContext, _threadAccountant)) { + PinotQuery pinotQuery; + try { + pinotQuery = CalciteSqlParser.compileToPinotQuery(sqlNodeAndOptions); + } catch (Exception e) { + requestContext.setErrorCode(QueryErrorCode.SQL_PARSING); + return new BrokerResponseNative(QueryErrorCode.SQL_PARSING, e.getMessage()); + } + + Set<String> tableNames = RequestUtils.getTableNames(pinotQuery); + if (tableNames == null || tableNames.isEmpty()) { + requestContext.setErrorCode(QueryErrorCode.QUERY_VALIDATION); + return new BrokerResponseNative(QueryErrorCode.QUERY_VALIDATION, "Failed to extract table name"); + } + if (tableNames.size() != 1) { + requestContext.setErrorCode(QueryErrorCode.QUERY_VALIDATION); + return new BrokerResponseNative(QueryErrorCode.QUERY_VALIDATION, "System tables do not support joins"); + } + String tableName = tableNames.iterator().next(); + if (!isSystemTable(tableName)) { + requestContext.setErrorCode(QueryErrorCode.QUERY_VALIDATION); + return new BrokerResponseNative(QueryErrorCode.QUERY_VALIDATION, "Not a system table query"); + } + AuthorizationResult authorizationResult = + hasTableAccess(requesterIdentity, Set.of(tableName), requestContext, httpHeaders); + if (!authorizationResult.hasAccess()) { + requestContext.setErrorCode(QueryErrorCode.ACCESS_DENIED); + return new BrokerResponseNative(QueryErrorCode.ACCESS_DENIED, authorizationResult.getFailureMessage()); + } + + return handleSystemTableQuery(request, pinotQuery, tableName, requestContext, requesterIdentity, query, + httpHeaders); + } + } + + @Override + protected boolean handleCancel(long queryId, int timeoutMs, Executor executor, + 
HttpClientConnectionManager connMgr, Map<String, Integer> serverResponses) { + return false; + } + + @Override + public boolean cancelQueryByClientId(String clientQueryId, int timeoutMs, Executor executor, + HttpClientConnectionManager connMgr, Map<String, Integer> serverResponses) + throws Exception { + return false; + } + + @Override + public Map<Long, String> getRunningQueries() { + return Collections.emptyMap(); + } + + @Override + public OptionalLong getRequestIdByClientId(String clientQueryId) { + return OptionalLong.empty(); + } + + private boolean isSystemTable(String tableName) { + return tableName != null && tableName.toLowerCase(Locale.ROOT).startsWith("system."); + } + + /** + * Executes a system table query against the local broker and returns the raw {@link DataTable} results. + * <p> + * This method is used by the internal broker-to-broker scatter-gather endpoint and must never perform fanout. + */ + public DataTable handleSystemTableDataTableRequest(JsonNode request, @Nullable RequesterIdentity requesterIdentity, + RequestContext requestContext, @Nullable HttpHeaders httpHeaders) { + long startTimeMs = requestContext.getRequestArrivalTimeMillis(); + if (startTimeMs <= 0) { + startTimeMs = System.currentTimeMillis(); + requestContext.setRequestArrivalTimeMillis(startTimeMs); + } + long requestId = _requestIdGenerator.get(); + long deadlineMs = startTimeMs + _brokerTimeoutMs; + + JsonNode sql = request.get(CommonConstants.Broker.Request.SQL); + if (sql == null || !sql.isTextual()) { + return exceptionDataTable(QueryErrorCode.JSON_PARSING, "Failed to find 'sql' in the request: " + request); + } + String query = sql.textValue(); + requestContext.setQuery(query); + + SqlNodeAndOptions sqlNodeAndOptions; + try { + sqlNodeAndOptions = RequestUtils.parseQuery(query, request); + } catch (Exception e) { + requestContext.setErrorCode(QueryErrorCode.SQL_PARSING); + return exceptionDataTable(QueryErrorCode.SQL_PARSING, e.getMessage()); + } + + 
QueryExecutionContext executionContext = + new QueryExecutionContext(QueryExecutionContext.QueryType.STE, requestId, Long.toString(requestId), + QueryOptionsUtils.getWorkloadName(sqlNodeAndOptions.getOptions()), startTimeMs, deadlineMs, deadlineMs, + _brokerId, _brokerId, org.apache.pinot.spi.utils.CommonConstants.Broker.DEFAULT_QUERY_HASH); + try (QueryThreadContext ignore = QueryThreadContext.open(executionContext, _threadAccountant)) { + AccessControl accessControl = _accessControlFactory.create(); + AuthorizationResult authorizationResult = accessControl.authorize(requesterIdentity); + if (!authorizationResult.hasAccess()) { + requestContext.setErrorCode(QueryErrorCode.ACCESS_DENIED); + return exceptionDataTable(QueryErrorCode.ACCESS_DENIED, authorizationResult.getFailureMessage()); + } + + PinotQuery pinotQuery; + try { + pinotQuery = CalciteSqlParser.compileToPinotQuery(sqlNodeAndOptions); + } catch (Exception e) { + requestContext.setErrorCode(QueryErrorCode.SQL_PARSING); + return exceptionDataTable(QueryErrorCode.SQL_PARSING, e.getMessage()); + } + + Set<String> tableNames = RequestUtils.getTableNames(pinotQuery); + if (tableNames == null || tableNames.isEmpty()) { + requestContext.setErrorCode(QueryErrorCode.QUERY_VALIDATION); + return exceptionDataTable(QueryErrorCode.QUERY_VALIDATION, "Failed to extract table name"); + } + if (tableNames.size() != 1) { + requestContext.setErrorCode(QueryErrorCode.QUERY_VALIDATION); + return exceptionDataTable(QueryErrorCode.QUERY_VALIDATION, "System tables do not support joins"); + } + String tableName = tableNames.iterator().next(); + if (!isSystemTable(tableName)) { + requestContext.setErrorCode(QueryErrorCode.QUERY_VALIDATION); + return exceptionDataTable(QueryErrorCode.QUERY_VALIDATION, "Not a system table query"); + } + + AuthorizationResult tableAuthorizationResult = + hasTableAccess(requesterIdentity, Set.of(tableName), requestContext, httpHeaders); + if (!tableAuthorizationResult.hasAccess()) { + 
requestContext.setErrorCode(QueryErrorCode.ACCESS_DENIED); + return exceptionDataTable(QueryErrorCode.ACCESS_DENIED, tableAuthorizationResult.getFailureMessage()); + } + + SystemTableProvider provider = SystemTableRegistry.get(tableName); + if (provider == null) { + requestContext.setErrorCode(QueryErrorCode.TABLE_DOES_NOT_EXIST); + return exceptionDataTable(QueryErrorCode.TABLE_DOES_NOT_EXIST, "System table does not exist: " + tableName); + } + + try { + return executeLocalSystemTableQuery(pinotQuery, provider); + } catch (BadQueryRequestException e) { + requestContext.setErrorCode(QueryErrorCode.QUERY_VALIDATION); + _brokerMetrics.addMeteredGlobalValue(BrokerMeter.QUERY_VALIDATION_EXCEPTIONS, 1); + return exceptionDataTable(QueryErrorCode.QUERY_VALIDATION, e.getMessage()); + } catch (Exception e) { + LOGGER.warn("Caught exception while handling system table datatable query {}: {}", tableName, e.getMessage(), + e); + requestContext.setErrorCode(QueryErrorCode.QUERY_EXECUTION); + return exceptionDataTable(QueryErrorCode.QUERY_EXECUTION, e.getMessage()); + } + } + } + + private BrokerResponse handleSystemTableQuery(JsonNode request, PinotQuery pinotQuery, String tableName, + RequestContext requestContext, @Nullable RequesterIdentity requesterIdentity, String query, + @Nullable HttpHeaders httpHeaders) { + if (pinotQuery.isExplain()) { + return BrokerResponseNative.BROKER_ONLY_EXPLAIN_PLAN_OUTPUT; + } + SystemTableProvider provider = SystemTableRegistry.get(tableName); + if (provider == null) { + requestContext.setErrorCode(QueryErrorCode.TABLE_DOES_NOT_EXIST); + return BrokerResponseNative.TABLE_DOES_NOT_EXIST; + } + try { + Map<ServerRoutingInstance, DataTable> dataTableMap; + if (provider.getExecutionMode() == SystemTableProvider.ExecutionMode.BROKER_SCATTER_GATHER) { + dataTableMap = scatterGatherSystemTableDataTables(provider, pinotQuery, tableName, request, httpHeaders); + } else { + dataTableMap = new HashMap<>(1); + // Use a synthetic routing instance for 
broker-local execution of system table queries. + dataTableMap.put(new ServerRoutingInstance(SYSTEM_TABLE_PSEUDO_HOST, SYSTEM_TABLE_PSEUDO_PORT, + TableType.OFFLINE), executeLocalSystemTableQuery(pinotQuery, provider)); + } + + BrokerResponseNative brokerResponse; + BrokerRequest brokerRequest = new BrokerRequest(); + QuerySource querySource = new QuerySource(); + querySource.setTableName(tableName); + brokerRequest.setQuerySource(querySource); + brokerRequest.setPinotQuery(pinotQuery); + brokerResponse = _brokerReduceService.reduceOnDataTable(brokerRequest, brokerRequest, dataTableMap, + _brokerTimeoutMs, _brokerMetrics); + brokerResponse.setTablesQueried(Set.of(TableNameBuilder.extractRawTableName(tableName))); + brokerResponse.setTimeUsedMs(System.currentTimeMillis() - requestContext.getRequestArrivalTimeMillis()); + _queryLogger.log(new QueryLogger.QueryLogParams(requestContext, tableName, brokerResponse, + QueryLogger.QueryLogParams.QueryEngine.SINGLE_STAGE, requesterIdentity, null)); + return brokerResponse; + } catch (BadQueryRequestException e) { + requestContext.setErrorCode(QueryErrorCode.QUERY_VALIDATION); + _brokerMetrics.addMeteredGlobalValue(BrokerMeter.QUERY_VALIDATION_EXCEPTIONS, 1); + return new BrokerResponseNative(QueryErrorCode.QUERY_VALIDATION, e.getMessage()); + } catch (Exception e) { + LOGGER.warn("Caught exception while handling system table query {}: {}", tableName, e.getMessage(), e); + requestContext.setErrorCode(QueryErrorCode.QUERY_EXECUTION); + return new BrokerResponseNative(QueryErrorCode.QUERY_EXECUTION, e.getMessage()); + } + } + + private DataTable executeLocalSystemTableQuery(PinotQuery pinotQuery, SystemTableProvider provider) + throws Exception { + IndexSegment dataSource = provider.getDataSource(); + try { + QueryContext queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery); + queryContext.setSchema(provider.getSchema()); + queryContext.setEndTimeMs(System.currentTimeMillis() + _brokerTimeoutMs); + + // Pass 
null for serverMetrics because system table queries run broker-local against an in-memory IndexSegment. + Plan plan = _planMaker.makeInstancePlan(List.of(new SegmentContext(dataSource)), queryContext, _executorService, + null); + InstanceResponseBlock instanceResponse = plan.execute(); + return instanceResponse.toDataTable(); + } finally { + dataSource.destroy(); + } + } + + static final class BrokerTarget { + final ServerRoutingInstance _routingInstance; + final String _dataTableUrl; + + BrokerTarget(ServerRoutingInstance routingInstance, String dataTableUrl) { + _routingInstance = routingInstance; + _dataTableUrl = dataTableUrl; + } + } + + @com.google.common.annotations.VisibleForTesting + protected Map<ServerRoutingInstance, DataTable> scatterGatherSystemTableDataTables(SystemTableProvider provider, + PinotQuery pinotQuery, String tableName, JsonNode request, @Nullable HttpHeaders httpHeaders) { + if (_helixManager == null) { + throw new IllegalStateException( + "HelixManager is required for scatter-gather execution of system table: " + tableName); + } + + HelixDataAccessor dataAccessor = _helixManager.getHelixDataAccessor(); + List<String> liveInstances = dataAccessor.getChildNames(dataAccessor.keyBuilder().liveInstances()); + if (liveInstances == null || liveInstances.isEmpty()) { + throw new IllegalStateException("No live instances found for scatter-gather execution of system table: " + + tableName); + } + + String localInstanceId = _brokerId; + List<BrokerTarget> remoteTargets = new ArrayList<>(); + @Nullable ServerRoutingInstance localRoutingInstance = null; + for (String instanceId : liveInstances) { + if (!InstanceTypeUtils.isBroker(instanceId)) { + continue; + } + InstanceConfig instanceConfig = dataAccessor.getProperty(dataAccessor.keyBuilder().instanceConfig(instanceId)); + if (instanceConfig == null) { + continue; + } + URI baseUri = URI.create(InstanceUtils.getInstanceBaseUri(instanceConfig)); + ServerRoutingInstance routingInstance = new 
ServerRoutingInstance(baseUri.getHost(), baseUri.getPort(), + TableType.OFFLINE); + if (instanceId.equals(localInstanceId)) { + localRoutingInstance = routingInstance; + } else { + remoteTargets.add(new BrokerTarget(routingInstance, baseUri.toString() + SYSTEM_TABLE_DATATABLE_API_PATH)); + } + } + + Map<ServerRoutingInstance, DataTable> dataTableMap = new HashMap<>(remoteTargets.size() + 1); + ServerRoutingInstance routingInstance = localRoutingInstance != null ? localRoutingInstance + : new ServerRoutingInstance(SYSTEM_TABLE_PSEUDO_HOST, SYSTEM_TABLE_PSEUDO_PORT, TableType.OFFLINE);

Review Comment: This ternary expression wraps onto a second line, with the `:` branch on the continuation line, which makes the assignment harder to read. Consider replacing it with an explicit if-else statement, or indenting the continuation more clearly.

```suggestion
    ServerRoutingInstance routingInstance;
    if (localRoutingInstance != null) {
      routingInstance = localRoutingInstance;
    } else {
      routingInstance =
          new ServerRoutingInstance(SYSTEM_TABLE_PSEUDO_HOST, SYSTEM_TABLE_PSEUDO_PORT, TableType.OFFLINE);
    }
```

########## pinot-plugins/pinot-system-table/src/main/java/org/apache/pinot/systemtable/provider/TablesSystemTableProvider.java: ########## @@ -0,0 +1,662 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.systemtable.provider; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.JsonNode; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; +import java.util.function.IntFunction; +import javax.annotation.Nullable; +import org.apache.helix.HelixAdmin; +import org.apache.pinot.client.admin.PinotAdminClient; +import org.apache.pinot.client.admin.PinotAdminTransport; +import org.apache.pinot.common.config.provider.TableCache; +import org.apache.pinot.common.systemtable.SystemTable; +import org.apache.pinot.common.systemtable.SystemTableProvider; +import org.apache.pinot.common.systemtable.datasource.InMemorySystemTableSegment; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.apache.pinot.spi.utils.builder.TableNameBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Basic system table exposing table-level metadata populated from the broker {@link TableCache}. 
+ */ +@SystemTable +public final class TablesSystemTableProvider implements SystemTableProvider { + private static final Logger LOGGER = LoggerFactory.getLogger(TablesSystemTableProvider.class); + public static final String TABLE_NAME = "system.tables"; + private static final String SIZE_CACHE_TTL_MS_PROPERTY = "pinot.systemtable.tables.sizeCacheTtlMs"; + private static final long DEFAULT_SIZE_CACHE_TTL_MS = Duration.ofMinutes(1).toMillis(); + private static final long SIZE_CACHE_TTL_MS = getNonNegativeLongProperty(SIZE_CACHE_TTL_MS_PROPERTY, + DEFAULT_SIZE_CACHE_TTL_MS); + + private static final String CONTROLLER_TIMEOUT_MS_PROPERTY = "pinot.systemtable.tables.controllerTimeoutMs"; + private static final long DEFAULT_CONTROLLER_TIMEOUT_MS = Duration.ofSeconds(5).toMillis(); + private static final long CONTROLLER_TIMEOUT_MS = getPositiveLongProperty(CONTROLLER_TIMEOUT_MS_PROPERTY, + DEFAULT_CONTROLLER_TIMEOUT_MS); + + private static final long SIZE_FETCH_FAILURE_WARN_INTERVAL_MS = Duration.ofHours(1).toMillis(); + + private static final Schema SCHEMA = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) + .addSingleValueDimension("tableName", FieldSpec.DataType.STRING) + .addSingleValueDimension("type", FieldSpec.DataType.STRING) + .addSingleValueDimension("status", FieldSpec.DataType.STRING) + .addSingleValueDimension("segments", FieldSpec.DataType.INT) + .addSingleValueDimension("totalDocs", FieldSpec.DataType.LONG) + .addMetric("reportedSize", FieldSpec.DataType.LONG) + .addMetric("estimatedSize", FieldSpec.DataType.LONG) + .addSingleValueDimension("brokerTenant", FieldSpec.DataType.STRING) + .addSingleValueDimension("serverTenant", FieldSpec.DataType.STRING) + .addSingleValueDimension("replicas", FieldSpec.DataType.INT) + .addSingleValueDimension("tableConfig", FieldSpec.DataType.STRING) + .build(); + + private final TableCache _tableCache; + private final @Nullable HelixAdmin _helixAdmin; + private final @Nullable String _clusterName; + private final 
@Nullable Function<String, TableSize> _tableSizeFetcherOverride; + private final List<String> _configuredControllerUrls; + private final Map<String, CachedSize> _sizeCache = new ConcurrentHashMap<>(); + private final Map<String, PinotAdminClient> _adminClientCache = new ConcurrentHashMap<>(); + private final AtomicLong _lastSizeFetchFailureWarnLogMs = new AtomicLong(); + + public TablesSystemTableProvider() { + this(null, null, null, null, null); + } + + public TablesSystemTableProvider(TableCache tableCache) { + this(tableCache, null, null, null, null); + } + + public TablesSystemTableProvider(TableCache tableCache, @Nullable HelixAdmin helixAdmin) { + this(tableCache, helixAdmin, null, null, null); + } + + public TablesSystemTableProvider(TableCache tableCache, @Nullable HelixAdmin helixAdmin, + @Nullable String clusterName) { + this(tableCache, helixAdmin, clusterName, null, null); + } + + TablesSystemTableProvider(TableCache tableCache, @Nullable HelixAdmin helixAdmin, @Nullable String clusterName, + @Nullable Function<String, TableSize> tableSizeFetcherOverride, @Nullable List<String> controllerUrls) { + _tableCache = tableCache; + _helixAdmin = helixAdmin; + _clusterName = clusterName; + _tableSizeFetcherOverride = tableSizeFetcherOverride; + _configuredControllerUrls = controllerUrls != null ? 
new ArrayList<>(controllerUrls) : List.of(); + } + + @Override + public String getTableName() { + return TABLE_NAME; + } + + @Override + public Schema getSchema() { + return SCHEMA; + } + + @Override + public TableConfig getTableConfig() { + return new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build(); + } + + @Override + public void close() + throws Exception { + for (Map.Entry<String, PinotAdminClient> entry : _adminClientCache.entrySet()) { + try { + entry.getValue().close(); + } catch (Exception e) { + LOGGER.debug("Failed to close admin client for {}: {}", entry.getKey(), e.toString()); + } + } + _adminClientCache.clear(); + } + + @Override + public IndexSegment getDataSource() { + if (_tableCache == null) { + return new InMemorySystemTableSegment(TABLE_NAME, SCHEMA, 0, Collections.emptyMap()); + } + + Set<String> tableNamesWithType = new LinkedHashSet<>(); + for (String tableName : _tableCache.getTableNameMap().values()) { + if (TableNameBuilder.getTableTypeFromTableName(tableName) != null) { + tableNamesWithType.add(tableName); + } + } + List<String> sortedTableNames = new ArrayList<>(tableNamesWithType); + sortedTableNames.sort(Comparator.naturalOrder()); + + List<String> controllerBaseUrls = getControllerBaseUrls(); + Function<String, TableSize> sizeFetcher = getSizeFetcher(); + class TableRow { + final String _tableNameWithType; + final TableType _tableType; + final String _rawTableName; + final @Nullable TableConfig _tableConfig; + private volatile @Nullable String _tableConfigJson; + private volatile @Nullable TableSize _tableSize; + private volatile boolean _tableSizeFetched; + + private TableRow(String tableNameWithType, TableType tableType, String rawTableName, + @Nullable TableConfig tableConfig) { + _tableNameWithType = tableNameWithType; + _tableType = tableType; + _rawTableName = rawTableName; + _tableConfig = tableConfig; + } + + @Nullable + private TableSize getTableSize() { + if (_tableSizeFetched) { + return _tableSize; + 
} + synchronized (this) { + if (_tableSizeFetched) { + return _tableSize; + } + _tableSize = fetchTableSize(_tableNameWithType, sizeFetcher, controllerBaseUrls); + _tableSizeFetched = true; + return _tableSize; + } + } + + private String getStatus() { + if (_tableConfig != null) { + return "ONLINE"; + } + TableSize sizeFromController = getTableSize(); + int segments = sizeFromController != null ? getSegmentCount(sizeFromController, _tableType) : 0; + return segments > 0 ? "ONLINE" : "UNKNOWN"; + } + + private int getSegments() { + TableSize sizeFromController = getTableSize(); + return sizeFromController != null ? getSegmentCount(sizeFromController, _tableType) : 0; + } + + private long getTotalDocs() { + TableSize sizeFromController = getTableSize(); + return sizeFromController != null ? TablesSystemTableProvider.this.getTotalDocs(sizeFromController, _tableType, + _tableNameWithType, controllerBaseUrls) : 0L; + } + + private long getReportedSize() { + TableSize sizeFromController = getTableSize(); + if (sizeFromController == null || sizeFromController._reportedSizeInBytes < 0) { + return 0L; + } + return sizeFromController._reportedSizeInBytes; + } + + private long getEstimatedSize() { + TableSize sizeFromController = getTableSize(); + if (sizeFromController == null || sizeFromController._estimatedSizeInBytes < 0) { + return 0L; + } + return sizeFromController._estimatedSizeInBytes; + } + + private String getBrokerTenant() { + if (_tableConfig != null && _tableConfig.getTenantConfig() != null) { + String tenant = _tableConfig.getTenantConfig().getBroker(); + return tenant != null ? tenant : ""; + } + return ""; + } + + private String getServerTenant() { + if (_tableConfig != null && _tableConfig.getTenantConfig() != null) { + String tenant = _tableConfig.getTenantConfig().getServer(); + return tenant != null ? 
tenant : ""; + } + return ""; + } + + private int getReplicas() { + if (_tableConfig != null && _tableConfig.getValidationConfig() != null) { + Integer replicationNumber = _tableConfig.getValidationConfig().getReplicationNumber(); + if (replicationNumber != null) { + return replicationNumber; + } + } + return 0; + } + + private String getTableConfigJson() { + String cached = _tableConfigJson; + if (cached != null) { + return cached; + } + synchronized (this) { + cached = _tableConfigJson; + if (cached != null) { + return cached; + } + cached = ""; + if (_tableConfig != null) { + try { + cached = JsonUtils.objectToString(_tableConfig); + } catch (Exception e) { + LOGGER.warn("Failed to serialize table config for {}: {}", _tableNameWithType, e.toString()); + cached = _tableConfig.toString(); + } + } + _tableConfigJson = cached; + return cached; + } + } + } + + List<TableRow> tableRows = new ArrayList<>(sortedTableNames.size()); + for (String tableNameWithType : sortedTableNames) { + TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableNameWithType); + if (tableType == null) { + continue; + } + String rawTableName = TableNameBuilder.extractRawTableName(tableNameWithType); + TableConfig tableConfig = _tableCache.getTableConfig(tableNameWithType); + tableRows.add(new TableRow(tableNameWithType, tableType, rawTableName, tableConfig)); + } + + Map<String, IntFunction<Object>> valueProviders = new java.util.HashMap<>(); + valueProviders.put("tableName", docId -> tableRows.get(docId)._rawTableName); + valueProviders.put("type", docId -> tableRows.get(docId)._tableType.name()); + valueProviders.put("status", docId -> tableRows.get(docId).getStatus()); + valueProviders.put("segments", docId -> tableRows.get(docId).getSegments()); + valueProviders.put("totalDocs", docId -> tableRows.get(docId).getTotalDocs()); + valueProviders.put("reportedSize", docId -> tableRows.get(docId).getReportedSize()); + valueProviders.put("estimatedSize", docId -> 
tableRows.get(docId).getEstimatedSize()); + valueProviders.put("brokerTenant", docId -> tableRows.get(docId).getBrokerTenant()); + valueProviders.put("serverTenant", docId -> tableRows.get(docId).getServerTenant()); + valueProviders.put("replicas", docId -> tableRows.get(docId).getReplicas()); + valueProviders.put("tableConfig", docId -> tableRows.get(docId).getTableConfigJson()); + + return new InMemorySystemTableSegment(TABLE_NAME, SCHEMA, tableRows.size(), valueProviders); + } + + @Nullable + private TableSize fetchTableSize(String tableNameWithType, + @Nullable Function<String, TableSize> fetcher, List<String> controllerBaseUrls) { + boolean cacheEnabled = SIZE_CACHE_TTL_MS > 0; + TableSize cached = cacheEnabled ? getCachedSize(tableNameWithType) : null; + if (cached != null) { + return cached; + } + if (fetcher != null) { + try { + TableSize fetched = fetcher.apply(tableNameWithType); + if (fetched != null) { + if (cacheEnabled) { + cacheSize(tableNameWithType, fetched); + } + return fetched; + } + } catch (Exception e) { + LOGGER.warn("Table size fetcher failed for {}: {}", tableNameWithType, e.toString()); + } + } + String rawTableName = TableNameBuilder.extractRawTableName(tableNameWithType); + TableSize size = fetchTableSizeForName(controllerBaseUrls, rawTableName); + if (size == null) { + size = fetchTableSizeForName(controllerBaseUrls, tableNameWithType); + if (size == null) { + logSizeFetchFailure("{}: failed to fetch size for {} from controllers {} " + + "(tried raw table name '{}' and table name with type '{}')", + TABLE_NAME, tableNameWithType, controllerBaseUrls, rawTableName, tableNameWithType); + } + } + if (size != null && cacheEnabled) { + cacheSize(tableNameWithType, size); + } + return size; + } + + @Nullable + private TableSize fetchTableSizeForName(List<String> controllerBaseUrls, String tableName) { + for (String baseUrl : controllerBaseUrls) { + try { + PinotAdminClient adminClient = getOrCreateAdminClient(baseUrl); + if (adminClient == 
null) { + continue; + } + + JsonNode sizeNode = adminClient.getTableSize(tableName, true, false); + + if (sizeNode == null) { + continue; + } + + TableSize parsed = JsonUtils.stringToObject(sizeNode.toString(), TableSize.class); + LOGGER.debug("{}: controller size response for {} via {} -> segments offline={}, realtime={}, " + + "reportedSize={}, estimatedSize={}", TABLE_NAME, tableName, baseUrl, + parsed._offlineSegments != null && parsed._offlineSegments._segments != null + ? parsed._offlineSegments._segments.size() : 0, + parsed._realtimeSegments != null && parsed._realtimeSegments._segments != null + ? parsed._realtimeSegments._segments.size() : 0, + parsed._reportedSizeInBytes, parsed._estimatedSizeInBytes); + return parsed; + } catch (Exception e) { + logSizeFetchFailure("{}: error fetching table size for {} via {} using admin client", TABLE_NAME, tableName, + baseUrl, e); + } + } + return null; + } + + private List<String> getControllerBaseUrls() { + Set<String> urls = new LinkedHashSet<>(); + if (_helixAdmin != null) { + for (String controller : discoverControllersFromHelix()) { + String normalized = normalizeControllerUrl(controller); + if (normalized != null) { + urls.add(normalized); + } + } + } + for (String url : _configuredControllerUrls) { + String normalized = normalizeControllerUrl(url); + if (normalized != null) { + urls.add(normalized); + } + } + return new ArrayList<>(urls); + } + + private int getSegmentCount(TableSize sizeFromController, TableType tableType) { + if (tableType == TableType.OFFLINE && sizeFromController._offlineSegments != null + && sizeFromController._offlineSegments._segments != null) { + return sizeFromController._offlineSegments._segments.size(); + } + if (tableType == TableType.REALTIME && sizeFromController._realtimeSegments != null + && sizeFromController._realtimeSegments._segments != null) { + return sizeFromController._realtimeSegments._segments.size(); + } + return 0; + } + + private long 
getTotalDocsFromSize(TableSize sizeFromController, TableType tableType) { + if (tableType == TableType.OFFLINE && sizeFromController._offlineSegments != null + && sizeFromController._offlineSegments._segments != null) { + return sizeFromController._offlineSegments._segments.values().stream() + .mapToLong(segmentSize -> segmentSize._totalDocs).sum(); + } + if (tableType == TableType.REALTIME && sizeFromController._realtimeSegments != null + && sizeFromController._realtimeSegments._segments != null) { + return sizeFromController._realtimeSegments._segments.values().stream() + .mapToLong(segmentSize -> segmentSize._totalDocs).sum(); + } + return 0; + } + + private long getTotalDocs(TableSize sizeFromController, TableType tableType, String tableNameWithType, + List<String> controllerBaseUrls) { + if (tableType == TableType.OFFLINE && sizeFromController._offlineSegments != null + && sizeFromController._offlineSegments._segments != null) { + long cached = sizeFromController._offlineTotalDocs; + if (cached >= 0) { + return cached; + } + long totalDocsFromSize = getTotalDocsFromSize(sizeFromController, tableType); + if (totalDocsFromSize > 0) { + synchronized (sizeFromController) { + if (sizeFromController._offlineTotalDocs < 0) { + sizeFromController._offlineTotalDocs = totalDocsFromSize; + } + return sizeFromController._offlineTotalDocs; + } + } + long fetched = fetchTotalDocsFromSegmentMetadata(tableNameWithType, sizeFromController._offlineSegments._segments, + controllerBaseUrls); + synchronized (sizeFromController) { + if (sizeFromController._offlineTotalDocs < 0) { + sizeFromController._offlineTotalDocs = fetched; + } + return sizeFromController._offlineTotalDocs; + } + } + if (tableType == TableType.REALTIME && sizeFromController._realtimeSegments != null + && sizeFromController._realtimeSegments._segments != null) { + long cached = sizeFromController._realtimeTotalDocs; + if (cached >= 0) { + return cached; + } + long totalDocsFromSize = 
getTotalDocsFromSize(sizeFromController, tableType); + if (totalDocsFromSize > 0) { + synchronized (sizeFromController) { + if (sizeFromController._realtimeTotalDocs < 0) { + sizeFromController._realtimeTotalDocs = totalDocsFromSize; + } + return sizeFromController._realtimeTotalDocs; + } + } + long fetched = fetchTotalDocsFromSegmentMetadata(tableNameWithType, + sizeFromController._realtimeSegments._segments, controllerBaseUrls); + synchronized (sizeFromController) { + if (sizeFromController._realtimeTotalDocs < 0) { + sizeFromController._realtimeTotalDocs = fetched; + } + return sizeFromController._realtimeTotalDocs; + } + } Review Comment: The caching logic for `_offlineTotalDocs` and `_realtimeTotalDocs` is duplicated in lines 436-446 and 459-470. Extract this into a helper method that takes the cached field reference and segment type to reduce code duplication and improve maintainability. ```suggestion private long getTotalDocsWithCaching(TableSize sizeFromController, TableType tableType, String tableNameWithType, List<String> controllerBaseUrls) { long cached = (tableType == TableType.OFFLINE) ? 
sizeFromController._offlineTotalDocs : sizeFromController._realtimeTotalDocs; if (cached >= 0) { return cached; } long totalDocsFromSize = getTotalDocsFromSize(sizeFromController, tableType); if (totalDocsFromSize > 0) { synchronized (sizeFromController) { if (tableType == TableType.OFFLINE) { if (sizeFromController._offlineTotalDocs < 0) { sizeFromController._offlineTotalDocs = totalDocsFromSize; } return sizeFromController._offlineTotalDocs; } else { if (sizeFromController._realtimeTotalDocs < 0) { sizeFromController._realtimeTotalDocs = totalDocsFromSize; } return sizeFromController._realtimeTotalDocs; } } } long fetched; if (tableType == TableType.OFFLINE) { fetched = fetchTotalDocsFromSegmentMetadata(tableNameWithType, sizeFromController._offlineSegments._segments, controllerBaseUrls); } else { fetched = fetchTotalDocsFromSegmentMetadata(tableNameWithType, sizeFromController._realtimeSegments._segments, controllerBaseUrls); } synchronized (sizeFromController) { if (tableType == TableType.OFFLINE) { if (sizeFromController._offlineTotalDocs < 0) { sizeFromController._offlineTotalDocs = fetched; } return sizeFromController._offlineTotalDocs; } else { if (sizeFromController._realtimeTotalDocs < 0) { sizeFromController._realtimeTotalDocs = fetched; } return sizeFromController._realtimeTotalDocs; } } } private long getTotalDocs(TableSize sizeFromController, TableType tableType, String tableNameWithType, List<String> controllerBaseUrls) { if (tableType == TableType.OFFLINE && sizeFromController._offlineSegments != null && sizeFromController._offlineSegments._segments != null) { return getTotalDocsWithCaching(sizeFromController, TableType.OFFLINE, tableNameWithType, controllerBaseUrls); } if (tableType == TableType.REALTIME && sizeFromController._realtimeSegments != null && sizeFromController._realtimeSegments._segments != null) { return getTotalDocsWithCaching(sizeFromController, TableType.REALTIME, tableNameWithType, controllerBaseUrls); } ``` -- This is an 
automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
