shizy818 commented on code in PR #15143: URL: https://github.com/apache/iotdb/pull/15143#discussion_r2004966616
########## iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/memtable/AbstractWritableMemChunk.java: ########## @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.db.storageengine.dataregion.memtable; + +import org.apache.iotdb.db.queryengine.exception.MemoryNotEnoughException; +import org.apache.iotdb.db.queryengine.execution.fragment.FragmentInstanceContext; +import org.apache.iotdb.db.queryengine.execution.fragment.QueryContext; +import org.apache.iotdb.db.queryengine.plan.planner.memory.MemoryReservationManager; +import org.apache.iotdb.db.utils.datastructure.TVList; + +import java.util.Iterator; + +public class AbstractWritableMemChunk { + protected static long RETRY_INTERVAL_MS = 100L; + protected static long MAX_WAIT_QUERY_MS = 60 * 1000L; + + /** + * Release the TVList if there is no query on it. Otherwise, it should set the first query as the + * owner. TVList is released until all queries finish. If it throws memory-not-enough exception + * during owner transfer, retry the release process after 100ms. If the problem is still not + * solved in 60s, it starts to abort first query, kick it out of the query list and retry. 
This + * method must ensure success because it's part of flushing. + * + * @param tvList + */ + protected void maybeReleaseTvList(TVList tvList) { + long startTimeInMs = System.currentTimeMillis(); + boolean succeed = false; + while (!succeed) { + try { + tryReleaseTvList(tvList); + succeed = true; + } catch (MemoryNotEnoughException ex) { + long waitQueryInMs = System.currentTimeMillis() - startTimeInMs; + if (waitQueryInMs > MAX_WAIT_QUERY_MS) { + // Abort first query in the list. When all queries in the list have been aborted, + // tryReleaseTvList will ensure succeed finally. + tvList.lockQueryList(); + try { + // fail the first query + Iterator<QueryContext> iterator = tvList.getQueryContextSet().iterator(); + if (iterator.hasNext()) { + FragmentInstanceContext firstQuery = (FragmentInstanceContext) iterator.next(); + firstQuery.failed( + new MemoryNotEnoughException( + "Memory not enough to clone the tvlist during flush phase")); + iterator.remove(); Review Comment: I do not quite understand this. We actively fail the query and let it know the reason is 'MemoryNotEnoughException'. Otherwise, how does the query know the failure reason? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
