zhongqishang commented on code in PR #3793:
URL: https://github.com/apache/amoro/pull/3793#discussion_r2416467965


##########
amoro-optimizer/amoro-optimizer-common/src/main/java/org/apache/amoro/optimizer/common/OptimizerExecutor.java:
##########
@@ -190,4 +187,31 @@ public static OptimizingTaskResult executeTask(
       return errorResult;
     }
   }
+
+  private static Map<String, String> fillTaskProperties(
+      OptimizerConfig config, OptimizingTask task) {
+    if (config.isCacheEnabled()) {
+      System.setProperty(DeleteCache.DELETE_CACHE_ENABLED, "true");
+    }
+    if 
(!config.getCacheMaxEntrySize().equals(DeleteCache.DELETE_CACHE_MAX_ENTRY_SIZE_DEFAULT))
 {
+      System.setProperty(DeleteCache.DELETE_CACHE_MAX_ENTRY_SIZE, 
config.getCacheMaxEntrySize());
+    }
+    if 
(!config.getCacheMaxTotalSize().equals(DeleteCache.DELETE_CACHE_MAX_TOTAL_SIZE_DEFAULT))
 {
+      System.setProperty(
+          DeleteCache.DELETE_CACHE_MAX_TOTAL_SIZE_DEFAULT, 
config.getCacheMaxTotalSize());
+    }
+    if 
(!config.getCacheMaxEntrySize().equals(DeleteCache.DELETE_CACHE_TIMEOUT)) {

Review Comment:
   ```suggestion
       if (!config.getCacheTimeout().equals(DeleteCache.DELETE_CACHE_TIMEOUT)) {
   ```



##########
amoro-optimizer/amoro-optimizer-common/src/main/java/org/apache/amoro/optimizer/common/OptimizerConfig.java:
##########
@@ -79,6 +79,30 @@ public class OptimizerConfig implements Serializable {
   @Option(name = "-id", aliases = "--" + OptimizerProperties.RESOURCE_ID, 
usage = "Resource id")
   private String resourceId;
 
+  @Option(
+      name = "-ce",
+      aliases = "--" + OptimizerProperties.OPTIMIZER_CACHE_ENABLED,
+      usage = "Whether enable cache, default false")
+  private boolean cacheEnabled = 
OptimizerProperties.OPTIMIZER_CACHE_ENABLED_DEFAULT;
+
+  @Option(
+      name = "-cmts",
+      aliases = "--" + OptimizerProperties.OPTIMIZER_CACHE_MAX_TOTAL_SIZE,
+      usage = "Max total size in cache, default 100MB")
+  private String cacheMaxTotalSize = 
OptimizerProperties.OPTIMIZER_CACHE_MAX_TOTAL_SIZE_DEFAULT;
+
+  @Option(
+      name = "-cmes",
+      aliases = "--" + OptimizerProperties.OPTIMIZER_CACHE_MAX_ENTRY_SIZE,
+      usage = "Max entry size in cache, default 50MB")

Review Comment:
   ```suggestion
         usage = "Max entry size in cache, default 64MB")
   ```



##########
docs/admin-guides/managing-optimizers.md:
##########
@@ -378,3 +386,7 @@ The description of the relevant parameters is shown in the 
following table:
 | -eds     | No       | Whether extend storage to disk, default false.         
                                                                                
                                                                                
                   |
 | -dsp     | No       | Defines the directory where the storage files are 
saved, the default temporary-file directory is specified by the system property 
`java.io.tmpdir`. On UNIX systems the default value of this property is 
typically "/tmp" or "/var/tmp". |
 | -msz     | No       | Memory storage size limit when extending disk 
storage(MB), default 512(MB).                                                   
                                                                                
                            |
+| -ce      | No       | Whether enable cache in optimizer, default false.      
                                                                                
                                                                                
                    |
+| -cmts    | No       | Max total size in optimier cache, default 100MB.       
                                                                                
                                                                                
                   |
+| -cmes    | No       | Max entry size in optimizer cache, default 50MB.       
                                                                                
                                                                                
                   |

Review Comment:
   ```suggestion
   | -cmts    | No       | Max total size in optimier cache, default 128MB.     
                                                                                
                                                                                
                     |
   | -cmes    | No       | Max entry size in optimizer cache, default 64MB.     
                                                                                
                                                                                
                     |
   ```



##########
docs/admin-guides/managing-optimizers.md:
##########
@@ -351,6 +355,10 @@ The description of the relevant parameters is shown in the 
following table:
 | -eds     | No       | Whether extend storage to disk, default false.         
                                                                                
                                                                                
                   |
 | -dsp     | No       | Defines the directory where the storage files are 
saved, the default temporary-file directory is specified by the system property 
`java.io.tmpdir`. On UNIX systems the default value of this property is 
typically "/tmp" or "/var/tmp". |
 | -msz     | No       | Memory storage size limit when extending disk 
storage(MB), default 512(MB).                                                   
                                                                                
                            |
+| -ce      | No       | Whether enable cache in optimizer, default false.      
                                                                                
                                                                                
                   |
+| -cmts    | No       | Max total size in optimier cache, default 100MB.       
                                                                                
                                                                                
                   |
+| -cmes    | No       | Max entry size in optimizer cache, default 50MB.       
                                                                                
                                                                                
                   |

Review Comment:
   ```suggestion
   | -cmts    | No       | Max total size in optimier cache, default 128MB.     
                                                                                
                                                                                
                     |
   | -cmes    | No       | Max entry size in optimizer cache, default 64MB.     
                                                                                
                                                                                
                     |
   ```



##########
amoro-optimizer/amoro-optimizer-common/src/main/java/org/apache/amoro/optimizer/common/OptimizerConfig.java:
##########
@@ -79,6 +79,30 @@ public class OptimizerConfig implements Serializable {
   @Option(name = "-id", aliases = "--" + OptimizerProperties.RESOURCE_ID, 
usage = "Resource id")
   private String resourceId;
 
+  @Option(
+      name = "-ce",
+      aliases = "--" + OptimizerProperties.OPTIMIZER_CACHE_ENABLED,
+      usage = "Whether enable cache, default false")
+  private boolean cacheEnabled = 
OptimizerProperties.OPTIMIZER_CACHE_ENABLED_DEFAULT;
+
+  @Option(
+      name = "-cmts",
+      aliases = "--" + OptimizerProperties.OPTIMIZER_CACHE_MAX_TOTAL_SIZE,
+      usage = "Max total size in cache, default 100MB")

Review Comment:
   ```suggestion
         usage = "Max total size in cache, default 128MB")
   ```



##########
amoro-format-iceberg/src/main/java/org/apache/amoro/io/reader/DeleteCache.java:
##########
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.amoro.io.reader;
+
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import org.apache.amoro.config.ConfigHelpers;
+import org.apache.amoro.shade.guava32.com.google.common.base.Preconditions;
+import org.apache.amoro.utils.MemorySize;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+/** A common implementation of {@link org.apache.iceberg.data.DeleteLoader}. */
+public class DeleteCache {
+  private static final Logger LOG = LoggerFactory.getLogger(DeleteCache.class);
+  public static final String DELETE_CACHE_ENABLED = "delete-cache-enabled";
+  public static final String DELETE_CACHE_ENABLED_DEFAULT = "false";
+  public static final String DELETE_CACHE_MAX_ENTRY_SIZE = 
"delete-cache-max-entry-size";
+  public static final String DELETE_CACHE_MAX_ENTRY_SIZE_DEFAULT = "64mb";
+  public static final String DELETE_CACHE_MAX_TOTAL_SIZE = 
"delete-cache-max-total-size";
+  public static final String DELETE_CACHE_MAX_TOTAL_SIZE_DEFAULT = "128mb";
+  public static final String DELETE_CACHE_TIMEOUT = "delete-cache-timeout";
+  public static final String DELETE_CACHE_TIMEOUT_DEFAULT = "10min";
+  private static final int MAX_GROUPS = 5;
+  private static volatile DeleteCache INSTANCE;
+
+  private final Duration timeout;
+  private final long maxEntrySize;
+  private final long maxTotalSize;
+  private final List<String> groups = new CopyOnWriteArrayList<>();
+  private volatile Cache<String, CacheValue> state;
+
+  private DeleteCache(Duration timeout, long maxEntrySize, long maxTotalSize) {
+    this.timeout = timeout;
+    this.maxEntrySize = maxEntrySize;
+    this.maxTotalSize = maxTotalSize;
+  }
+
+  public static DeleteCache getInstance() {
+    if (INSTANCE == null) {
+      long maxEntrySize =
+          MemorySize.parseBytes(
+              System.getProperty(DELETE_CACHE_MAX_ENTRY_SIZE, 
DELETE_CACHE_MAX_ENTRY_SIZE_DEFAULT));
+      long maxTotalSize =
+          MemorySize.parseBytes(
+              System.getProperty(DELETE_CACHE_MAX_TOTAL_SIZE, 
DELETE_CACHE_MAX_TOTAL_SIZE_DEFAULT));
+      Duration timeout =
+          ConfigHelpers.TimeUtils.parseDuration(
+              System.getProperty(DELETE_CACHE_TIMEOUT, 
DELETE_CACHE_TIMEOUT_DEFAULT));
+      initialInstance(timeout, maxEntrySize, maxTotalSize);
+    }
+    return INSTANCE;
+  }
+
+  public static synchronized void initialInstance(
+      Duration timeout, long maxEntrySize, long maxTotalSize) {
+    if (INSTANCE == null) {
+      INSTANCE = new DeleteCache(timeout, maxEntrySize, maxTotalSize);
+    } else {
+      LOG.warn("Cache is already initialed.");
+    }
+  }
+
+  public <V> V getOrLoad(String group, String key, Supplier<V> valueSupplier, 
long valueSize) {
+    if (valueSize > maxEntrySize) {
+      LOG.debug("{} exceeds max entry size: {} > {}", key, valueSize, 
maxEntrySize);
+      return valueSupplier.get();
+    }
+    if (!groups.contains(group) && groups.size() > MAX_GROUPS) {
+      String removed = groups.remove(MAX_GROUPS - 1);
+      groups.add(group);
+      if (removed != null) {
+        invalidate(removed);
+      }
+    }

Review Comment:
   - Condition `groups.size() > MAX_GROUPS` is always false.
   - `groups.remove(MAX_GROUPS - 1)` seems not the remove last one.



##########
docs/admin-guides/managing-optimizers.md:
##########
@@ -282,9 +282,13 @@ The optimizer group supports the following properties:
 | Property                       | Container type | Required | Default         
                                                                      | 
Description                                                                     
                                                                                
                                                                                
                                                                                
                                                                                
 |
 
|--------------------------------|----------------|----------|---------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | scheduling-policy              | All            | No       | quota           
                                                                      | The 
scheduler group scheduling policy, the default value is `quota`, it will be 
scheduled according to the quota resources configured for each table, the 
larger the table quota is, the more optimizer resources it can take. There is 
also a configuration `balanced` that will balance the scheduling of each table, 
the longer the table has not been optimized, the higher the scheduling priority 
will be. |
-| memory                         | Local          | Yes      | N/A             
                                                                      | The max 
memory of JVM for local optimizer, in MBs.                                      
                                                                                
                                                                                
                                                                                
                                                                         |
 | max-input-file-size-per-thread | All            | No       | 
536870912(512MB)                                                                
      | Max input file size per optimize thread.                                
                                                                                
                                                                                
                                                                                
                                                                                
         |
 | ams-optimizing-uri             | All            | No       | 
thrift://{ams.server-expose-host}:{ams.thrift-server.optimizing-service.binding-port}
 | Table optimizing service endpoint. This is used when the default service 
endpoint is not visitable.                                                      
                                                                                
                                                                                
                                                                                
        |
+| cache-enabled                  | All            | No       | false           
                                                                      | Whether 
enable cache in optimizer.                                                      
                                                                                
                                                                                
                                                                                
                                                                         |
+| cache-max-total-size           | All            | No       | 100mb           
                                                                      | Max 
total size in optimier cache.                                                   
                                                                                
                                                                                
                                                                                
                                                                             |
+| cache-max-entry-size           | All            | No       | 50mb            
                                                                      | Max 
entry size in optimizer cache.                                                  
                                                                                
                                                                                
                                                                                
                                                                             |
+| cache-timeout                  | All            | No       | 10min           
                                                                      | Timeout 
in optimizer cache.                                                             
                                                                                
                                                                                
                                                                                
                                                                         |
+| memory                         | Local          | Yes      | N/A             
                                                                      | The max 
memory of JVM for local optimizer, in MBs.                                      
                                                                                
                                                                                
                                                                                
                                                                         |

Review Comment:
   Optimizer properties doesn't seem to work, am I missing something?



##########
amoro-format-iceberg/src/main/java/org/apache/amoro/io/reader/DeleteCache.java:
##########
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.amoro.io.reader;
+
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import org.apache.amoro.config.ConfigHelpers;
+import org.apache.amoro.shade.guava32.com.google.common.base.Preconditions;
+import org.apache.amoro.utils.MemorySize;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+/** A common implementation of {@link org.apache.iceberg.data.DeleteLoader}. */

Review Comment:
   Comment does not match.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to