kbendick commented on a change in pull request #4054: URL: https://github.com/apache/iceberg/pull/4054#discussion_r800964205
########## File path: core/src/main/java/org/apache/iceberg/io/inmemory/InMemoryFileStore.java ########## @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.io.inmemory; + +import java.nio.ByteBuffer; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +/** + * An in-memory collection based storage for file contents + * keyed by a string location. + */ +class InMemoryFileStore { + + private final ConcurrentMap<String, byte[]> store; + + InMemoryFileStore() { + this.store = new ConcurrentHashMap<>(); + } + + /** + * Put the file contents at the given location, overwrite if it already exists. + */ + public void put(String location, ByteBuffer data) { + // Copy the contents and store it. + byte[] bytes = new byte[data.remaining()]; + data.get(bytes); + store.put(location, bytes); + } + + /** + * Get the file contents for the given location. + */ + public Optional<ByteBuffer> get(String location) { + return Optional.ofNullable(store.get(location)).map(bytes -> { + // Copy the contents and return it. + byte[] copy = new byte[bytes.length]; + System.arraycopy(bytes, 0, copy, 0, bytes.length); + return ByteBuffer.wrap(copy); + }); Review comment: Nit: This might be a good candidate for a `ByteBuffers#fromByteArray` function in the `ByteBuffers` utility class. ########## File path: core/src/test/java/org/apache/iceberg/io/inmemory/InMemoryCatalogTest.java ########## @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.io.inmemory; + +import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.hadoop.CommonCatalogTests; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.IntegerType; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Tests specific to {@link InMemoryCatalog}, e.g. rename table, alter namespace properties, etc. + * Common catalog tests can be found in {@link CommonCatalogTests}. + */ +public class InMemoryCatalogTest { + + private InMemoryCatalog catalog; + + @Before + public void before() { + catalog = new InMemoryCatalog(); + catalog.initialize("in-memory-catalog", ImmutableMap.of()); + } + + @Test + public void testSetProperties() { + Namespace namespace = Namespace.of("a"); + + // Create a namespace with properties: {'k1': 'v1', 'k2': 'v2'} + catalog.createNamespace(namespace, ImmutableMap.of("k1", "v1", "k2", "v2")); + // Verify the properties + assertEquals(ImmutableMap.of("k1", "v1", "k2", "v2"), catalog.loadNamespaceMetadata(namespace)); + + // Set properties such that 'k1' is overwritten and a new key 'k3' is added. + catalog.setProperties(namespace, ImmutableMap.of("k1", "v1'", "k3", "v3")); + // Verify the result of set properties. + assertEquals(ImmutableMap.of("k1", "v1'", "k2", "v2", "k3", "v3"), catalog.loadNamespaceMetadata(namespace)); + } + + @Test + public void testRemoveProperties() { + Namespace namespace = Namespace.of("a"); + + // Create a namespace with properties: {'k1': 'v1', 'k2': 'v2'} + catalog.createNamespace(namespace, ImmutableMap.of("k1", "v1", "k2", "v2")); + // Verify the properties + assertEquals(ImmutableMap.of("k1", "v1", "k2", "v2"), catalog.loadNamespaceMetadata(namespace)); + + // Remove properties 'k1' + catalog.removeProperties(namespace, ImmutableSet.of("k1", "k3")); + // Verify the result of set properties. Review comment: Nit: These comments don't add much value in my opinion. The code is readable as is, especially given the method names. I would remove most of them. ########## File path: core/src/main/java/org/apache/iceberg/io/inmemory/InMemoryCatalog.java ########## @@ -0,0 +1,374 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.io.inmemory; Review comment: Nit: Maybe the catalog itself deserves its own package? Everything in `org.apache.iceberg.io` (at least from `core`) seems to be just IO related. I'd personally put the actual FileIO stuff directly into `org.apache.iceberg.io`, as most of it begins with `InMemory` and there are a number of files in that package already. You could also do `org.apache.iceberg.io.memory` if you wanted, but I don't feel the extra level is needed based on the current contents. Then for the in-memory catalog implementation, I'd add it directly to `org.apache.iceberg.catalog`, but put it in `core` not `api`. But the catalog itself should probably not be in package `io` like this, as everything there is concerned with readers / writers and not actual catalog specific stuff. Just my 2 cents 😄 ########## File path: core/src/main/java/org/apache/iceberg/io/inmemory/InMemoryFileStore.java ########## @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.io.inmemory; + +import java.nio.ByteBuffer; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +/** + * An in-memory collection based storage for file contents + * keyed by a string location. + */ +class InMemoryFileStore { + + private final ConcurrentMap<String, byte[]> store; + + InMemoryFileStore() { + this.store = new ConcurrentHashMap<>(); + } + + /** + * Put the file contents at the given location, overwrite if it already exists. + */ + public void put(String location, ByteBuffer data) { + // Copy the contents and store it. + byte[] bytes = new byte[data.remaining()]; + data.get(bytes); + store.put(location, bytes); + } + + /** + * Get the file contents for the given location. + */ + public Optional<ByteBuffer> get(String location) { + return Optional.ofNullable(store.get(location)).map(bytes -> { + // Copy the contents and return it. + byte[] copy = new byte[bytes.length]; + System.arraycopy(bytes, 0, copy, 0, bytes.length); + return ByteBuffer.wrap(copy); + }); + } + + /** + * Remove the given location and its contents. + */ Review comment: Nit: Consider adding what's returned to the Javadoc? Maybe `Returns true if the item was found and removed from the in-memory store` ########## File path: core/src/test/java/org/apache/iceberg/io/inmemory/InMemoryFileIOTest.java ########## @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.io.inmemory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.stream.IntStream; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.io.PositionOutputStream; +import org.apache.iceberg.io.SeekableInputStream; +import org.junit.Before; +import org.junit.Test; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +public class InMemoryFileIOTest { + + private InMemoryFileIO fileIO; + private InMemoryFileStore store; + + @Before + public void before() { + fileIO = new InMemoryFileIO(); + store = fileIO.getStore(); + } + + @Test + public void testGetStore() { + assertNotNull(store); + } + + @Test + public void testDeleteFile() { + assertFalse(store.exists("file1")); + + // Create the file + store.put("file1", ByteBuffer.wrap(new byte[0])); + assertTrue(store.exists("file1")); + + // Delete the file + fileIO.deleteFile("file1"); + // Verify that the file has been deleted + assertFalse(store.exists("file1")); + } + + @Test + public void testNewInputFile() throws IOException { + String fileName = "file1"; + + store.put(fileName, ByteBuffer.wrap("data1".getBytes(UTF_8))); + + InputFile inputFile = fileIO.newInputFile(fileName); + assertNotNull(inputFile); + + assertTrue(inputFile.exists()); + assertEquals(fileName, inputFile.location()); + assertEquals(5, inputFile.getLength()); + + SeekableInputStream inputStream = inputFile.newStream(); + assertNotNull(inputStream); + + // Test read() + assertEquals(0, inputStream.getPos()); + assertEquals('d', inputStream.read()); + + // Test read(byte[], index, len) + byte[] dataRead = new byte[3]; + assertEquals(2, inputStream.read(dataRead, 0, 2)); + assertEquals("at", new String(dataRead, 0, 2, UTF_8)); + + inputStream.close(); + } + + @Test + public void testSeek() throws IOException { + String fileName = "file1"; + + // Number of rows + int numRowsInChunk = 10; + + // Emulate parquet file structure + ByteBuffer buffer = ByteBuffer.allocate(1024); + // Add magic number + buffer.put("par1".getBytes(UTF_8)); + // Add column chunk 1 for int32 + IntStream.range(0, numRowsInChunk).forEach(buffer::putInt); + // Add column chunk 2 for float64 + IntStream.range(0, numRowsInChunk).forEach(buffer::putDouble); + // Add metadata footer (index to the start of chunks) + buffer.putInt(4); + buffer.putInt(4 + numRowsInChunk * 4); + // Add footer size + buffer.putInt(8); + // Add magic number + buffer.put("par1".getBytes(UTF_8)); + + // Put the data in the store + buffer.flip(); + store.put(fileName, buffer); + + // magic number, chunk1, chunk2, footer, footer size, magic number + int expectedFileSize = 4 + numRowsInChunk * 4 + numRowsInChunk * 8 + 8 + 4 + 4; Review comment: Nit: for the `4 * numRowsInChunk` and `numRowsInChunk * 8` (and generally for all of the commented magic numbners), could you make named constants for them? Something like `int expectedFileSize = MAGIC_NUMBER_SIZE + numRowsInChunk * BYTES_IN_INT32 + numRowsInChunk * BYTES_IN_FLOAT64 + FOOTER + SIZE_OF_FOOTER + ....` (or however it is exactly that these numbers work out)? It's a bit hard to grok on first appearance and I think that would help quite a lot. ########## File path: core/src/main/java/org/apache/iceberg/io/inmemory/InMemoryFileStore.java ########## @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.io.inmemory; + +import java.nio.ByteBuffer; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +/** + * An in-memory collection based storage for file contents + * keyed by a string location. + */ +class InMemoryFileStore { + + private final ConcurrentMap<String, byte[]> store; + + InMemoryFileStore() { + this.store = new ConcurrentHashMap<>(); + } + + /** + * Put the file contents at the given location, overwrite if it already exists. + */ + public void put(String location, ByteBuffer data) { + // Copy the contents and store it. + byte[] bytes = new byte[data.remaining()]; + data.get(bytes); Review comment: Nit: You might consider using the `ByteBuffers` utility class instead. It's `org.apache.iceberg.util.ByteBuffers`. There's a `toByteArray` method there that handles some corner cases and thing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
