wgtmac commented on code in PR #296:
URL: https://github.com/apache/iceberg-cpp/pull/296#discussion_r2562647613


##########
src/iceberg/catalog/rest/rest_util.cc:
##########
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/catalog/rest/rest_util.h"
+
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+#include <cpr/util.h>
+
+namespace iceberg::rest {
+
+std::string_view TrimTrailingSlash(std::string_view str) {
+  while (!str.empty() && str.back() == '/') {
+    str.remove_suffix(1);
+  }
+  return str;
+}
+
+std::string EncodeString(std::string_view toEncode) {

Review Comment:
   ```suggestion
   std::string EncodeString(std::string_view str) {
   ```
   
   Or `to_encode`



##########
src/iceberg/test/rest_util_test.cc:
##########
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/catalog/rest/rest_util.h"
+
+#include <gtest/gtest.h>
+
+#include "iceberg/table_identifier.h"
+
+namespace iceberg::rest {
+
+TEST(RestUtilTest, TrimTrailingSlash) {
+  EXPECT_EQ(TrimTrailingSlash("https://foo";), "https://foo";);
+  EXPECT_EQ(TrimTrailingSlash("https://foo/";), "https://foo";);
+  EXPECT_EQ(TrimTrailingSlash("https://foo////";), "https://foo";);
+}
+
+TEST(RestUtilTest, RoundTripUrlEncodeDecodeNamespace) {
+  // {"dogs"}
+  EXPECT_EQ(EncodeNamespaceForUrl(Namespace{.levels = {"dogs"}}), "dogs");
+  EXPECT_EQ(DecodeNamespaceFromUrl("dogs").levels, 
std::vector<std::string>{"dogs"});
+
+  // {"dogs.named.hank"}
+  EXPECT_EQ(EncodeNamespaceForUrl(Namespace{.levels = {"dogs.named.hank"}}),
+            "dogs.named.hank");
+  EXPECT_EQ(DecodeNamespaceFromUrl("dogs.named.hank").levels,
+            std::vector<std::string>{"dogs.named.hank"});
+
+  // {"dogs/named/hank"}
+  EXPECT_EQ(EncodeNamespaceForUrl(Namespace{.levels = {"dogs/named/hank"}}),
+            "dogs%2Fnamed%2Fhank");
+  EXPECT_EQ(DecodeNamespaceFromUrl("dogs%2Fnamed%2Fhank").levels,
+            std::vector<std::string>{"dogs/named/hank"});
+
+  // {"dogs", "named", "hank"}
+  EXPECT_EQ(EncodeNamespaceForUrl(Namespace{.levels = {"dogs", "named", 
"hank"}}),
+            "dogs%1Fnamed%1Fhank");
+  EXPECT_EQ(DecodeNamespaceFromUrl("dogs%1Fnamed%1Fhank").levels,
+            (std::vector<std::string>{"dogs", "named", "hank"}));
+
+  // {"dogs.and.cats", "named", "hank.or.james-westfall"}
+  EXPECT_EQ(EncodeNamespaceForUrl(Namespace{
+                .levels = {"dogs.and.cats", "named", 
"hank.or.james-westfall"}}),
+            "dogs.and.cats%1Fnamed%1Fhank.or.james-westfall");
+  EXPECT_EQ(
+      
DecodeNamespaceFromUrl("dogs.and.cats%1Fnamed%1Fhank.or.james-westfall").levels,
+      (std::vector<std::string>{"dogs.and.cats", "named", 
"hank.or.james-westfall"}));
+
+  // empty namespace
+  EXPECT_EQ(EncodeNamespaceForUrl(Namespace{.levels = {}}), "");
+  EXPECT_EQ(DecodeNamespaceFromUrl("").levels, std::vector<std::string>{});
+}
+
+TEST(RestUtilTest, EncodeString) {
+  // RFC 3986 unreserved characters should not be encoded
+  EXPECT_EQ(EncodeString("abc123XYZ"), "abc123XYZ");
+  EXPECT_EQ(EncodeString("test-file_name.txt~backup"), 
"test-file_name.txt~backup");
+
+  // Spaces and special characters should be encoded
+  EXPECT_EQ(EncodeString("hello world"), "hello%20world");
+  EXPECT_EQ(EncodeString("[email protected]"), "test%40example.com");
+  EXPECT_EQ(EncodeString("path/to/file"), "path%2Fto%2Ffile");
+  EXPECT_EQ(EncodeString("key=value&foo=bar"), "key%3Dvalue%26foo%3Dbar");
+  EXPECT_EQ(EncodeString("100%"), "100%25");
+  EXPECT_EQ(EncodeString("hello\x1Fworld"), "hello%1Fworld");
+  EXPECT_EQ(EncodeString(""), "");
+}
+
+TEST(RestUtilTest, DecodeString) {
+  // Decode percent-encoded strings
+  EXPECT_EQ(DecodeString("hello%20world"), "hello world");
+  EXPECT_EQ(DecodeString("test%40example.com"), "[email protected]");
+  EXPECT_EQ(DecodeString("path%2Fto%2Ffile"), "path/to/file");
+  EXPECT_EQ(DecodeString("key%3Dvalue%26foo%3Dbar"), "key=value&foo=bar");
+  EXPECT_EQ(DecodeString("100%25"), "100%");
+
+  // ASCII Unit Separator (0x1F)
+  EXPECT_EQ(DecodeString("hello%1Fworld"), "hello\x1Fworld");
+
+  // Unreserved characters remain unchanged
+  EXPECT_EQ(DecodeString("test-file_name.txt~backup"), 
"test-file_name.txt~backup");
+  EXPECT_EQ(DecodeString(""), "");
+}
+
+TEST(RestUtilTest, EncodeDecodeStringRoundTrip) {
+  std::vector<std::string> test_cases = {"hello world",
+                                         "[email protected]",
+                                         "path/to/file",
+                                         "key=value&foo=bar",
+                                         "100%",
+                                         "hello\x1Fworld",
+                                         "special!@#$%^&*()chars",
+                                         "mixed-123_test.file~ok",
+                                         ""};
+
+  for (const auto& test : test_cases) {
+    std::string encoded = EncodeString(test);
+    std::string decoded = DecodeString(encoded);
+    EXPECT_EQ(decoded, test) << "Round-trip failed for: " << test;
+  }
+}
+
+TEST(RestUtilTest, MergeConfigs) {
+  std::unordered_map<std::string, std::string> server_defaults = {
+      {"default1", "value1"}, {"default2", "value2"}, {"common", 
"default_value"}};
+
+  std::unordered_map<std::string, std::string> client_configs = {
+      {"client1", "value1"}, {"common", "client_value"}};

Review Comment:
   We are missing a case where client config overrides the server defaults. 
Note that `"client_value"` has been overriden by `"override_value"`.



##########
src/iceberg/catalog/rest/rest_util.h:
##########
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+#include "iceberg/catalog/rest/iceberg_rest_export.h"
+#include "iceberg/table_identifier.h"
+
+namespace iceberg::rest {
+
+/// \brief Trim trailing slashes from a URI string.
+/// \details Removes all trailing '/' characters from the URI.
+/// \param str A string to trim.
+/// \return The trimmed URI string with all trailing slashes removed.

Review Comment:
   ```suggestion
   /// \brief Trim all trailing slashes from a string.
   /// \param str A string to trim.
   /// \return The trimmed string with all trailing slashes removed.
   ```



##########
src/iceberg/catalog/rest/rest_util.cc:
##########
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/catalog/rest/rest_util.h"
+
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+#include <cpr/util.h>
+
+namespace iceberg::rest {
+
+std::string_view TrimTrailingSlash(std::string_view str) {
+  while (!str.empty() && str.back() == '/') {
+    str.remove_suffix(1);
+  }
+  return str;
+}
+
+std::string EncodeString(std::string_view toEncode) {
+  // Use CPR's urlEncode which internally calls libcurl's curl_easy_escape()
+  cpr::util::SecureString encoded = cpr::util::urlEncode(toEncode);
+  return {encoded.data(), encoded.size()};
+}
+
+std::string DecodeString(std::string_view encoded) {
+  // Use CPR's urlDecode which internally calls libcurl's curl_easy_unescape()
+  cpr::util::SecureString decoded = cpr::util::urlDecode(encoded);
+  return {decoded.data(), decoded.size()};
+}
+
+std::string EncodeNamespaceForUrl(const Namespace& ns) {
+  if (ns.levels.empty()) {
+    return "";
+  }
+  std::string result = EncodeString(ns.levels.front());
+
+  // Join encoded levels with "%1F"
+  for (size_t i = 1; i < ns.levels.size(); ++i) {
+    result.append("%1F");

Review Comment:
   Define `constexpr std::string_view kNamespaceEscapedSeparator = "%1F"` to 
avoid using magic string.



##########
src/iceberg/catalog/rest/rest_catalog.h:
##########
@@ -1,41 +1,119 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 
 #pragma once
 
+#include <memory>
 #include <string>
 
-#include <cpr/cpr.h>
-
+#include "iceberg/catalog.h"
+#include "iceberg/catalog/rest/catalog_properties.h"
+#include "iceberg/catalog/rest/http_client.h"

Review Comment:
   ```suggestion
   ```
   
   These can be removed to use forward declaration. Perhaps we can add a 
`iceberg/catalog/rest/type_fwd.h` to define forward declaration of all rest 
types in a single place.



##########
src/iceberg/catalog/rest/rest_util.cc:
##########
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/catalog/rest/rest_util.h"
+
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+#include <cpr/util.h>
+
+namespace iceberg::rest {
+
+std::string_view TrimTrailingSlash(std::string_view str) {
+  while (!str.empty() && str.back() == '/') {
+    str.remove_suffix(1);
+  }
+  return str;
+}
+
+std::string EncodeString(std::string_view toEncode) {
+  // Use CPR's urlEncode which internally calls libcurl's curl_easy_escape()
+  cpr::util::SecureString encoded = cpr::util::urlEncode(toEncode);

Review Comment:
   Per the comment from libcurl below, `NULL` will be returned and thus 
`SecureString` would be an empty string in this case. We need to wrap the 
return type with `Result<std::string>` to capture this.
   
   ```
   /*
    * NAME curl_easy_escape()
    *
    * DESCRIPTION
    *
    * Escapes URL strings (converts all letters consider illegal in URLs to 
their
    * %XX versions). This function returns a new allocated string or NULL if an
    * error occurred.
    */
   CURL_EXTERN char *curl_easy_escape(CURL *handle,
                                      const char *string,
                                      int length);
   
   /*
    * NAME curl_easy_unescape()
    *
    * DESCRIPTION
    *
    * Unescapes URL encoding in strings (converts all %XX codes to their 8bit
    * versions). This function returns a new allocated string or NULL if an 
error
    * occurred.
    * Conversion Note: On non-ASCII platforms the ASCII %XX codes are
    * converted into the host encoding.
    */
   CURL_EXTERN char *curl_easy_unescape(CURL *handle,
                                        const char *string,
                                        int length,
                                        int *outlength);
   ```



##########
src/iceberg/catalog/rest/catalog_properties.h:
##########
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "iceberg/catalog/rest/iceberg_rest_export.h"
+#include "iceberg/result.h"
+#include "iceberg/util/config.h"
+
+/// \file iceberg/catalog/rest/catalog_properties.h
+/// \brief RestCatalogProperties implementation for Iceberg REST API.
+
+namespace iceberg::rest {
+
+/// \brief Configuration class for a REST Catalog.
+class ICEBERG_REST_EXPORT RestCatalogProperties
+    : public ConfigBase<RestCatalogProperties> {
+ public:
+  template <typename T>
+  using Entry = const ConfigBase<RestCatalogProperties>::Entry<T>;
+
+  /// \brief The URI of the REST catalog server.

Review Comment:
   ```suggestion
     /// \brief The (required) URI of the REST catalog server.
   ```



##########
src/iceberg/catalog/rest/constant.h:
##########
@@ -28,17 +28,16 @@
 
 namespace iceberg::rest {
 
-constexpr std::string_view kHeaderContentType = "Content-Type";
-constexpr std::string_view kHeaderAccept = "Accept";
-constexpr std::string_view kHeaderXClientVersion = "X-Client-Version";
-constexpr std::string_view kHeaderUserAgent = "User-Agent";
+inline const std::string kHeaderContentType = "Content-Type";

Review Comment:
   The main reason is that we still need to wrap it with `std::string` every 
time to create http headers.



##########
src/iceberg/catalog/rest/rest_util.h:
##########
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+#include "iceberg/catalog/rest/iceberg_rest_export.h"
+#include "iceberg/table_identifier.h"
+
+namespace iceberg::rest {
+
+/// \brief Trim trailing slashes from a URI string.
+/// \details Removes all trailing '/' characters from the URI.
+/// \param str A string to trim.
+/// \return The trimmed URI string with all trailing slashes removed.
+ICEBERG_REST_EXPORT std::string_view TrimTrailingSlash(std::string_view str);
+
+/// \brief URL-encode a string (RFC 3986).
+/// \details This implementation uses libcurl (via CPR), which follows RFC 
3986 strictly:
+/// - Unreserved characters: [A-Z], [a-z], [0-9], "-", "_", ".", "~"
+/// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+").
+/// - All other characters are percent-encoded (%XX).
+/// \param toEncode The string to encode.
+/// \return The URL-encoded string.
+/// \note Iceberg's Java client uses `java.net.URLEncoder`, which follows the
+/// `application/x-www-form-urlencoded` standard (HTML Forms). We strictly 
adhere to RFC
+/// 3986 to ensure correct URI path semantics (encoding spaces as %20 rather 
than +) for
+/// maximum cross-platform interoperability.
+ICEBERG_REST_EXPORT std::string EncodeString(std::string_view toEncode);

Review Comment:
   ```suggestion
   ICEBERG_REST_EXPORT std::string EncodeString(std::string_view str);
   ```



##########
src/iceberg/catalog/rest/rest_catalog.h:
##########
@@ -1,41 +1,119 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 
 #pragma once
 
+#include <memory>
 #include <string>
 
-#include <cpr/cpr.h>
-
+#include "iceberg/catalog.h"
+#include "iceberg/catalog/rest/catalog_properties.h"
+#include "iceberg/catalog/rest/http_client.h"
 #include "iceberg/catalog/rest/iceberg_rest_export.h"
+#include "iceberg/catalog/rest/resource_paths.h"
+#include "iceberg/result.h"
+
+/// \file iceberg/catalog/rest/rest_catalog.h
+/// RestCatalog implementation for Iceberg REST API.
 
-namespace iceberg::catalog::rest {
+namespace iceberg::rest {
 
-class ICEBERG_REST_EXPORT RestCatalog {
+/// \brief Rest catalog implementation.
+class ICEBERG_REST_EXPORT RestCatalog : public Catalog {
  public:
-  explicit RestCatalog(const std::string& base_url);
-  ~RestCatalog() = default;
+  RestCatalog(const RestCatalog&) = delete;
+  RestCatalog& operator=(const RestCatalog&) = delete;
+  RestCatalog(RestCatalog&&) = delete;

Review Comment:
   I think this and below can be default?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to