bryancall commented on code in PR #12948:
URL: https://github.com/apache/trafficserver/pull/12948#discussion_r2927975785


##########
plugins/header_rewrite/conditions_geo_maxmind.cc:
##########
@@ -51,9 +76,23 @@ MMConditionGeo::initLibrary(const std::string &path)
   if (MMDB_SUCCESS != status) {
     Dbg(pi_dbg_ctl, "Cannot open %s - %s", path.c_str(), 
MMDB_strerror(status));
     delete gMaxMindDB;
+    gMaxMindDB = nullptr;
     return;
   }
-  Dbg(pi_dbg_ctl, "Loaded %s", path.c_str());
+
+  // Probe the database schema at load time so we know which field paths to
+  // use for country lookups.  Try a few well-known IPs until one hits.
+  for (auto *ip : probe_ips) {
+    int                  gai_error, mmdb_error;
+    MMDB_lookup_result_s result = MMDB_lookup_string(gMaxMindDB, ip, 
&gai_error, &mmdb_error);
+    if (gai_error == 0 && MMDB_SUCCESS == mmdb_error && result.found_entry) {
+      gMmdbSchema = detect_schema(&result.entry);
+      Dbg(pi_dbg_ctl, "Loaded %s (schema: %s)", path.c_str(), gMmdbSchema == 
MmdbSchema::FLAT ? "flat" : "nested");
+      return;
+    }
+  }
+
+  Dbg(pi_dbg_ctl, "Loaded %s (schema: defaulting to nested, no probe IPs 
matched)", path.c_str());

Review Comment:
   The probe IPs (8.8.8.8, 1.1.1.1, 128.0.0.1) cover every real-world GeoIP 
database we've seen — any database missing all three would be unusual. The 
default fallback is nested, which covers all major vendors (GeoLite2, GeoIP2, 
DBIP). Lazy detection or dual-path lookups would add per-request overhead for a 
scenario that doesn't arise in practice.



##########
plugins/header_rewrite/CMakeLists.txt:
##########
@@ -60,7 +60,34 @@ if(BUILD_TESTING)
   target_link_libraries(test_header_rewrite PRIVATE header_rewrite_parser 
ts::inkevent ts::tscore)
 
   if(maxminddb_FOUND)
+    target_compile_definitions(test_header_rewrite PRIVATE 
TS_USE_HRW_MAXMINDDB=1)
     target_link_libraries(test_header_rewrite PRIVATE maxminddb::maxminddb)
+
+    find_package(
+      Python3
+      COMPONENTS Interpreter
+      QUIET
+    )
+    if(Python3_FOUND)
+      set(_mmdb_test_dir "${CMAKE_CURRENT_BINARY_DIR}/test_mmdb")
+      add_custom_command(
+        OUTPUT "${_mmdb_test_dir}/test_flat_geo.mmdb" 
"${_mmdb_test_dir}/test_nested_geo.mmdb"
+        COMMAND ${CMAKE_COMMAND} -E make_directory "${_mmdb_test_dir}"
+        COMMAND ${Python3_EXECUTABLE} 
"${CMAKE_CURRENT_SOURCE_DIR}/generate_test_mmdb.py" "${_mmdb_test_dir}"
+        DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/generate_test_mmdb.py"
+        COMMENT "Generating test MMDB files for header_rewrite"
+      )
+      add_custom_target(
+        generate_test_mmdb DEPENDS "${_mmdb_test_dir}/test_flat_geo.mmdb" 
"${_mmdb_test_dir}/test_nested_geo.mmdb"
+      )
+      add_dependencies(test_header_rewrite generate_test_mmdb)
+      set_tests_properties(
+        test_header_rewrite
+        PROPERTIES
+          ENVIRONMENT
+          
"MMDB_TEST_FLAT=${_mmdb_test_dir}/test_flat_geo.mmdb;MMDB_TEST_NESTED=${_mmdb_test_dir}/test_nested_geo.mmdb"
+      )

Review Comment:
   Fixed in 340aa8e — CMake now checks for the mmdb_writer and netaddr Python 
modules at configure time via `execute_process()` before adding the custom 
command/target. If the modules are not available, it prints a status message 
and skips generation entirely.



##########
plugins/header_rewrite/generate_test_mmdb.py:
##########
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+#
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""
+Generate test MMDB files for header_rewrite geo lookup unit tests.
+
+Two schemas exist in the wild:
+
+  Nested (GeoLite2/GeoIP2/DBIP):  country -> iso_code
+  Flat   (vendor-specific):       country_code (top-level)
+
+This script generates one MMDB file for each schema so the C++ test
+can verify that auto-detection works for both.
+
+Requires: pip install mmdb-writer
+"""
+
+import os
+import sys
+
+try:
+    from mmdb_writer import MMDBWriter, MmdbU32
+    import netaddr
+except ImportError:
+    print("SKIP: mmdb-writer or netaddr not installed (pip install 
mmdb-writer)", file=sys.stderr)
+    sys.exit(0)

Review Comment:
   Fixed in 340aa8e — changed to `sys.exit(1)` and added the CMake-side module 
check so the script is only invoked when dependencies are present. Also updated 
the message to mention both required packages.



##########
plugins/header_rewrite/header_rewrite_test.cc:
##########
@@ -538,12 +545,202 @@ test_tokenizer()
   return errors;
 }
 
+#if TS_USE_HRW_MAXMINDDB
+
+static bool
+file_exists(const char *path)
+{
+  struct stat st;
+  return stat(path, &st) == 0;
+}
+
+static int
+open_test_mmdb(MMDB_s &mmdb, const char *path)
+{
+  int status = MMDB_open(path, MMDB_MODE_MMAP, &mmdb);
+  if (MMDB_SUCCESS != status) {
+    std::cerr << "Cannot open " << path << ": " << MMDB_strerror(status) << 
std::endl;
+    return 1;
+  }
+  return 0;
+}
+
+static std::string
+lookup_country(MMDB_s &mmdb, const char *ip)
+{
+  int                  gai_error, mmdb_error;
+  MMDB_lookup_result_s result = MMDB_lookup_string(&mmdb, ip, &gai_error, 
&mmdb_error);
+
+  if (gai_error != 0 || MMDB_SUCCESS != mmdb_error || !result.found_entry) {
+    return "(lookup_failed)";
+  }
+
+  MMDB_entry_data_s entry_data;
+
+  // Try nested path first (GeoLite2 style)
+  int status = MMDB_get_value(&result.entry, &entry_data, "country", 
"iso_code", NULL);
+  if (MMDB_SUCCESS == status && entry_data.has_data && entry_data.type == 
MMDB_DATA_TYPE_UTF8_STRING) {
+    return std::string(entry_data.utf8_string, entry_data.data_size);
+  }
+
+  // Try flat path (vendor style)
+  status = MMDB_get_value(&result.entry, &entry_data, "country_code", NULL);
+  if (MMDB_SUCCESS == status && entry_data.has_data && entry_data.type == 
MMDB_DATA_TYPE_UTF8_STRING) {
+    return std::string(entry_data.utf8_string, entry_data.data_size);
+  }
+
+  return "(not_found)";
+}
+
+static uint32_t
+lookup_asn(MMDB_s &mmdb, const char *ip)
+{
+  int                  gai_error, mmdb_error;
+  MMDB_lookup_result_s result = MMDB_lookup_string(&mmdb, ip, &gai_error, 
&mmdb_error);
+
+  if (gai_error != 0 || MMDB_SUCCESS != mmdb_error || !result.found_entry) {
+    return 0;
+  }
+
+  MMDB_entry_data_s entry_data;
+  int               status = MMDB_get_value(&result.entry, &entry_data, 
"autonomous_system_number", NULL);
+  if (MMDB_SUCCESS == status && entry_data.has_data && entry_data.type == 
MMDB_DATA_TYPE_UINT32) {
+    return entry_data.uint32;
+  }
+  return 0;
+}
+
+// Test that we can read country codes from a flat-schema MMDB (vendor 
databases
+// where country_code is a top-level string field).
+int
+test_flat_schema()
+{
+  const char *path = getenv("MMDB_TEST_FLAT");
+  if (path == nullptr || !file_exists(path)) {
+    std::cout << "SKIP: flat-schema test mmdb not found (set MMDB_TEST_FLAT)" 
<< std::endl;
+    return 0;

Review Comment:
   Fixed in 340aa8e — added `#include <cstdlib>`.



##########
plugins/header_rewrite/header_rewrite_test.cc:
##########
@@ -24,9 +24,16 @@
 #include <cstdarg>
 #include <iostream>
 #include <ostream>
+#include <sys/stat.h>
 
 #include "parser.h"
 
+#if TS_USE_HRW_MAXMINDDB
+#include <maxminddb.h>
+#include <arpa/inet.h>
+#include <netdb.h>

Review Comment:
   Fixed in 340aa8e — removed both unused includes.



##########
plugins/header_rewrite/generate_test_mmdb.py:
##########
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+#
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""
+Generate test MMDB files for header_rewrite geo lookup unit tests.
+
+Two schemas exist in the wild:
+
+  Nested (GeoLite2/GeoIP2/DBIP):  country -> iso_code
+  Flat   (vendor-specific):       country_code (top-level)
+
+This script generates one MMDB file for each schema so the C++ test
+can verify that auto-detection works for both.
+
+Requires: pip install mmdb-writer
+"""
+
+import os
+import sys
+
+try:
+    from mmdb_writer import MMDBWriter, MmdbU32
+    import netaddr
+except ImportError:
+    print("SKIP: mmdb-writer or netaddr not installed (pip install 
mmdb-writer)", file=sys.stderr)
+    sys.exit(0)

Review Comment:
   Fixed in 340aa8e — updated both the skip message and the docstring to 
mention netaddr alongside mmdb-writer.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to