tolbertam commented on code in PR #2013: URL: https://github.com/apache/cassandra-java-driver/pull/2013#discussion_r1953319078
########## core/src/main/java/com/datastax/oss/driver/internal/core/addresstranslation/SubnetAddressTranslator.java: ########## @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datastax.oss.driver.internal.core.addresstranslation; + +import com.datastax.oss.driver.api.core.addresstranslation.AddressTranslator; +import com.datastax.oss.driver.api.core.config.DriverOption; +import com.datastax.oss.driver.api.core.context.DriverContext; +import com.google.common.base.Splitter; +import edu.umd.cs.findbugs.annotations.NonNull; +import inet.ipaddr.IPAddress; +import inet.ipaddr.IPAddressString; +import java.net.InetSocketAddress; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This translator returns the proxy address of the private subnet containing the Cassandra node IP, + * or default address if no matching subnets, or passes through the original node address if no + * default configured. 
+ * + * <p>The translator can be used for scenarios when all nodes are behind some kind of proxy, and + * that proxy is different for nodes located in different subnets (eg. when Cassandra is deployed in + * multiple datacenters/regions). One can use this, for example, for Cassandra on Kubernetes with + * different Cassandra datacenters deployed to different Kubernetes clusters. + */ +public class SubnetAddressTranslator implements AddressTranslator { + + private static final Logger LOG = LoggerFactory.getLogger(SubnetAddressTranslator.class); + + /** + * A map of Cassandra node subnets (CIDR notations) to target addresses, for example (note quoted + * keys): + * + * <pre> + * advanced.address-translator.subnet-addresses { + * "100.64.0.0/15" = "cassandra.datacenter1.com:9042" + * "100.66.0.0/15" = "cassandra.datacenter2.com" + * # IPv6 example: + * # "::ffff:6440:0/111" = "cassandra.datacenter1.com:9042" Review Comment: :heart: appreciate the IPv6 support! ########## core/src/main/java/com/datastax/oss/driver/internal/core/addresstranslation/SubnetAddressTranslator.java: ########## @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.datastax.oss.driver.internal.core.addresstranslation; + +import com.datastax.oss.driver.api.core.addresstranslation.AddressTranslator; +import com.datastax.oss.driver.api.core.config.DriverOption; +import com.datastax.oss.driver.api.core.context.DriverContext; +import com.google.common.base.Splitter; +import edu.umd.cs.findbugs.annotations.NonNull; +import inet.ipaddr.IPAddress; +import inet.ipaddr.IPAddressString; +import java.net.InetSocketAddress; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This translator returns the proxy address of the private subnet containing the Cassandra node IP, + * or default address if no matching subnets, or passes through the original node address if no + * default configured. + * + * <p>The translator can be used for scenarios when all nodes are behind some kind of proxy, and + * that proxy is different for nodes located in different subnets (eg. when Cassandra is deployed in + * multiple datacenters/regions). One can use this, for example, for Cassandra on Kubernetes with + * different Cassandra datacenters deployed to different Kubernetes clusters. + */ +public class SubnetAddressTranslator implements AddressTranslator { + + private static final Logger LOG = LoggerFactory.getLogger(SubnetAddressTranslator.class); + + /** + * A map of Cassandra node subnets (CIDR notations) to target addresses, for example (note quoted + * keys): + * + * <pre> + * advanced.address-translator.subnet-addresses { + * "100.64.0.0/15" = "cassandra.datacenter1.com:9042" + * "100.66.0.0/15" = "cassandra.datacenter2.com" + * # IPv6 example: + * # "::ffff:6440:0/111" = "cassandra.datacenter1.com:9042" + * # "::ffff:6442:0/111" = "cassandra.datacenter2.com" + * } + * </pre> + * + * If configured without port, the default 9042 will be used. Also supports IPv6 subnets. 
Note: + * subnets must be represented as prefix blocks, see {@link inet.ipaddr.Address#isPrefixBlock()}. + */ + public static final String ADDRESS_TRANSLATOR_SUBNET_ADDRESSES = + "advanced.address-translator.subnet-addresses"; + + /** + * A default address to fallback to if Cassandra node IP isn't contained in any of the configured + * subnets. If configured without port, the default 9042 will be used. Also supports IPv6 Review Comment: I was just looking over the existing configuration in reference.conf and it looks like we are pretty consistent about requiring the native port when it comes to specifying endpoints (e.g. `basic.contact-points` mandates this through `ContactPoints.extract`). I think it may just be better to always require the port to be consistent with `basic.contact-points`, and it would also simplify the code a little bit. ########## core/src/main/resources/reference.conf: ########## @@ -1020,17 +1020,30 @@ datastax-java-driver { # the package com.datastax.oss.driver.internal.core.addresstranslation. # # The driver provides the following implementations out of the box: - # - PassThroughAddressTranslator: returns all addresses unchanged + # - PassThroughAddressTranslator: returns all addresses unchanged. # - FixedHostNameAddressTranslator: translates all addresses to a specific hostname. + # - SubnetAddressTranslator: translates addresses to hostname based on the subnet matches. # - Ec2MultiRegionAddressTranslator: suitable for an Amazon multi-region EC2 deployment where # clients are also deployed in EC2. It optimizes network costs by favoring private IPs over # public ones whenever possible. # # You can also specify a custom class that implements AddressTranslator and has a public # constructor with a DriverContext argument. class = PassThroughAddressTranslator + # # This property has to be set only in case you use FixedHostNameAddressTranslator. 
# advertised-hostname = mycustomhostname + # + # Theses properties have to be set only in case you use SubnetAddressTranslator. Review Comment: ```suggestion # These properties are only applicable in case you use SubnetAddressTranslator. ``` ########## core/src/main/resources/reference.conf: ########## @@ -1020,17 +1020,30 @@ datastax-java-driver { # the package com.datastax.oss.driver.internal.core.addresstranslation. # # The driver provides the following implementations out of the box: - # - PassThroughAddressTranslator: returns all addresses unchanged + # - PassThroughAddressTranslator: returns all addresses unchanged. # - FixedHostNameAddressTranslator: translates all addresses to a specific hostname. + # - SubnetAddressTranslator: translates addresses to hostname based on the subnet matches. # - Ec2MultiRegionAddressTranslator: suitable for an Amazon multi-region EC2 deployment where # clients are also deployed in EC2. It optimizes network costs by favoring private IPs over # public ones whenever possible. # # You can also specify a custom class that implements AddressTranslator and has a public # constructor with a DriverContext argument. class = PassThroughAddressTranslator + # # This property has to be set only in case you use FixedHostNameAddressTranslator. # advertised-hostname = mycustomhostname + # + # Theses properties have to be set only in case you use SubnetAddressTranslator. + # subnet-addresses { + # "100.64.0.0/15" = "cassandra.datacenter1.com:9042" + # "100.66.0.0/15" = "cassandra.datacenter2.com" # port defaults to 9042 if not specified + # # IPv6 example: + # # "::ffff:6440:0/111" = "cassandra.datacenter1.com:9042" + # # "::ffff:6442:0/111" = "cassandra.datacenter2.com" # port defaults to 9042 if not specified + # } + # Optional. When configured, addresses not matching the configured subnets are translated to it. Port defaults to 9042 if not specified. Review Comment: ```suggestion # Optional. 
When configured, addresses not matching the configured subnets are translated to this address. Port defaults to 9042 if not specified. ``` ########## core/src/main/java/com/datastax/oss/driver/internal/core/addresstranslation/SubnetAddressTranslator.java: ########## @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datastax.oss.driver.internal.core.addresstranslation; + +import com.datastax.oss.driver.api.core.addresstranslation.AddressTranslator; +import com.datastax.oss.driver.api.core.config.DriverOption; +import com.datastax.oss.driver.api.core.context.DriverContext; +import com.google.common.base.Splitter; +import edu.umd.cs.findbugs.annotations.NonNull; +import inet.ipaddr.IPAddress; +import inet.ipaddr.IPAddressString; +import java.net.InetSocketAddress; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This translator returns the proxy address of the private subnet containing the Cassandra node IP, + * or default address if no matching subnets, or passes through the original node address if no + * default configured. 
+ * + * <p>The translator can be used for scenarios when all nodes are behind some kind of proxy, and + * that proxy is different for nodes located in different subnets (eg. when Cassandra is deployed in + * multiple datacenters/regions). One can use this, for example, for Cassandra on Kubernetes with + * different Cassandra datacenters deployed to different Kubernetes clusters. + */ +public class SubnetAddressTranslator implements AddressTranslator { + + private static final Logger LOG = LoggerFactory.getLogger(SubnetAddressTranslator.class); + + /** + * A map of Cassandra node subnets (CIDR notations) to target addresses, for example (note quoted + * keys): + * + * <pre> + * advanced.address-translator.subnet-addresses { + * "100.64.0.0/15" = "cassandra.datacenter1.com:9042" + * "100.66.0.0/15" = "cassandra.datacenter2.com" + * # IPv6 example: + * # "::ffff:6440:0/111" = "cassandra.datacenter1.com:9042" + * # "::ffff:6442:0/111" = "cassandra.datacenter2.com" + * } + * </pre> + * + * If configured without port, the default 9042 will be used. Also supports IPv6 subnets. Note: + * subnets must be represented as prefix blocks, see {@link inet.ipaddr.Address#isPrefixBlock()}. + */ + public static final String ADDRESS_TRANSLATOR_SUBNET_ADDRESSES = + "advanced.address-translator.subnet-addresses"; + + /** + * A default address to fallback to if Cassandra node IP isn't contained in any of the configured + * subnets. If configured without port, the default 9042 will be used. Also supports IPv6 + * addresses. 
+ */ + public static final String ADDRESS_TRANSLATOR_DEFAULT_ADDRESS = + "advanced.address-translator.default-address"; + + public static DriverOption ADDRESS_TRANSLATOR_SUBNET_ADDRESSES_OPTION = + new DriverOption() { + @NonNull + @Override + public String getPath() { + return ADDRESS_TRANSLATOR_SUBNET_ADDRESSES; + } + }; + + public static DriverOption ADDRESS_TRANSLATOR_DEFAULT_ADDRESS_OPTION = + new DriverOption() { + @NonNull + @Override + public String getPath() { + return ADDRESS_TRANSLATOR_DEFAULT_ADDRESS; + } + }; + + private static final String DELIMITER = ":"; + private static final int DEFAULT_PORT = 9042; + + private final List<SubnetAddress> subnetAddresses; + private final Optional<InetSocketAddress> defaultAddress; + private final String logPrefix; + + public SubnetAddressTranslator(@NonNull DriverContext context) { + logPrefix = context.getSessionName(); + this.subnetAddresses = + context.getConfig().getDefaultProfile() + .getStringMap(ADDRESS_TRANSLATOR_SUBNET_ADDRESSES_OPTION).entrySet().stream() + .map( + e -> { + // Quoted and/or containing forward slashes map keys in reference.conf are read to + // strings with additional quotes, eg. 100.64.0.0/15 -> '100.64.0."0/15"' or + // "100.64.0.0/15" -> '"100.64.0.0/15"' Review Comment: This is a bit of a weird idiosyncrasy of the parser, but like you said, it's invisible to the user, so I think it's ok. ########## core/src/main/java/com/datastax/oss/driver/internal/core/addresstranslation/SubnetAddressTranslator.java: ########## @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datastax.oss.driver.internal.core.addresstranslation; + +import com.datastax.oss.driver.api.core.addresstranslation.AddressTranslator; +import com.datastax.oss.driver.api.core.config.DriverOption; +import com.datastax.oss.driver.api.core.context.DriverContext; +import com.google.common.base.Splitter; +import edu.umd.cs.findbugs.annotations.NonNull; +import inet.ipaddr.IPAddress; +import inet.ipaddr.IPAddressString; +import java.net.InetSocketAddress; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This translator returns the proxy address of the private subnet containing the Cassandra node IP, + * or default address if no matching subnets, or passes through the original node address if no + * default configured. + * + * <p>The translator can be used for scenarios when all nodes are behind some kind of proxy, and + * that proxy is different for nodes located in different subnets (eg. when Cassandra is deployed in + * multiple datacenters/regions). One can use this, for example, for Cassandra on Kubernetes with + * different Cassandra datacenters deployed to different Kubernetes clusters. 
+ */ +public class SubnetAddressTranslator implements AddressTranslator { + + private static final Logger LOG = LoggerFactory.getLogger(SubnetAddressTranslator.class); + + /** + * A map of Cassandra node subnets (CIDR notations) to target addresses, for example (note quoted + * keys): + * + * <pre> + * advanced.address-translator.subnet-addresses { + * "100.64.0.0/15" = "cassandra.datacenter1.com:9042" + * "100.66.0.0/15" = "cassandra.datacenter2.com" + * # IPv6 example: + * # "::ffff:6440:0/111" = "cassandra.datacenter1.com:9042" + * # "::ffff:6442:0/111" = "cassandra.datacenter2.com" + * } + * </pre> + * + * If configured without port, the default 9042 will be used. Also supports IPv6 subnets. Note: + * subnets must be represented as prefix blocks, see {@link inet.ipaddr.Address#isPrefixBlock()}. + */ + public static final String ADDRESS_TRANSLATOR_SUBNET_ADDRESSES = + "advanced.address-translator.subnet-addresses"; + + /** + * A default address to fallback to if Cassandra node IP isn't contained in any of the configured + * subnets. If configured without port, the default 9042 will be used. Also supports IPv6 + * addresses. 
+ */ + public static final String ADDRESS_TRANSLATOR_DEFAULT_ADDRESS = + "advanced.address-translator.default-address"; + + public static DriverOption ADDRESS_TRANSLATOR_SUBNET_ADDRESSES_OPTION = + new DriverOption() { + @NonNull + @Override + public String getPath() { + return ADDRESS_TRANSLATOR_SUBNET_ADDRESSES; + } + }; + + public static DriverOption ADDRESS_TRANSLATOR_DEFAULT_ADDRESS_OPTION = + new DriverOption() { + @NonNull + @Override + public String getPath() { + return ADDRESS_TRANSLATOR_DEFAULT_ADDRESS; + } + }; + + private static final String DELIMITER = ":"; + private static final int DEFAULT_PORT = 9042; + + private final List<SubnetAddress> subnetAddresses; + private final Optional<InetSocketAddress> defaultAddress; + private final String logPrefix; + + public SubnetAddressTranslator(@NonNull DriverContext context) { + logPrefix = context.getSessionName(); + this.subnetAddresses = + context.getConfig().getDefaultProfile() + .getStringMap(ADDRESS_TRANSLATOR_SUBNET_ADDRESSES_OPTION).entrySet().stream() + .map( + e -> { + // Quoted and/or containing forward slashes map keys in reference.conf are read to + // strings with additional quotes, eg. 
100.64.0.0/15 -> '100.64.0."0/15"' or + // "100.64.0.0/15" -> '"100.64.0.0/15"' + String subnet = e.getKey().replaceAll("\"", ""); + String address = e.getValue(); + return new SubnetAddress(subnet, address); + }) + .collect(Collectors.toList()); + this.defaultAddress = + Optional.ofNullable( + context + .getConfig() + .getDefaultProfile() + .getString(ADDRESS_TRANSLATOR_DEFAULT_ADDRESS_OPTION, null)) + .map(SubnetAddressTranslator::parseAddress); + SubnetAddressTranslator.validateSubnetsAreNotOverlapping(this.subnetAddresses); + } + + @NonNull + @Override + public InetSocketAddress translate(@NonNull InetSocketAddress address) { + InetSocketAddress translatedAddress = null; + for (SubnetAddress subnetAddress : subnetAddresses) { + if (subnetAddress.contains(address)) { + translatedAddress = subnetAddress.address; + } + } + if (translatedAddress == null && defaultAddress.isPresent()) { + translatedAddress = defaultAddress.get(); + } + if (translatedAddress == null) { + translatedAddress = address; + } + LOG.debug("[{}] Resolved {} to {}", logPrefix, address, translatedAddress); + return translatedAddress; + } + + @Override + public void close() {} + + private static InetSocketAddress parseAddress(String address) { + List<String> addressTuple = Splitter.onPattern(DELIMITER).splitToList(address); + if (addressTuple.size() == 2) { + return new InetSocketAddress(addressTuple.get(0), Integer.parseInt(addressTuple.get(1))); Review Comment: One thing I just realized is that since this is done on the construction of the address translator, it means that we will resolve an IP address from the host name at instantiation; Alternatively `FixedHostNameAddressTranslator` resolves in `InetSocketAddress.translate(address)` which means it will be resolved on control connection initialization and refreshing the node list. Both can be problematic if we ever expect DNS changes to point to a new IP. 
One might have to bounce their app to pick up the new IP(s) from the DNS change if suddenly the existing IP you were using no longer points to your service. In Kubernetes, depending on your service/loadbalancer implementation, I suppose it's possible the underlying IP may change, right? I think generally it will be stable/static, but may vary depending on implementation. The driver has a configuration `advanced.resolve-contact-points` that allows one to change the behavior such that contact points aren't initially resolved; rather, they get resolved every time we attempt to create a new connection, which makes it resilient to IP changes. I think it could be useful to utilize this configuration in such a way: 1. Defer parsing into `InetSocketAddress` in `translate` itself, much like `FixedHostNameAddressTranslator` does, and store the `String` as is in `SubnetAddress`. 2. Use `ContactPoints.extract(String, boolean)` in `translate`. For `resolve`, pull that value from `advanced.resolve-contact-points`. (We could consider doing the same for `FixedHostNameAddressTranslator`, but we don't need to solve that in this PR.) This would allow the DNS resolution to always happen whenever a connection is created. Additionally, I believe the `DefaultEndPoint` implementations will continue to be considered equal if the underlying IPs change, which would be nice I suppose. Thoughts? ########## manual/core/address_resolution/README.md: ########## @@ -118,6 +118,52 @@ datastax-java-driver.advanced.address-translator.class = com.mycompany.MyAddress Note: the contact points provided while creating the `CqlSession` are not translated, only addresses retrieved from or sent by Cassandra nodes are. +### Fixed proxy hostname Review Comment: Fantastic, thanks for writing up docs on this!
########## manual/core/address_resolution/README.md: ########## @@ -118,6 +118,52 @@ datastax-java-driver.advanced.address-translator.class = com.mycompany.MyAddress Note: the contact points provided while creating the `CqlSession` are not translated, only addresses retrieved from or sent by Cassandra nodes are. +### Fixed proxy hostname + +If your client applications access Cassandra through some kind of proxy (eg. with AWS PrivateLink when all Cassandra +nodes are exposed via one hostname pointing to AWS Endpoint), you can configure driver with +`FixedHostNameAddressTranslator` to always translate all node addresses to that same proxy hostname, no matter what IP +address a node has but still using its native transport port. + +To use it, specify the following in the [configuration](../configuration): + +``` +datastax-java-driver.advanced.address-translator.class = FixedHostNameAddressTranslator +advertised-hostname = proxyhostname +``` + +### Fixed proxy hostname per subnet + +When running Cassandra in a private network and accessing it from outside of that private network via some kind of +proxy, we have an option to use `FixedHostNameAddressTranslator`. But for multi-datacenter Cassandra deployments, we +want to have more control over routing queries to a specific datacenter (eg. for optimizing latencies), which requires +setting up a separate proxy per datacenter. + +Normally, each Cassandra datacenter nodes are deployed to a separate subnet to support internode communications in the +cluster and avoid IP addresses collisions. So when Cassandra broadcasts its nodes IP addresses, we can determine which +datacenter that node belongs to by checking its IP address against the given datacenter subnet. + +For such scenarios you can use `SubnetAddressTranslator` to translate node IPs to the datacenter proxy address +associated with it. 
+ +To use it, specify the following in the [configuration](../configuration): +``` +datastax-java-driver.advanced.address-translator { + class = SubnetAddressTranslator + subnet-addresses { + "100.64.0.0/15" = "cassandra.datacenter1.com:9042" + "100.66.0.0/15" = "cassandra.datacenter2.com" # port defaults to 9042 if not specified + # IPv6 example: + # "::ffff:6440:0/111" = "cassandra.datacenter1.com:9042" + # "::ffff:6442:0/111" = "cassandra.datacenter2.com" # port defaults to 9042 if not specified + } + # Optional. When configured, addresses not matching the configured subnets are translated to it. Port defaults to 9042 if not specified. + default-address = "cassandra.datacenter1.com:9042" +} +``` + +Such setup is common for running Cassandra on Kubernetes with [k8ssandra](https://docs.k8ssandra.io/). Review Comment: It would be good to also note that the optional IPAddress dependency must be explicitly declared in your project to use this translator. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For additional commands, e-mail: pr-h...@cassandra.apache.org