From 4a6128fbf4ab7a190835a1c693dbd3bdb9e32ae4 Mon Sep 17 00:00:00 2001
From: Jelte Fennema <jelte.fennema@microsoft.com>
Date: Fri, 17 Mar 2023 09:14:02 +0100
Subject: [PATCH v13 4/5] Add DNS based libpq load balancing test

This adds a test for DNS based load balancing in libpq. This patch was a
point of discussion due to the fact that it requires changing /etc/hosts
for this test to run. Thus it was removed from the main libpq load
balancing patch, and added to its own patch for further discussion.

This patch also adds tests for DNS based retries of connections of libpq,
even when load balancing is disabled. We did not have tests for that
behaviour either.
---
 .cirrus.yml                                   |  16 ++-
 doc/src/sgml/regress.sgml                     |  11 +-
 .../libpq/t/004_load_balance_dns.pl           | 122 ++++++++++++++++++
 3 files changed, 147 insertions(+), 2 deletions(-)
 create mode 100644 src/interfaces/libpq/t/004_load_balance_dns.pl

diff --git a/.cirrus.yml b/.cirrus.yml
index 505c50f3285..04786174ed4 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -25,7 +25,7 @@ env:
   MTEST_ARGS: --print-errorlogs --no-rebuild -C build
   PGCTLTIMEOUT: 120 # avoids spurious failures during parallel tests
   TEMP_CONFIG: ${CIRRUS_WORKING_DIR}/src/tools/ci/pg_ci_base.conf
-  PG_TEST_EXTRA: kerberos ldap ssl
+  PG_TEST_EXTRA: kerberos ldap ssl load_balance
 
 
 # What files to preserve in case tests fail
@@ -313,6 +313,14 @@ task:
     mkdir -m 770 /tmp/cores
     chown root:postgres /tmp/cores
     sysctl kernel.core_pattern='/tmp/cores/%e-%s-%p.core'
+
+  setup_hosts_file_script: |
+    cat >> /etc/hosts <<-EOF
+      127.0.0.1 pg-loadbalancetest
+      127.0.0.2 pg-loadbalancetest
+      127.0.0.3 pg-loadbalancetest
+    EOF
+
   setup_additional_packages_script: |
     #apt-get update
     #DEBIAN_FRONTEND=noninteractive apt-get -y install ...
@@ -564,6 +572,12 @@ task:
   setup_additional_packages_script: |
     REM choco install -y --no-progress ...
 
+  setup_hosts_file_script: |
+    echo 127.0.0.1 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
+    echo 127.0.0.2 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
+    echo 127.0.0.3 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
+    type c:\Windows\System32\Drivers\etc\hosts
+
   # Use /DEBUG:FASTLINK to avoid high memory usage during linking
   configure_script: |
     vcvarsall x64
diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml
index 719e0a76985..dd7e4a200e9 100644
--- a/doc/src/sgml/regress.sgml
+++ b/doc/src/sgml/regress.sgml
@@ -256,7 +256,7 @@ make check-world -j8 >/dev/null
    <varname>PG_TEST_EXTRA</varname> to a whitespace-separated list, for
    example:
 <programlisting>
-make check-world PG_TEST_EXTRA='kerberos ldap ssl'
+make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance'
 </programlisting>
    The following values are currently supported:
    <variablelist>
@@ -290,6 +290,15 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl'
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>load_balance</literal></term>
+     <listitem>
+      <para>
+       Runs the test <filename>src/interfaces/libpq/t/004_load_balance_dns.pl</filename>.  This requires editing the system <filename>hosts</filename> file and opens TCP/IP listen sockets.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry>
      <term><literal>wal_consistency_checking</literal></term>
      <listitem>
diff --git a/src/interfaces/libpq/t/004_load_balance_dns.pl b/src/interfaces/libpq/t/004_load_balance_dns.pl
new file mode 100644
index 00000000000..a4f40510954
--- /dev/null
+++ b/src/interfaces/libpq/t/004_load_balance_dns.pl
@@ -0,0 +1,122 @@
+# Copyright (c) 2023, PostgreSQL Global Development Group
+use strict;
+use warnings;
+use Config;
+use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::Cluster;
+use Test::More;
+
+# This tests load balancing based on a DNS entry that contains multiple records
+# for different IPs. Since setting up a DNS server is more effort than we
+# consider reasonable to run this test, this situation is instead imitated by
+# using a hosts file where a single hostname maps to multiple different IP
+# addresses. This test requires the administrator to add the following lines to
+# the hosts file (if we detect that this hasn't happened we skip the test):
+#
+# 127.0.0.1 pg-loadbalancetest
+# 127.0.0.2 pg-loadbalancetest
+# 127.0.0.3 pg-loadbalancetest
+#
+# Windows or Linux is required to run this test because these OSes allow
+# binding to the 127.0.0.2 and 127.0.0.3 addresses by default, but other OSes
+# don't. We need to bind to different IP addresses, so that we can use these
+# different IP addresses in the hosts file.
+#
+# The hosts file needs to be prepared before running this test. We don't do it
+# on the fly, because it requires root permissions to change the hosts file. In
+# CI we set up the previously mentioned rules in the hosts file, so that this
+# load balancing method is tested.
+
+# This test is opt-in because it opens TCP/IP listen sockets. PG_TEST_EXTRA may
+# be unset, so test for that first to avoid an "uninitialized value" warning.
+if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bload_balance\b/)
+{
+	plan skip_all => 'Potentially unsafe test load_balance not enabled in PG_TEST_EXTRA';
+}
+
+# Only Linux and Windows allow binding to 127.0.0.2 and 127.0.0.3 by default.
+my $can_bind_to_127_0_0_2 = $Config{osname} eq 'linux' || $windows_os;
+if (!$can_bind_to_127_0_0_2)
+{
+	plan skip_all => "OS could not bind to 127.0.0.2";
+}
+
+# Skip unless the hosts file maps pg-loadbalancetest to all three loopback
+# addresses; with fewer entries some node could never be reached.
+my $hosts_path = $windows_os ? 'c:\Windows\System32\Drivers\etc\hosts' : '/etc/hosts';
+my $hosts_content = PostgreSQL::Test::Utils::slurp_file($hosts_path);
+my %mapped_ips = map { $_ => 1 }
+  ($hosts_content =~ /^\s*(127\.0\.0\.[1-3])\s+pg-loadbalancetest(?!\S)/mg);
+if (keys(%mapped_ips) != 3)
+{
+	plan skip_all => "hosts file was not prepared for DNS load balance test";
+}
+
+# All three nodes are created with the same port. node2 and node3 pass
+# own_host => 1, presumably so each listens on its own loopback address.
+$PostgreSQL::Test::Cluster::use_tcp = 1;
+$PostgreSQL::Test::Cluster::test_pghost = '127.0.0.1';
+my $port = PostgreSQL::Test::Cluster::get_free_port();
+my $node1 = PostgreSQL::Test::Cluster->new('node1', port => $port);
+my $node2 = PostgreSQL::Test::Cluster->new('node2', port => $port, own_host => 1);
+my $node3 = PostgreSQL::Test::Cluster->new('node3', port => $port, own_host => 1);
+
+# Create a data directory with initdb
+$node1->init();
+$node2->init();
+$node3->init();
+
+# Start the PostgreSQL server
+$node1->start();
+$node2->start();
+$node3->start();
+
+# load_balance_hosts=disable should always choose the first one.
+$node1->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=disable",
+	"load_balance_hosts=disable connects to the first node",
+	sql => "SELECT 'connect2'",
+	log_like => [qr/statement: SELECT 'connect2'/]);
+
+# Statistically the following loop with load_balance_hosts=random will almost
+# certainly connect at least once to each of the nodes. The chance of that not
+# happening is so small that it's negligible: (2/3)^50 = 1.56832855e-9
+foreach my $i (1 .. 50) {
+	$node1->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random",
+		"repeated connections with random load balancing",
+		sql => "SELECT 'connect1'");
+}
+
+# Count, per node, how many of those statements show up in its server log.
+my $node1_occurrences = () = $node1->log_content() =~ /statement: SELECT 'connect1'/g;
+my $node2_occurrences = () = $node2->log_content() =~ /statement: SELECT 'connect1'/g;
+my $node3_occurrences = () = $node3->log_content() =~ /statement: SELECT 'connect1'/g;
+
+my $total_occurrences = $node1_occurrences + $node2_occurrences + $node3_occurrences;
+
+# "At least one" means > 0; requiring > 1 would reject a single connection.
+ok($node1_occurrences > 0, "expected at least one execution on node1, found $node1_occurrences");
+ok($node2_occurrences > 0, "expected at least one execution on node2, found $node2_occurrences");
+ok($node3_occurrences > 0, "expected at least one execution on node3, found $node3_occurrences");
+ok($total_occurrences == 50, "expected 50 executions across all nodes, found $total_occurrences");
+
+# Take node1 and node2 down so that only node3 is left running.
+$node1->stop();
+$node2->stop();
+
+# load_balance_hosts=disable should continue trying hosts until it finds a
+# working one.
+$node3->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=disable",
+	"load_balance_hosts=disable continues until it connects to the a working node",
+	sql => "SELECT 'connect3'",
+	log_like => [qr/statement: SELECT 'connect3'/]);
+
+# Also with load_balance_hosts=random we continue to the next nodes if previous
+# ones are down. Connect a few times to make sure it's not just lucky.
+foreach my $i (1 .. 5) {
+	$node3->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random",
+		"load_balance_hosts=random continues until it connects to the a working node",
+		sql => "SELECT 'connect4'",
+		log_like => [qr/statement: SELECT 'connect4'/]);
+}
+
+done_testing();
-- 
2.34.1

