On Sat, Mar 20, 2021 at 09:01:25PM +0100, Peter Eisentraut wrote:
> On 18.03.21 13:51, John Naylor wrote:
> > Hi David,
> > 
> > Just a nitpick:
> > 
> > +SET bytea_output TO hex;
> > 
> > Since we don't see the string in the output, I don't immediately see the
> > reason to change the output format here?

That's how I got it to work. If there's a way to make it go without
that, I'd be delighted to learn what it is :)

> > Aside from that, this patch works as expected, and is ready for committer.
> 
> I have now read the entire internet on what a suitable name for this
> function could be.  I think the emerging winner is BIT_COUNT(), which
> already exists in MySQL, and also in Python (int.bit_count()) and Java
> (Integer.bitCount()).

Thanks for doing this tedious work. Please find attached the next
version of the patch.

Best,
David.
-- 
David Fetter <david(at)fetter(dot)org> http://fetter.org/
Phone: +1 415 235 3778

Remember to vote!
Consider donating to Postgres: http://www.postgresql.org/about/donate
>From 9483d41941b5daa44e46e6fb164846647d716406 Mon Sep 17 00:00:00 2001
From: David Fetter <da...@fetter.org>
Date: Wed, 30 Dec 2020 02:51:46 -0800
Subject: [PATCH v5] popcount
To: hackers
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------2.30.2"

This is a multi-part message in MIME format.
--------------2.30.2
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit


Make popcount accessible from SQL, as bit_count(), for the BIT VARYING and BYTEA types.

diff --git doc/src/sgml/func.sgml doc/src/sgml/func.sgml
index 68fe6a95b4..066431fd3c 100644
--- doc/src/sgml/func.sgml
+++ doc/src/sgml/func.sgml
@@ -4030,6 +4030,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
       </para></entry>
      </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>bit_count</primary>
+        </indexterm>
+        <function>bit_count</function> ( <parameter>bytes</parameter> <type>bytea</type> )
+        <returnvalue>bigint</returnvalue>
+       </para>
+       <para>
+        Counts the number of bits set in a binary string.
+       </para>
+       <para>
+        <literal>bit_count('\xdeadbeef'::bytea)</literal>
+        <returnvalue>24</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -4830,6 +4847,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>bit_count</primary>
+        </indexterm>
+        <function>bit_count</function> ( <parameter>bits</parameter> <type>bit</type> )
+        <returnvalue>bigint</returnvalue>
+       </para>
+       <para>
+        Counts the number of bits set in a bit string.
+       </para>
+       <para>
+        <literal>bit_count(B'101010101010101010')</literal>
+        <returnvalue>9</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -4869,6 +4903,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
         <returnvalue>101010001010101010</returnvalue>
        </para></entry>
       </row>
+
      </tbody>
     </tgroup>
    </table>
diff --git src/include/catalog/pg_proc.dat src/include/catalog/pg_proc.dat
index e259531f60..feb00eccf9 100644
--- src/include/catalog/pg_proc.dat
+++ src/include/catalog/pg_proc.dat
@@ -1446,6 +1446,9 @@
 { oid => '752', descr => 'substitute portion of string',
   proname => 'overlay', prorettype => 'bytea',
   proargtypes => 'bytea bytea int4', prosrc => 'byteaoverlay_no_len' },
+{ oid => '8436', descr => 'count set bits',
+  proname => 'bit_count', prorettype => 'int8', proargtypes => 'bytea',
+  prosrc => 'byteapopcount' },
 
 { oid => '725',
   proname => 'dist_pl', prorettype => 'float8', proargtypes => 'point line',
@@ -3876,6 +3879,9 @@
 { oid => '3033', descr => 'set bit',
   proname => 'set_bit', prorettype => 'bit', proargtypes => 'bit int4 int4',
   prosrc => 'bitsetbit' },
+{ oid => '8435', descr => 'count set bits',
+  proname => 'bit_count', prorettype => 'int8', proargtypes => 'bit',
+  prosrc => 'bitpopcount' },
 
 # for macaddr type support
 { oid => '436', descr => 'I/O',
diff --git src/backend/utils/adt/varbit.c src/backend/utils/adt/varbit.c
index 2235866244..c9c6c73422 100644
--- src/backend/utils/adt/varbit.c
+++ src/backend/utils/adt/varbit.c
@@ -36,6 +36,7 @@
 #include "libpq/pqformat.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
+#include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/varbit.h"
@@ -1878,3 +1879,30 @@ bitgetbit(PG_FUNCTION_ARGS)
 	else
 		PG_RETURN_INT32(0);
 }
+
+/*
+ * bitpopcount
+ *
+ * SQL-callable implementation of bit_count(bit): returns, as an int8, the
+ * number of bits set in a bit string.
+ */
+Datum
+bitpopcount(PG_FUNCTION_ARGS)
+{
+	VarBit	   *arg1 = PG_GETARG_VARBIT_P(0);
+	int64		popcount;
+
+	/*
+	 * Count over every byte that holds bit data; VARBITBYTES() supplies
+	 * that byte count.  The last byte may be only partly used, so this
+	 * relies on its unused bits being zero (NOTE(review): maintained
+	 * elsewhere in varbit.c; not visible in this hunk).
+	 *
+	 * Overflow of the int64 result is not a practical concern: reaching
+	 * INT64_MAX set bits would take an object about an exbibyte long, far
+	 * more than PostgreSQL can store.
+	 */
+	popcount = pg_popcount((char *) VARBITS(arg1), VARBITBYTES(arg1));
+
+	PG_RETURN_INT64(popcount);
+}
diff --git src/backend/utils/adt/varlena.c src/backend/utils/adt/varlena.c
index 0bc345aa4d..95091887a9 100644
--- src/backend/utils/adt/varlena.c
+++ src/backend/utils/adt/varlena.c
@@ -3440,6 +3440,22 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
 	return result;
 }
 
+/*
+ * byteapopcount - return the number of bits set in a bytea value
+ */
+Datum
+byteapopcount(PG_FUNCTION_ARGS)
+{
+	bytea	   *bytes = PG_GETARG_BYTEA_PP(0);
+	int			nbytes = VARSIZE_ANY_EXHDR(bytes);
+	int64		nset;
+
+	/* Count over the data bytes only, not the varlena header. */
+	nset = pg_popcount(VARDATA_ANY(bytes), nbytes);
+
+	PG_RETURN_INT64(nset);
+}
+
 /*
  * byteapos -
  *	  Return the position of the specified substring.
diff --git src/test/regress/expected/bit.out src/test/regress/expected/bit.out
index a7f95b846d..9b6b3d0c4f 100644
--- src/test/regress/expected/bit.out
+++ src/test/regress/expected/bit.out
@@ -710,6 +710,19 @@ SELECT overlay(B'0101011100' placing '001' from 20);
  0101011100001
 (1 row)
 
+-- Popcount
+SELECT bit_count(B'0101011100'::bit(10));
+ bit_count 
+-----------
+         5
+(1 row)
+
+SELECT bit_count(B'1111111111'::bit(10));
+ bit_count 
+-----------
+        10
+(1 row)
+
 -- This table is intentionally left around to exercise pg_dump/pg_upgrade
 CREATE TABLE bit_defaults(
   b1 bit(4) DEFAULT '1001',
diff --git src/test/regress/expected/strings.out src/test/regress/expected/strings.out
index fb4573d85f..e8c6a99e9d 100644
--- src/test/regress/expected/strings.out
+++ src/test/regress/expected/strings.out
@@ -2227,3 +2227,10 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5
  Th\000o\x02\x03
 (1 row)
 
+SET bytea_output TO hex;
+SELECT bit_count(E'\\xdeadbeef'::bytea);
+ bit_count 
+-----------
+        24
+(1 row)
+
diff --git src/test/regress/sql/bit.sql src/test/regress/sql/bit.sql
index ea01742c4a..271aa5ea3c 100644
--- src/test/regress/sql/bit.sql
+++ src/test/regress/sql/bit.sql
@@ -215,6 +215,10 @@ SELECT overlay(B'0101011100' placing '101' from 6);
 SELECT overlay(B'0101011100' placing '001' from 11);
 SELECT overlay(B'0101011100' placing '001' from 20);
 
+-- Popcount
+SELECT bit_count(B'0101011100'::bit(10));
+SELECT bit_count(B'1111111111'::bit(10));
+
 -- This table is intentionally left around to exercise pg_dump/pg_upgrade
 CREATE TABLE bit_defaults(
   b1 bit(4) DEFAULT '1001',
diff --git src/test/regress/sql/strings.sql src/test/regress/sql/strings.sql
index 57a48c9d0b..3fb2d66a9f 100644
--- src/test/regress/sql/strings.sql
+++ src/test/regress/sql/strings.sql
@@ -742,3 +742,6 @@ SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);
 SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2),'escape');
 SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape');
 SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape');
+
+SET bytea_output TO hex;
+SELECT bit_count(E'\\xdeadbeef'::bytea);

--------------2.30.2--


Reply via email to