Hi here

These patches add a few functions to the ist API, as well as two converters:
one to validate a FIX message and one to extract data from a FIX payload.

Thanks to Christopher for his help during this development.

Baptiste
From 4e9de7128c7065dc01b423dcce13b18487f1f353 Mon Sep 17 00:00:00 2001
From: Baptiste Assmann <bed...@gmail.com>
Date: Tue, 17 Mar 2020 10:18:41 +0100
Subject: [PATCH 4/4] MINOR: conv: parses Financial Information eXchange
 messages

This patch implements a couple of converters to validate and extract data from
a FIX message.
The validation consists in a few checks such as mandatory fields and
checksum computation.
The extraction can get any tag value based on a tag string or tag id.
---
 doc/configuration.txt |  36 ++++++++
 include/proto/fix.h   | 200 ++++++++++++++++++++++++++++++++++++++++++
 include/types/fix.h   |  55 ++++++++++++
 src/sample.c          |  72 +++++++++++++++
 4 files changed, 363 insertions(+)
 create mode 100644 include/proto/fix.h
 create mode 100644 include/types/fix.h

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 8347e8a4d..81b53c59f 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -13926,6 +13926,42 @@ field(<index>,<delimiters>[,<count>])
       str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
       str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3
 
+fix_tag_value(<tag>)
+  Parses a FIX (Financial Information eXchange) message and extracts the value
+  from the tag <tag>.
+  <tag> can be a string or an integer pointing to the desired tag. Any integer
+  value is accepted, but only the following strings are translated into their
+  integer equivalent: BeginString, BodyLength, MsgType, SenderComID,
+  TagetComID, CheckSum. If more are needed, we can add them in proto/fix.h
+  easily.
+
+  Note: only the first message sent by the client and the server can be parsed.
+
+  Example:
+      tcp-request inspect-delay 10s
+      acl data_in_buffer req.len gt 10
+      # MsgType tag ID is 35, so both lines below will return the same content
+      tcp-request content set-var(txn.foo) req.payload(0,0),fix_tag_value(35) \
+                  if data_in_buffer
+      tcp-request content set-var(txn.bar) req.payload(0,0),fix_tag_value(MsgType) \
+                  if data_in_buffer
+
+fix_validate
+  Parses a binary payload and performs sanity checks regarding FIX (Financial
+  Information eXchange):
+  - checks the BeginString tag
+  - checks that all tag IDs are numeric
+  - checks that the last tag in the message is the CheckSum one
+  - checks that the checksum is right
+
+  This converter returns a boolean, true if the payload contains a valid FIX
+  message, false if not.
+
+  Example:
+      tcp-request inspect-delay 10s
+      acl data_in_buffer req.len gt 10
+      tcp-request content reject if data_in_buffer !{ req.payload(0,0),fix_validate }
+
 hex
   Converts a binary input sample to a hex string containing two hex digits per
   input byte. It is used to log or transfer hex dumps of some binary input data
diff --git a/include/proto/fix.h b/include/proto/fix.h
new file mode 100644
index 000000000..e7b8cf5ac
--- /dev/null
+++ b/include/proto/fix.h
@@ -0,0 +1,200 @@
+/*
+ * include/proto/fix.h
+ * This file contains functions and macros declarations for FIX protocol decoding.
+ *
+ * Copyright 2020 Baptiste Assmann <bed...@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _PROTO_FIX_H
+#define _PROTO_FIX_H
+
+#include <common/ist.h>
+#include <common/standard.h>
+
+#include <types/fix.h>
+
+
+/*
+ * Return a FIX tag ID ptr from <tag> if one found, NULL if not.
+ *
+ * full list of tag ID available here, just in case we need to support more "string" equivalent in the future:
+ *   https://www.onixs.biz/fix-dictionary/4.2/fields_by_tag.html
+ */
+static inline struct ist fix_tagid(struct ist tag)
+{
+	if (istisnumeric(tag))
+		return tag;
+
+	else if (strcasecmp(tag.ptr, "BeginString") == 0)
+		return FIX_TAG_BeginString;
+
+	else if (strcasecmp(tag.ptr, "BodyLength") == 0)
+		return FIX_TAG_BodyLength;
+
+	else if (strcasecmp(tag.ptr, "CheckSum") == 0)
+		return FIX_TAG_CheckSum;
+
+	else if (strcasecmp(tag.ptr, "MsgType") == 0)
+		return FIX_TAG_MsgType;
+
+	else if (strcasecmp(tag.ptr, "SenderComID") == 0)
+		return FIX_TAG_SenderComID;
+
+	else if (strcasecmp(tag.ptr, "TagetComID") == 0)
+		return FIX_TAG_TargetComID;
+
+	return IST_NULL;
+}
+
+/*
+ * Look up <tagid> in the FIX message <msg>, a sequence of fields shaped like
+ * '<tagid>=<value>FIX_DELIMITER', and return the value of the first field
+ * whose tag ID is exactly <tagid>. A prefix match is not enough: tag "1" must
+ * not answer a lookup for "10".
+ *
+ * Returns the value as a struct ist pointing into <msg>, or IST_NULL if none.
+ */
+static inline struct ist fix_tag_value(struct ist msg, struct ist tagid)
+{
+	while (istlen(msg) > 0) {
+		/* tag ID of the current field: everything up to the first '=' */
+		struct ist tag = iststop(msg, '=');
+
+		if (istlen(tag) == istlen(tagid) && istmatch(tag, tagid)) {
+			/* <value> starts right after the '=' (hence the + 1)
+			 * and stops before FIX_DELIMITER */
+			return iststop(istadv(msg, istlen(tag) + 1), FIX_DELIMITER);
+		}
+
+		/* jump to the next field: + 1 skips FIX_DELIMITER itself */
+		msg = istadv(msg, istlen(iststop(msg, FIX_DELIMITER)) + 1);
+	}
+
+	return IST_NULL;
+}
+
+/*
+ * Parse a FIX message <msg> and perform sanity checks:
+ * * the first field must be '8=FIX.<digit>.<digit><FIX_DELIMITER>'
+ * * every tag ID must follow the form '<numeric>='
+ * * the last field must be the CheckSum one: '10=nnn<FIX_DELIMITER>', where
+ *   'nnn' is made of exactly 3 digits
+ * * 'nnn' must be equal to the sum, modulo 256, of all the bytes preceding
+ *   the CheckSum field
+ *
+ * Tag IDs are compared in full, never by prefix: '80=' is not BeginString and
+ * '1=' (a prefix of "10") is not CheckSum. A message which runs out of data
+ * before any CheckSum field is found is invalid.
+ *
+ * Returns 0 if an error is found and 1 if not
+ */
+static inline int fix_validate_message(struct ist msg)
+{
+	struct ist parser;
+	unsigned int checksum;
+
+	if (istlen(msg) < FIX_MSG_MINSIZE)
+		return 0;
+
+	/* parse the whole message to compute the checksum and to check that
+	 * all the tag IDs are properly set */
+	parser = msg;
+	checksum = 0;
+	while (istlen(parser) > 0) {
+		struct ist tag, value;
+		char *c;
+
+		/* parse the tag ID: everything up to the first '=' */
+		tag = iststop(parser, '=');
+		if (istisnumeric(tag) == 0)
+			return 0;
+
+		/* this is the first tag and it must be BeginString */
+		if (istsame(msg, parser) == 1) {
+			/* same length + same leading bytes == same tag ID */
+			if (istlen(tag) != istlen(FIX_TAG_BeginString) ||
+			    istmatch(tag, FIX_TAG_BeginString) == 0)
+				return 0;
+
+			/* <value> starts after '=' and stops at FIX_DELIMITER
+			 * + 1 is for the '=' character */
+			value = iststop(istadv(parser, istlen(tag) + 1), FIX_DELIMITER);
+
+			/* value must look like FIX.<digit>.<digit>, which is
+			 * exactly 7 characters long */
+			if (istlen(value) != 7 || istmatch(value, ist("FIX.")) == 0)
+				return 0;
+
+			/* the length was checked above, so offsets 4 to 6 are
+			 * readable. The casts keep isdigit() defined for
+			 * bytes above 0x7f when char is signed. */
+			if (!isdigit((unsigned char)value.ptr[4]) || value.ptr[5] != '.' ||
+			    !isdigit((unsigned char)value.ptr[6]))
+				return 0;
+		}
+
+		/*
+		 * CheckSum tag must be the last one and is not taken into account
+		 * to compute the checksum itself
+		 */
+		else if (istlen(tag) == istlen(FIX_TAG_CheckSum) &&
+		         istmatch(tag, FIX_TAG_CheckSum) == 1) {
+			/* what follows '=' must be 'nnn<FIX_DELIMITER>' and
+			 * nothing else */
+			value = istadv(parser, istlen(tag) + 1);
+			if (istlen(value) != 4)
+				return 0;
+			if (istisnumeric(ist2(value.ptr, 3)) == 0)
+				return 0;
+			if (value.ptr[3] != FIX_DELIMITER)
+				return 0;
+
+			/* all the fields before this one are summed up */
+			return (checksum % 256 == strl2ui(value.ptr, 3));
+		}
+
+		/* the tag ID is part of the checksum, one byte at a time */
+		for (c = tag.ptr ; c < tag.ptr + tag.len ; ++c)
+			checksum += (unsigned char) (*c);
+
+		/* + 1 to pass the '=' character */
+		parser = istadv(parser, istlen(tag) + 1);
+		checksum += (unsigned int) '=';
+
+		/* parse the tag value: it starts after '=' and finishes before
+		 * FIX_DELIMITER */
+		value = iststop(parser, FIX_DELIMITER);
+		for (c = value.ptr ; c < value.ptr + value.len ; ++c)
+			checksum += (unsigned char) (*c);
+
+		/* + 1 for the FIX_DELIMITER character */
+		parser = istadv(parser, istlen(value) + 1);
+		checksum += (unsigned int) FIX_DELIMITER;
+	}
+
+	/* end of message reached without any CheckSum field */
+	return 0;
+}
+
+#endif /* _PROTO_FIX_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/types/fix.h b/include/types/fix.h
new file mode 100644
index 000000000..33ef203ce
--- /dev/null
+++ b/include/types/fix.h
@@ -0,0 +1,55 @@
+/*
+ * include/types/fix.h
+ * This file contains structure declarations for FIX protocol.
+ *
+ * Copyright 2020 Baptiste Assmann <bed...@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _TYPES_FIX_H
+#define _TYPES_FIX_H
+
+#include <common/ist.h>
+
+/*
+ * FIX messages are composed of a list of Tag=Value separated by a 'delimiter'
+ */
+
+#define FIX_DELIMITER 0x01
+
+/*
+ * FIX tag ID as struct ist
+ */
+#define FIX_TAG_BeginString (ist("8"))
+#define FIX_TAG_BodyLength  (ist("9"))
+#define FIX_TAG_CheckSum    (ist("10"))
+#define FIX_TAG_MsgType     (ist("35"))
+#define FIX_TAG_SenderComID (ist("49"))
+#define FIX_TAG_TargetComID (ist("56"))
+
+/*
+ * minimum length below which fix_validate_message() rejects a message
+ */
+#define FIX_MSG_MINSIZE        17
+
+#endif /* _TYPES_FIX_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/src/sample.c b/src/sample.c
index d602887b8..7d116003d 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -29,6 +29,7 @@
 
 #include <proto/arg.h>
 #include <proto/auth.h>
+#include <proto/fix.h>
 #include <proto/log.h>
 #include <proto/proxy.h>
 #include <proto/protocol_buffers.h>
@@ -2912,6 +2913,7 @@ static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void
 	return 0;
 }
 
+
 static int sample_conv_protobuf(const struct arg *arg_p, struct sample *smp, void *private)
 {
 	unsigned char *pos;
@@ -2946,6 +2948,72 @@ static int sample_conv_protobuf_check(struct arg *args, struct sample_conv *conv
 	return 1;
 }
 
+/*
+ * Replace the binary input sample with the value of the FIX tag given in
+ * arg_p[0]. Return 1 if found, 0 if not found or if <smp> can't be made rw.
+ * NOTE(review): confirm ist2str() copies <value> when its <count> is 0.
+ */
+static int sample_conv_fix_tag_value(const struct arg *arg_p, struct sample *smp, void *private)
+{
+	struct ist value;
+
+	if (!smp_make_rw(smp))
+		return 0;
+
+	value = fix_tag_value(ist2(smp->data.u.str.area, smp->data.u.str.data),
+		              ist2(arg_p[0].data.str.area, arg_p[0].data.str.data));
+	if (isttest(value) == 0)
+		return 0;
+
+	ist2str(smp->data.u.str.area, value, 0);
+	smp->data.u.str.data = istlen(value);
+
+	return 1;
+}
+
+/* Checks the "fix_tag_value" argument: args[0] must be a numeric tag ID or a
+ * name known to fix_tagid(), else <err> is set and 0 is returned. Otherwise it
+ * is rewritten in place as the tag ID (the on-wire format) and 1 is returned.
+ * NOTE(review): confirm ist2str() copies <tagid> when its <count> is 0.
+ */
+static int sample_conv_fix_value_check(struct arg *args, struct sample_conv *conv,
+                                      const char *file, int line, char **err)
+{
+	struct ist tagid;
+
+	tagid = fix_tagid(ist2(args[0].data.str.area, args[0].data.str.data));
+	if (tagid.ptr == NULL) {
+		memprintf(err, "Unknown FIX tag name '%s'", args[0].data.str.area);
+		return 0;
+	}
+
+	ist2str(args[0].data.str.area, tagid, 0);
+	args[0].data.str.data = istlen(tagid);
+
+	return 1;
+}
+
+/*
+ * Tells whether the input buffer holds a valid FIX message.
+ *
+ * The verdict is stored in <smp> as a boolean. The return value only reports
+ * whether the check could be run: 1 if so, 0 if not.
+ */
+static int sample_conv_fix_validate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+	int valid;
+
+	if (!smp_make_rw(smp))
+		return 0;
+
+	/* read the buffer before the sample gets retyped below */
+	valid = fix_validate_message(ist2(smp->data.u.str.area, smp->data.u.str.data));
+
+	smp->data.u.sint = valid;
+	smp->data.type = SMP_T_BOOL;
+	return 1;
+}
+
 /* This function checks the "strcmp" converter's arguments and extracts the
  * variable name and its scope.
  */
@@ -3414,6 +3482,10 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, {
 	{ "ungrpc", sample_conv_ungrpc,    ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN  },
 	{ "protobuf", sample_conv_protobuf, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN  },
 
+	/* FIX converters */
+	{ "fix_tag_value", sample_conv_fix_tag_value, ARG1(1,STR), sample_conv_fix_value_check, SMP_T_BIN, SMP_T_BIN  },
+	{ "fix_validate",  sample_conv_fix_validate,  0,           NULL,                        SMP_T_BIN, SMP_T_BOOL  },
+
 	{ "and",    sample_conv_binary_and, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT  },
 	{ "or",     sample_conv_binary_or,  ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT  },
 	{ "xor",    sample_conv_binary_xor, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT  },
-- 
2.17.1

From cf16c323fde7b4752fab194392628882726941fa Mon Sep 17 00:00:00 2001
From: Baptiste Assmann <bed...@gmail.com>
Date: Sun, 29 Mar 2020 08:10:16 +0200
Subject: [PATCH 2/4] MINOR: ist: add istadv() function

The purpose of the istadv() function is to advance <.ptr> by a given number
of characters, shrinking <.len> accordingly.
It is very useful when used in a while loop.
---
 include/common/ist.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/common/ist.h b/include/common/ist.h
index bffed4f23..ef08be82e 100644
--- a/include/common/ist.h
+++ b/include/common/ist.h
@@ -754,6 +754,17 @@ static inline int istisnumeric(const struct ist ist)
 	return 1;
 }
 
+/*
+ * advance <.ptr> by <nb> characters and shrink <.len> accordingly.
+ * If <ist> is shorter than <nb>, (end of <ist>, 0) is returned.
+ */
+static inline struct ist istadv(const struct ist ist, int nb)
+{
+	if (ist.len < nb)
+		return ist2(ist.ptr + ist.len, 0);
+	return ist2(ist.ptr + nb, ist.len - nb);
+}
+
 #ifndef IST_FREESTANDING
 /* This function allocates <size> bytes and returns an `ist` pointing to
  * the allocated area with size `0`.
-- 
2.17.1

From e8d46648eaccf8b51f2f5772e670c9b648b28289 Mon Sep 17 00:00:00 2001
From: Baptiste Assmann <bed...@gmail.com>
Date: Sun, 29 Mar 2020 09:10:27 +0200
Subject: [PATCH 3/4] MINOR: ist: add istsame() function

The istsame() function takes 2 ist and compare their <.ptr> and <.len>
values respectively.
It returns non-zero if they are the same.
---
 include/common/ist.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/common/ist.h b/include/common/ist.h
index ef08be82e..6e1997d51 100644
--- a/include/common/ist.h
+++ b/include/common/ist.h
@@ -765,6 +765,14 @@ static inline struct ist istadv(const struct ist ist, int nb)
 	return ist2(ist.ptr + nb, ist.len - nb);
 }
 
+/* returns non-zero if both ists share the same <.ptr> and <.len> (contents are not read) */
+static inline int istsame(struct ist ist1, struct ist ist2)
+{
+	if (ist1.len != ist2.len)
+		return 0;
+	return ist1.ptr == ist2.ptr;
+}
+
 #ifndef IST_FREESTANDING
 /* This function allocates <size> bytes and returns an `ist` pointing to
  * the allocated area with size `0`.
-- 
2.17.1

From 90b101b38ef1afad4814fd8736c5fdf99855ba84 Mon Sep 17 00:00:00 2001
From: Baptiste Assmann <bed...@gmail.com>
Date: Wed, 25 Mar 2020 15:54:02 +0100
Subject: [PATCH 1/4] MINOR: ist: add istisnumeric() function

the function istisnumeric() returns non-zero if the string in <ist> is a
number.
---
 include/common/ist.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/include/common/ist.h b/include/common/ist.h
index 68c83ffa0..bffed4f23 100644
--- a/include/common/ist.h
+++ b/include/common/ist.h
@@ -734,6 +734,26 @@ static inline struct ist iststop(const struct ist ist, char chr)
 	return ist2(ist.ptr, len - 1);
 }
 
+/*
+ * returns non-zero if <ist> is made only of decimal digits.
+ * an empty <ist> is not a number: 0 is returned for it too.
+ */
+static inline int istisnumeric(const struct ist ist)
+{
+	size_t pos;
+
+	/* nothing to scan: not a number */
+	if (!istlen(ist))
+		return 0;
+
+	/* the first non-digit character settles it */
+	for (pos = 0; pos < ist.len; pos++)
+		if (!isdigit((unsigned char)ist.ptr[pos]))
+			return 0;
+
+	return 1;
+}
+
 #ifndef IST_FREESTANDING
 /* This function allocates <size> bytes and returns an `ist` pointing to
  * the allocated area with size `0`.
-- 
2.17.1

Reply via email to