mmiklavc edited a comment on issue #1409: METRON-2112 Normalize parser original_string handling URL: https://github.com/apache/metron/pull/1409#issuecomment-497049477 ### Test Plan **Test the default original_string functionality** * Create kafka topic ``` /usr/hdp/current/kafka-broker/bin/kafka-console-consumer.sh --zookeeper $ZOOKEEPER --topic jsonMapQuery --from-beginning ``` * Pull configs down from ZK ``` $METRON_HOME/bin/zk_load_configs.sh -m PULL -o ${METRON_HOME}/config/zookeeper -z $ZOOKEEPER -f ``` * Create indexing config. We won't need one for the parser itself because a default is provided already. ``` # /usr/metron/0.7.2/config/zookeeper/indexing/jsonMapQuery.json { "hdfs" : { "index": "json_map_query", "batchSize": 1, "enabled" : true }, "elasticsearch" : { "index": "json_map_query", "batchSize": 1, "enabled" : true }, "solr" : { "index": "json_map_query", "batchSize": 1, "enabled" : true } } ``` * Push config back up to ZK ``` $METRON_HOME/bin/zk_load_configs.sh -m PUSH -i $METRON_HOME/config/zookeeper/ -z $ZOOKEEPER ``` * Start the topology ``` $METRON_HOME/bin/start_parser_topology.sh -z $ZOOKEEPER -s jsonMapQuery ``` * Add some data to a file named json-data.json ``` {"foo":[{ "string" : "bar", "number" : 1, "ignored" : [ "blah" ] },{ "number" : 2 },{ "number" : 3 },{ "number" : 4 },{ "number" : 5 },{ "number" : 6 },{ "number" : 7 },{ "number" : 8 },{ "number" : 9 },{ "number" : 10 }]} ``` * Send the data to Kafka ``` cat json-data.json | /usr/hdp/current/kafka-broker/bin/kafka-console-producer.sh --broker-list $BROKERLIST --topic jsonMapQuery ``` * Expect to see 10 new messages created in json_map_query_index ``` curl -XGET "http://node1:9200/json_map_query*/_search?pretty=true" { "took" : 18, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 10, "max_score" : 1.0, "hits" : [ { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : 
"AWsB3SuUcGT3jmMsgLwb", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376560", "parallelenricher:enrich:end:ts" : "1559104376560", "number" : 6, "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376560", "parallelenricher:splitter:end:ts" : "1559104376560", "guid" : "975a0761-4590-4290-a9d0-a98493eb1bb0", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3SvZcGT3jmMsgLwf", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376561", "parallelenricher:enrich:end:ts" : "1559104376561", "number" : 10, "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376561", "parallelenricher:splitter:end:ts" : "1559104376561", "guid" : "3de3a1ab-11fc-42f0-8083-1349e512e113", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3StRcGT3jmMsgLwZ", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376559", "parallelenricher:enrich:end:ts" : "1559104376559", "number" : 4, "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" 
: 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376559", "parallelenricher:splitter:end:ts" : "1559104376559", "guid" : "f170f724-8da8-48f4-8f33-62aaa5a5842d", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3SvCcGT3jmMsgLwd", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376561", "parallelenricher:enrich:end:ts" : "1559104376561", "number" : 8, "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376561", "parallelenricher:splitter:end:ts" : "1559104376561", "guid" : "060f5142-73b6-4fc4-b3fa-ad17f5c4f7ce", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3SvLcGT3jmMsgLwe", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376561", "parallelenricher:enrich:end:ts" : "1559104376561", "number" : 9, "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376561", "parallelenricher:splitter:end:ts" : "1559104376561", "guid" : "d805927d-8fef-4a70-8cf2-f4441eb1f70f", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3SurcGT3jmMsgLwc", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376560", "parallelenricher:enrich:end:ts" : "1559104376560", "number" : 7, 
"source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376560", "parallelenricher:splitter:end:ts" : "1559104376560", "guid" : "658f48b8-19a7-4506-aabd-133c54ddde89", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3SsKcGT3jmMsgLwW", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376559", "parallelenricher:enrich:end:ts" : "1559104376559", "number" : 1, "ignored" : [ "blah" ], "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "string" : "bar", "parallelenricher:enrich:begin:ts" : "1559104376559", "parallelenricher:splitter:end:ts" : "1559104376559", "guid" : "b569df68-e621-46ed-8390-c2adc257c055", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3StHcGT3jmMsgLwY", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376559", "parallelenricher:enrich:end:ts" : "1559104376559", "number" : 3, "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376559", "parallelenricher:splitter:end:ts" : "1559104376559", "guid" : 
"f37b4283-a445-47c3-a965-c17937e39755", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3SswcGT3jmMsgLwX", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376559", "parallelenricher:enrich:end:ts" : "1559104376559", "number" : 2, "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376559", "parallelenricher:splitter:end:ts" : "1559104376559", "guid" : "d7c838e6-ee9f-4067-aa59-10f9ff8412d3", "timestamp" : 1559104359358 } }, { "_index" : "json_map_query_index_2019.05.29.04", "_type" : "jsonMapQuery_doc", "_id" : "AWsB3St4cGT3jmMsgLwa", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559104376560", "parallelenricher:enrich:end:ts" : "1559104376560", "number" : 5, "source:type" : "jsonMapQuery", "original_string" : "{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "parallelenricher:enrich:begin:ts" : "1559104376560", "parallelenricher:splitter:end:ts" : "1559104376560", "guid" : "8ad4d1fc-8833-4976-9166-846d0f1d91bb", "timestamp" : 1559104359358 } } ] } } ``` Verify that the `original_string` in every message contains the full, unchanged source message. **Test JsonMapQuery parser override functionality** * Open the jsonMapQuery parser config and add the `overrideOriginalString` field to `parserConfig`, set to true. 
``` { "parserClassName":"org.apache.metron.parsers.json.JSONMapParser", "sensorTopic":"jsonMapQuery", "parserConfig": {"jsonpQuery":"$.foo", "overrideOriginalString" : true} } ``` * Push to ZK. ``` $METRON_HOME/bin/zk_load_configs.sh -m PUSH -i $METRON_HOME/config/zookeeper/ -z $ZOOKEEPER # verify the change went through $METRON_HOME/bin/zk_load_configs.sh -m DUMP -z $ZOOKEEPER -c PARSER -n jsonMapQuery ``` * Restart the topology (parserConfigs are not loaded dynamically) ``` storm kill jsonMapQuery $METRON_HOME/bin/start_parser_topology.sh -z $ZOOKEEPER -s jsonMapQuery ``` * Clear your json sensor index ``` curl -XDELETE "http://localhost:9200/json_map_query*" # verify it's empty curl -XGET "http://node1:9200/json_map_query*/_stats/docs?pretty=true" { "_shards" : { "total" : 0, "successful" : 0, "failed" : 0 }, "_all" : { "primaries" : { }, "total" : { } }, "indices" : { } } ``` * Run the data through again ``` cat json-data.json | /usr/hdp/current/kafka-broker/bin/kafka-console-producer.sh --broker-list $BROKERLIST --topic jsonMapQuery ``` * Expect to see 10 new messages created in json_map_query_index ``` curl -XGET "http://node1:9200/json_map_query*/_search?pretty=true" { "took" : 2, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 10, "max_score" : 1.0, "hits" : [ { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt80cGT3jmMsgLwz", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200444", "parallelenricher:enrich:end:ts" : "1559152200444", "number" : 1, "ignored" : [ "blah" ], "source:type" : "jsonMapQuery", "original_string" : "{\"number\":1,\"ignored\":[\"blah\"],\"string\":\"bar\"}", "string" : "bar", "parallelenricher:enrich:begin:ts" : "1559152200444", "parallelenricher:splitter:end:ts" : "1559152200444", "guid" : "820c7c4d-14a5-4fca-a43f-2c2fa207c6e7", "timestamp" : 1559152177657 } }, { "_index" : 
"json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt9UcGT3jmMsgLw0", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 2, "source:type" : "jsonMapQuery", "original_string" : "{\"number\":2}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : "1559152200445", "guid" : "270ec17b-a34f-4a8e-b2a1-32b4ad5891d7", "timestamp" : 1559152177657 } }, { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt_ZcGT3jmMsgLw8", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 10, "source:type" : "jsonMapQuery", "original_string" : "{\"number\":10}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : "1559152200445", "guid" : "8bc38d0c-0172-464f-8794-f633cac6e60d", "timestamp" : 1559152177657 } }, { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt9lcGT3jmMsgLw1", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 3, "source:type" : "jsonMapQuery", "original_string" : "{\"number\":3}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : "1559152200445", "guid" : "a9a89c94-922b-41fe-ad37-3f0cb758dc8e", "timestamp" : 1559152177657 } }, { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt96cGT3jmMsgLw3", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 5, "source:type" : "jsonMapQuery", "original_string" : "{\"number\":5}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : 
"1559152200445", "guid" : "7917038a-67ba-497c-b74e-47c9fe13a34e", "timestamp" : 1559152177657 } }, { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt-qcGT3jmMsgLw5", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 7, "source:type" : "jsonMapQuery", "original_string" : "{\"number\":7}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : "1559152200445", "guid" : "0fc44b9c-e218-42f5-816b-0b459088b80a", "timestamp" : 1559152177657 } }, { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt9ucGT3jmMsgLw2", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 4, "source:type" : "jsonMapQuery", "original_string" : "{\"number\":4}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : "1559152200445", "guid" : "8aa5943a-884d-4ce5-8816-a227075e2e8f", "timestamp" : 1559152177657 } }, { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt-VcGT3jmMsgLw4", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 6, "source:type" : "jsonMapQuery", "original_string" : "{\"number\":6}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : "1559152200445", "guid" : "0eb61762-435a-4dc6-b025-f82f761c75cc", "timestamp" : 1559152177657 } }, { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt-3cGT3jmMsgLw6", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 8, "source:type" : "jsonMapQuery", "original_string" : 
"{\"number\":8}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : "1559152200445", "guid" : "4763dfa1-91fd-4c21-ab48-28d564a5c878", "timestamp" : 1559152177657 } }, { "_index" : "json_map_query_index_2019.05.29.17", "_type" : "jsonMapQuery_doc", "_id" : "AWsEtt_LcGT3jmMsgLw7", "_score" : 1.0, "_source" : { "parallelenricher:splitter:begin:ts" : "1559152200445", "parallelenricher:enrich:end:ts" : "1559152200445", "number" : 9, "source:type" : "jsonMapQuery", "original_string" : "{\"number\":9}", "parallelenricher:enrich:begin:ts" : "1559152200445", "parallelenricher:splitter:end:ts" : "1559152200445", "guid" : "74c7c52a-9aa3-43c4-b7aa-d5a8edbf9409", "timestamp" : 1559152177657 } } ] } } ``` The `original_string` should now be the individual field that was parsed from the root message. **Check parser chaining and aggregation use case** https://github.com/apache/metron/tree/master/use-cases/parser_chaining This feature should still work as-is without any change.
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
