Changeset: 5f732a468c51 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/5f732a468c51
Modified Files:
        sql/test/nested/Tests/webclicks.test.in
Branch: nested
Log Message:

increase complexity of webclicks test


diffs (122 lines):

diff --git a/sql/test/nested/Tests/webclicks.test.in 
b/sql/test/nested/Tests/webclicks.test.in
--- a/sql/test/nested/Tests/webclicks.test.in
+++ b/sql/test/nested/Tests/webclicks.test.in
@@ -112,10 +112,29 @@ select cast(t.json as event) from (selec
 ("996257967-103007874", 
"47f07c1399c9c6bd1012861f9c5c958e042732e259b909e077f7e8967b650c75", 
"8359e1f53c300c44ee0e8ba1610f620c589258a8a4752ea089cfc44fee51ff64", 
"productDetail", 2024-11-30 22:13:37.823000, "Mozilla", ({(("hash", 
"da39a3ee5e6b4b0d3255bfef95601890afd80709")),(("hostname", 
"e13dacd9566ecf8efd4aff212960c02de20aa110")),(("pageSubType", 
"d5d4cd07616a542891b7ec2d0257b3a24b69856e")),(("pageType", 
"6b5cdf4073b8166e5cc01a2532469d2e356eebfd")),(("pathname", 
"0b28998c6a5efc0afb854752b1bba65f37c70230")),(("protocol", 
"c3437dbc7c1255d3a21d444d86ebf2e9234c22bd")),(("referrer", 
"27bbe94658bf6bd4c431052e2065de0457ec8546")),(("search", 
"da39a3ee5e6b4b0d3255bfef95601890afd80709")),(("state", 
"da39a3ee5e6b4b0d3255bfef95601890afd80709")),(("title", 
"d08339b839a6716a96334736c1c769091a0a8644"))}), ({(("customerId", 
"d5d4cd07616a542891b7ec2d0257b3a24b69856e")),(("customerType", 
"d5d4cd07616a542891b7ec2d0257b3a24b69856e")),(("isAuthenticated", 
"7cb6efb98ba5972a9b5090dc2e517fe14d12cb04"))}), 
 ("detail", ({(("null", "null"))}), ({(("product", "null", ({(("brand", 
"b8884d21754edbf713af7000a67eac0efd5df8ea"))})))})))
 
 statement ok
-create table events(data event)
+create table events(
+    eventid varchar,
+    sessionid varchar,
+    userid varchar,
+    event varchar,
+    timestamp timestamp,
+    user_agent varchar,
+    location loc,
+    "user" webusr,
+    ecommerce ecommerce)
 
 statement ok
-insert into events select json from r'$TSTSRCDIR/webclicks.json'
+insert into events
+select e.eventid,
+    e.sessionid,
+    e.userid,
+    e.event,
+    e.timestamp,
+    e.user_agent,
+    e.location,
+    e."user",
+    e.ecommerce
+from ( select cast(t.json as event) as e from (select * from 
r'$TSTSRCDIR/webclicks.json') t)
 
 query I
 select count(*) from events
@@ -123,11 +142,85 @@ select count(*) from events
 2
 
 query T
-select data.event from events
+select event from events
 ----
 productDetail
 productDetail
 
+query TTTTTTTTTT
+SELECT
+    timestamp,
+    sessionid,
+    userid,
+    user_agent,
+    event,
+    ecommerce.name as ecom_name,
+    CASE WHEN l.element.key = 'pageType' THEN l.element.value END AS pagetype,
+    CASE WHEN u.element.key = 'customerType' THEN u.element.value END AS 
customertype,
+    CASE WHEN u.element.key = 'customerid' THEN u.element.value END AS 
customerid,
+    CASE WHEN eal.element.key = 'brand' THEN eal.element.value END AS 
ecom_attributes_brand
+FROM events,
+    unnest(location.list),
+    unnest(ecommerce.objects.list) as eol,
+    unnest("user".list) as u,
+    unnest(eol.element.attributes.list) as eal
+----
+
+
+#WITH base AS (
+#    SELECT
+#        timestamp,
+#        sessionid,
+#        userid,
+#        user_agent,
+#        event,
+#        ecommerce.name as ecom_name,
+#        CASE WHEN l.element.key = 'pageType' THEN l.element.value END AS 
pagetype, -- Typical way of handling these structs & arrays.
+#        CASE WHEN u.element.key = 'customerType' THEN u.element.value END AS 
customertype,
+#        CASE WHEN u.element.key = 'customerid' THEN u.element.value END AS 
customerid,
+#        CASE WHEN eal.element.key = 'brand' THEN eal.element.value END AS 
ecom_attributes_brand,
+#    FROM events
+#    left join unnest(location.list) as l
+#    left join unnest(ecommerce.objects.list) as eol
+#    left join unnest(eol.element.attributes.list) as eal
+#    cross join unnest(user.list) as u -- Both left join unnest and cross join 
unnest work in bigquery. It does not really matter what syntax should be used.
+#    WHERE DATE(timestamp) = DATE("2024-11-30")
+#        AND userid != 'na' AND userid != ''
+#        AND sessionid != 'na' AND sessionid != ''
+#)
+#, last_datapoints AS (
+#    SELECT
+#        sessionid,
+#        userid,
+#        timestamp,
+#        event,
+#        max(pagetype) as pagetype,
+#        max(user_agent) as user_agent,
+#        max(ecom_name) as ecom_name,
+#        max(customertype) as customertype,
+#        max(ecom_attributes_brand) as ecom_attributes_brand,
+#        max(customerid) as customerid
+#    FROM base
+#    group by sessionid, userid, timestamp, event
+#)
+#select event,
+#ecom_name,
+#IFNULL(REGEXP_EXTRACT(user_agent, r".*Edg.*"), "No edge") as edge_user_agent, 
-- This does not offer very valuable information in this form on this data, but 
being able to do regex extracts is very important.
+#date(timestamp) as date,
+#customertype,
+#count(distinct(userid)) as users,
+#count(distinct(customerid)) as customers,
+#count(distinct(pagetype)) as pagetypes,
+#count(distinct(concat(userid, sessionid))) as sessions
+#from (
+#    SELECT *
+#    FROM last_datapoints
+#    QUALIFY ROW_NUMBER() OVER (PARTITION BY sessionid, userid, event ORDER BY 
timestamp ASC) = 1 -- Shortcut for row_number + where statement
+#)
+#group by 1, 2, 3, 4, 5
+#order by 3 desc;
+#
+
 
 # clean up - the order should be reversed of creation for depended types
 
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to