[ https://issues.apache.org/jira/browse/ASTERIXDB-2808?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael J. Carey reassigned ASTERIXDB-2808: ------------------------------------------- Assignee: Dmitry Lychagin (was: Michael J. Carey) > Significantly increase in execution time when encapsulating a highly nested > query in a function > ----------------------------------------------------------------------------------------------- > > Key: ASTERIXDB-2808 > URL: https://issues.apache.org/jira/browse/ASTERIXDB-2808 > Project: Apache AsterixDB > Issue Type: Bug > Components: FUN - Functions > Affects Versions: 0.9.6 > Reporter: Gift Sinthong > Assignee: Dmitry Lychagin > Priority: Major > > Here is a highly nested query that drops attributes, appends attributes, and > applies a Python UDF. It took *2.928 sec*. to return the result. > {code:java} > SELECT VALUE SF_CRIMES.getResolution(t) FROM (SELECT VALUE OBJECT_REMOVE(t, > 'resolved') FROM (SELECT VALUE OBJECT_REMOVE(t, 'time') FROM (SELECT VALUE > OBJECT_REMOVE(t, 'date') FROM (SELECT VALUE OBJECT_REMOVE(t, 'pdDistrict') > FROM (SELECT VALUE OBJECT_REMOVE(t, 'resolution') FROM (SELECT VALUE > OBJECT_REMOVE(t, 'dayOfWeek') FROM (SELECT VALUE OBJECT_REMOVE(t, 'category') > FROM (SELECT VALUE OBJECT_REMOVE(t, 'pdId') FROM (SELECT t.*, > to_number(resolution != "NONE") AS `resolved` FROM (SELECT t.*, > get_hour(parse_time(time, "hh:mm")) AS `hour` FROM (SELECT t.*, > get_month(parse_date(date, "MM/DD/YYYY")) AS `month` FROM (SELECT t.*, > to_number(category = "VEHICLE THEFT") AS `VEHICLE THEFT`, to_number(category > = "WARRANTS") AS `WARRANTS`, to_number(category = "WEAPON LAWS") AS `WEAPON > LAWS`, to_number(category = "DRUG/NARCOTIC") AS `DRUG/NARCOTIC`, > to_number(category = "SEX OFFENSES, FORCIBLE") AS `SEX OFFENSES, FORCIBLE`, > to_number(category = "FORGERY/COUNTERFEITING") AS `FORGERY/COUNTERFEITING`, > to_number(category = "MISSING PERSON") AS `MISSING PERSON`, > to_number(category = "VANDALISM") AS `VANDALISM`, to_number(category = > "DRUNKENNESS") AS `DRUNKENNESS`, to_number(category = "STOLEN PROPERTY") AS > `STOLEN PROPERTY`, to_number(category = "SUSPICIOUS OCC") AS `SUSPICIOUS > OCC`, to_number(category = "DISORDERLY CONDUCT") AS `DISORDERLY CONDUCT`, > to_number(category = "NON-CRIMINAL") AS `NON-CRIMINAL`, to_number(category = > "TRESPASS") AS `TRESPASS`, to_number(category = "FRAUD") AS `FRAUD`, > to_number(category = "LARCENY/THEFT") AS `LARCENY/THEFT`, to_number(category > = "OTHER OFFENSES") AS `OTHER OFFENSES`, to_number(category = "ROBBERY") AS > `ROBBERY`, to_number(category = "ASSAULT") AS `ASSAULT`, to_number(category = > "BURGLARY") AS `BURGLARY`, to_number(category = "KIDNAPPING") AS > `KIDNAPPING`, to_number(category = "PROSTITUTION") AS `PROSTITUTION`, > to_number(category = "SECONDARY CODES") AS `SECONDARY CODES`, > to_number(pdDistrict = "TENDERLOIN") AS `TENDERLOIN`, to_number(pdDistrict = > "MISSION") AS `MISSION`, to_number(pdDistrict = "SOUTHERN") AS `SOUTHERN`, > to_number(pdDistrict = "PARK") AS `PARK`, to_number(pdDistrict = "TARAVAL") > AS `TARAVAL`, to_number(pdDistrict = "BAYVIEW") AS `BAYVIEW`, > to_number(pdDistrict = "RICHMOND") AS `RICHMOND`, to_number(pdDistrict = > "CENTRAL") AS `CENTRAL`, to_number(pdDistrict = "INGLESIDE") AS `INGLESIDE`, > to_number(pdDistrict = "NORTHERN") AS `NORTHERN`, to_number(dayOfWeek = > "Tuesday") AS `Tuesday`, to_number(dayOfWeek = "Monday") AS `Monday`, > to_number(dayOfWeek = "Sunday") AS `Sunday`, to_number(dayOfWeek = > "Wednesday") AS `Wednesday`, to_number(dayOfWeek = "Saturday") AS `Saturday`, > to_number(dayOfWeek = "Friday") AS `Friday`, to_number(dayOfWeek = > "Thursday") AS `Thursday` FROM (SELECT VALUE OBJECT_REMOVE(t, 'id') FROM > (SELECT VALUE OBJECT_REMOVE(t, 'location') FROM (SELECT VALUE > OBJECT_REMOVE(t, 'incidntNum') FROM (SELECT VALUE OBJECT_REMOVE(t, > 'description') FROM (SELECT VALUE OBJECT_REMOVE(t, 'address') FROM (SELECT > VALUE t FROM SF_CRIMES.Crimes AS t WHERE t.pdId=18600000506244) t) t) t) t) > t) t) t) t) t) t) t) t) t) t) t) t) t) t; > {code} > Here is the resulting query plan. > {code:java} > distribute result [$$392] > -- DISTRIBUTE_RESULT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$392]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$392] <- > [getResolution(cast(object-remove(object-remove(object-remove(object-remove(object-remove(object-remove(object-remove(object-remove(object-concat-strict(if-missing-or-null(to-object(object-concat-strict(if-missing-or-null(to-object($$439), > cast($$427)), {"hour": get-hour(parse-time($$439.getField("time"), > "hh:mm"))})), cast($$427)), {"resolved": > to-number(neq(object-concat-strict(if-missing-or-null(to-object($$439), > cast($$427)), {"hour": get-hour(parse-time($$439.getField("time"), > "hh:mm"))}).getField("resolution"), "NONE"))}), "pdId"), "category"), > "dayOfWeek"), "resolution"), "pdDistrict"), "date"), "time"), "resolved")))] > -- ASSIGN |PARTITIONED| > project ([$$427, $$439]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$439] <- > [object-concat-strict(if-missing-or-null(to-object($$438), cast($$427)), > {"month": get-month(parse-date($$438.getField("date"), "MM/DD/YYYY"))})] > -- ASSIGN |PARTITIONED| > project ([$$427, $$438]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$438] <- > [object-concat-strict(if-missing-or-null(to-object($$435), cast($$427)), > {"VEHICLE THEFT": to-number(eq($$393, "VEHICLE THEFT")), "WARRANTS": > to-number(eq($$393, "WARRANTS")), "WEAPON LAWS": to-number(eq($$393, "WEAPON > LAWS")), "DRUG/NARCOTIC": to-number(eq($$393, "DRUG/NARCOTIC")), "SEX > OFFENSES, FORCIBLE": to-number(eq($$393, "SEX OFFENSES, FORCIBLE")), > "FORGERY/COUNTERFEITING": to-number(eq($$393, "FORGERY/COUNTERFEITING")), > "MISSING PERSON": to-number(eq($$393, "MISSING PERSON")), "VANDALISM": > to-number(eq($$393, "VANDALISM")), "DRUNKENNESS": to-number(eq($$393, > "DRUNKENNESS")), "STOLEN PROPERTY": to-number(eq($$393, "STOLEN PROPERTY")), > "SUSPICIOUS OCC": to-number(eq($$393, "SUSPICIOUS OCC")), "DISORDERLY > CONDUCT": to-number(eq($$393, "DISORDERLY CONDUCT")), "NON-CRIMINAL": > to-number(eq($$393, "NON-CRIMINAL")), "TRESPASS": to-number(eq($$393, > "TRESPASS")), "FRAUD": to-number(eq($$393, "FRAUD")), "LARCENY/THEFT": > to-number(eq($$393, "LARCENY/THEFT")), "OTHER OFFENSES": to-number(eq($$393, > "OTHER OFFENSES")), "ROBBERY": to-number(eq($$393, "ROBBERY")), "ASSAULT": > to-number(eq($$393, "ASSAULT")), "BURGLARY": to-number(eq($$393, > "BURGLARY")), "KIDNAPPING": to-number(eq($$393, "KIDNAPPING")), > "PROSTITUTION": to-number(eq($$393, "PROSTITUTION")), "SECONDARY CODES": > to-number(eq($$393, "SECONDARY CODES")), "TENDERLOIN": to-number(eq($$394, > "TENDERLOIN")), "MISSION": to-number(eq($$394, "MISSION")), "SOUTHERN": > to-number(eq($$394, "SOUTHERN")), "PARK": to-number(eq($$394, "PARK")), > "TARAVAL": to-number(eq($$394, "TARAVAL")), "BAYVIEW": to-number(eq($$394, > "BAYVIEW")), "RICHMOND": to-number(eq($$394, "RICHMOND")), "CENTRAL": > to-number(eq($$394, "CENTRAL")), "INGLESIDE": to-number(eq($$394, > "INGLESIDE")), "NORTHERN": to-number(eq($$394, "NORTHERN")), "Tuesday": > to-number(eq($$395, "Tuesday")), "Monday": to-number(eq($$395, "Monday")), > "Sunday": to-number(eq($$395, "Sunday")), "Wednesday": to-number(eq($$395, > "Wednesday")), "Saturday": to-number(eq($$395, "Saturday")), "Friday": > to-number(eq($$395, "Friday")), "Thursday": to-number(eq($$395, > "Thursday"))})] > -- ASSIGN |PARTITIONED| > assign [$$427, $$395, $$394, $$393] <- [{ }, > $$435.getField("dayOfWeek"), $$435.getField("pdDistrict"), > $$435.getField("category")] > -- ASSIGN |PARTITIONED| > project ([$$435]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$435] <- > [object-remove(object-remove(object-remove(object-remove(object-remove($$t, > "address"), "description"), "incidntNum"), "location"), "id")] > -- ASSIGN |PARTITIONED| > project ([$$t]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > unnest-map [$$396, $$t] <- index-search("Crimes", > 0, "SF_CRIMES", "Crimes", FALSE, FALSE, 1, $$442, 1, $$443, TRUE, TRUE, TRUE) > -- BTREE_SEARCH |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > assign [$$442, $$443] <- [18600000506244, > 18600000506244] > -- ASSIGN |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE |PARTITIONED| > {code} > However, when putting the query into a function (called 'is_resolvable') and > passing in the same record (pdId = 18600000506244), it took *6.895 sec*. to > return the result and *5.654 sec*. just to generate the query plan. The query > is listed below along with its query plan. > {code:java} > SELECT is_resolvable(t) FROM (SELECT VALUE c FROM Crimes c WHERE > c.pdId=18600000506244) t; > {code} > {code:java} > distribute result [$$438] > -- DISTRIBUTE_RESULT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > project ([$$438]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$438] <- [{"$1": get-item($$436, 0)}] > -- ASSIGN |PARTITIONED| > project ([$$436]) > -- STREAM_PROJECT |PARTITIONED| > subplan { > aggregate [$$436] <- [listify($$435)] > -- AGGREGATE |LOCAL| > assign [$$435] <- > [getResolution(cast(object-remove(object-remove(object-remove(object-remove(object-remove(object-remove(object-remove(object-remove(object-concat-strict(if-missing-or-null(to-object($$466), > cast($$453)), {"resolved": to-number(neq($$446, "NONE"))}), "pdId"), > "category"), "dayOfWeek"), "resolution"), "pdDistrict"), "date"), "time"), > "resolved")))] > -- ASSIGN |LOCAL| > assign [$$446] <- [$$466.getField("resolution")] > -- ASSIGN |LOCAL| > assign [$$466] <- > [object-concat-strict(if-missing-or-null(to-object($$465), cast($$453)), > {"hour": get-hour(parse-time($$445, "hh:mm"))})] > -- ASSIGN |LOCAL| > assign [$$445] <- [$$465.getField("time")] > -- ASSIGN |LOCAL| > assign [$$465] <- > [object-concat-strict(if-missing-or-null(to-object($$464), cast($$453)), > {"month": get-month(parse-date($$444, "MM/DD/YYYY"))})] > -- ASSIGN |LOCAL| > assign [$$444] <- [$$464.getField("date")] > -- ASSIGN |LOCAL| > assign [$$464] <- > [object-concat-strict(if-missing-or-null(to-object($$461), cast($$453)), > {"VEHICLE THEFT": to-number(eq($$439, "VEHICLE THEFT")), "WARRANTS": > to-number(eq($$439, "WARRANTS")), "WEAPON LAWS": to-number(eq($$439, "WEAPON > LAWS")), "DRUG/NARCOTIC": to-number(eq($$439, "DRUG/NARCOTIC")), "SEX > OFFENSES, FORCIBLE": to-number(eq($$439, "SEX OFFENSES, FORCIBLE")), > "FORGERY/COUNTERFEITING": to-number(eq($$439, "FORGERY/COUNTERFEITING")), > "MISSING PERSON": to-number(eq($$439, "MISSING PERSON")), "VANDALISM": > to-number(eq($$439, "VANDALISM")), "DRUNKENNESS": to-number(eq($$439, > "DRUNKENNESS")), "STOLEN PROPERTY": to-number(eq($$439, "STOLEN PROPERTY")), > "SUSPICIOUS OCC": to-number(eq($$439, "SUSPICIOUS OCC")), "DISORDERLY > CONDUCT": to-number(eq($$439, "DISORDERLY CONDUCT")), "NON-CRIMINAL": > to-number(eq($$439, "NON-CRIMINAL")), "TRESPASS": to-number(eq($$439, > "TRESPASS")), "FRAUD": to-number(eq($$439, "FRAUD")), "LARCENY/THEFT": > to-number(eq($$439, "LARCENY/THEFT")), "OTHER OFFENSES": to-number(eq($$439, > "OTHER OFFENSES")), "ROBBERY": to-number(eq($$439, "ROBBERY")), "ASSAULT": > to-number(eq($$439, "ASSAULT")), "BURGLARY": to-number(eq($$439, > "BURGLARY")), "KIDNAPPING": to-number(eq($$439, "KIDNAPPING")), > "PROSTITUTION": to-number(eq($$439, "PROSTITUTION")), "SECONDARY CODES": > to-number(eq($$439, "SECONDARY CODES")), "TENDERLOIN": to-number(eq($$440, > "TENDERLOIN")), "MISSION": to-number(eq($$440, "MISSION")), "SOUTHERN": > to-number(eq($$440, "SOUTHERN")), "PARK": to-number(eq($$440, "PARK")), > "TARAVAL": to-number(eq($$440, "TARAVAL")), "BAYVIEW": to-number(eq($$440, > "BAYVIEW")), "RICHMOND": to-number(eq($$440, "RICHMOND")), "CENTRAL": > to-number(eq($$440, "CENTRAL")), "INGLESIDE": to-number(eq($$440, > "INGLESIDE")), "NORTHERN": to-number(eq($$440, "NORTHERN")), "Tuesday": > to-number(eq($$441, "Tuesday")), "Monday": to-number(eq($$441, "Monday")), > "Sunday": to-number(eq($$441, "Sunday")), "Wednesday": to-number(eq($$441, > "Wednesday")), "Saturday": to-number(eq($$441, "Saturday")), "Friday": > to-number(eq($$441, "Friday")), "Thursday": to-number(eq($$441, > "Thursday"))})] > -- ASSIGN |LOCAL| > assign [$$441, $$440, $$439] <- > [$$461.getField("dayOfWeek"), $$461.getField("pdDistrict"), > $$461.getField("category")] > -- ASSIGN |LOCAL| > assign [$$461] <- > [object-remove(object-remove(object-remove(object-remove(object-remove($$t, > "address"), "description"), "incidntNum"), "location"), "id")] > -- ASSIGN |LOCAL| > unnest $$t <- scan-collection($$237) > -- UNNEST |LOCAL| > nested tuple source > -- NESTED_TUPLE_SOURCE |LOCAL| > } > -- SUBPLAN |PARTITIONED| > project ([$$453, $$237]) > -- STREAM_PROJECT |PARTITIONED| > assign [$$453, $$237] <- [{ }, to-array($$c)] > -- ASSIGN |PARTITIONED| > project ([$$c]) > -- STREAM_PROJECT |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > unnest-map [$$442, $$c] <- index-search("Crimes", 0, > "SF_CRIMES", "Crimes", FALSE, FALSE, 1, $$467, 1, $$468, TRUE, TRUE, TRUE) > -- BTREE_SEARCH |PARTITIONED| > exchange > -- ONE_TO_ONE_EXCHANGE |PARTITIONED| > assign [$$467, $$468] <- [18600000506244, > 18600000506244] > -- ASSIGN |PARTITIONED| > empty-tuple-source > -- EMPTY_TUPLE_SOURCE |PARTITIONED| > {code} > -- This message was sent by Atlassian Jira (v8.3.4#803005)