HeartSaVioR commented on a change in pull request #28040: [SPARK-31278][SS] Fix
StreamingQuery output rows metric
URL: https://github.com/apache/spark/pull/28040#discussion_r399723128
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
##########
@@ -253,9 +253,11 @@ abstract class FileStreamSinkSuite extends StreamTest {
addTimestamp(104, 123) // watermark = 90 before this, watermark = 123 -
10 = 113 after this
check((100L, 105L) -> 2L) // no-data-batch emits results on 100-105,
+ assert(query.lastProgress.sink.numOutputRows === 1)
Review comment:
The value is -1, not 0, when the sink doesn't support output metrics. As you
can see from the error message in the build, the value here is 0 instead of -1,
because the patch overwrites the value with 0 when the batch hasn't run. So yes,
the last progress here is for "no data & no run", though the new commit should
fix this problem.
> V1 suite
```
{
"id" : "1bbf91ac-0a24-4da7-bfe8-54ce1ac63e0f",
"runId" : "ac738c58-a976-4c87-9547-b4a1ee1c2560",
"name" : null,
"timestamp" : "2020-03-28T23:33:08.567Z",
"batchId" : 0,
"numInputRows" : 1,
"inputRowsPerSecond" : 83.33333333333333,
"processedRowsPerSecond" : 0.3835826620636747,
"durationMs" : {
"addBatch" : 2055,
"getBatch" : 2,
"latestOffset" : 0,
"queryPlanning" : 449,
"triggerExecution" : 2607,
"walCommit" : 49
},
"eventTime" : {
"avg" : "1970-01-01T00:01:40.000Z",
"max" : "1970-01-01T00:01:40.000Z",
"min" : "1970-01-01T00:01:40.000Z",
"watermark" : "1970-01-01T00:00:00.000Z"
},
"stateOperators" : [ {
"numRowsTotal" : 1,
"numRowsUpdated" : 1,
"memoryUsedBytes" : 1400,
"customMetrics" : {
"loadedMapCacheHitCount" : 0,
"loadedMapCacheMissCount" : 0,
"stateOnCurrentVersionSizeBytes" : 680
}
} ],
"sources" : [ {
"description" : "MemoryStream[value#1L]",
"startOffset" : null,
"endOffset" : 0,
"numInputRows" : 1,
"inputRowsPerSecond" : 83.33333333333333,
"processedRowsPerSecond" : 0.3835826620636747
} ],
"sink" : {
"description" :
"FileSink[/private/var/folders/wn/3hpqx8015hjbmq43hmrw78z40000gn/T/stream.output-cf800c40-1e18-405e-b48f-71a08348a298]",
"numOutputRows" : -1
}
}
{
"id" : "1bbf91ac-0a24-4da7-bfe8-54ce1ac63e0f",
"runId" : "ac738c58-a976-4c87-9547-b4a1ee1c2560",
"name" : null,
"timestamp" : "2020-03-28T23:33:11.185Z",
"batchId" : 1,
"numInputRows" : 0,
"inputRowsPerSecond" : 0.0,
"processedRowsPerSecond" : 0.0,
"durationMs" : {
"addBatch" : 935,
"getBatch" : 0,
"latestOffset" : 0,
"queryPlanning" : 52,
"triggerExecution" : 1101,
"walCommit" : 70
},
"eventTime" : {
"watermark" : "1970-01-01T00:01:30.000Z"
},
"stateOperators" : [ {
"numRowsTotal" : 1,
"numRowsUpdated" : 0,
"memoryUsedBytes" : 2272,
"customMetrics" : {
"loadedMapCacheHitCount" : 10,
"loadedMapCacheMissCount" : 0,
"stateOnCurrentVersionSizeBytes" : 720
}
} ],
"sources" : [ {
"description" : "MemoryStream[value#1L]",
"startOffset" : 0,
"endOffset" : 0,
"numInputRows" : 0,
"inputRowsPerSecond" : 0.0,
"processedRowsPerSecond" : 0.0
} ],
"sink" : {
"description" :
"FileSink[/private/var/folders/wn/3hpqx8015hjbmq43hmrw78z40000gn/T/stream.output-cf800c40-1e18-405e-b48f-71a08348a298]",
"numOutputRows" : -1
}
}
{
"id" : "1bbf91ac-0a24-4da7-bfe8-54ce1ac63e0f",
"runId" : "ac738c58-a976-4c87-9547-b4a1ee1c2560",
"name" : null,
"timestamp" : "2020-03-28T23:33:12.287Z",
"batchId" : 2,
"numInputRows" : 0,
"inputRowsPerSecond" : 0.0,
"processedRowsPerSecond" : 0.0,
"durationMs" : {
"latestOffset" : 0,
"triggerExecution" : 0
},
"eventTime" : {
"watermark" : "1970-01-01T00:01:30.000Z"
},
"stateOperators" : [ {
"numRowsTotal" : 1,
"numRowsUpdated" : 0,
"memoryUsedBytes" : 2272,
"customMetrics" : {
"loadedMapCacheHitCount" : 10,
"loadedMapCacheMissCount" : 0,
"stateOnCurrentVersionSizeBytes" : 720
}
} ],
"sources" : [ {
"description" : "MemoryStream[value#1L]",
"startOffset" : 0,
"endOffset" : 0,
"numInputRows" : 0,
"inputRowsPerSecond" : 0.0,
"processedRowsPerSecond" : 0.0
} ],
"sink" : {
"description" :
"FileSink[/private/var/folders/wn/3hpqx8015hjbmq43hmrw78z40000gn/T/stream.output-cf800c40-1e18-405e-b48f-71a08348a298]",
"numOutputRows" : 0
}
}
{
"id" : "1bbf91ac-0a24-4da7-bfe8-54ce1ac63e0f",
"runId" : "ac738c58-a976-4c87-9547-b4a1ee1c2560",
"name" : null,
"timestamp" : "2020-03-28T23:33:13.066Z",
"batchId" : 2,
"numInputRows" : 2,
"inputRowsPerSecond" : 153.84615384615384,
"processedRowsPerSecond" : 3.2258064516129035,
"durationMs" : {
"addBatch" : 482,
"getBatch" : 0,
"latestOffset" : 0,
"queryPlanning" : 50,
"triggerExecution" : 620,
"walCommit" : 44
},
"eventTime" : {
"avg" : "1970-01-01T00:01:53.500Z",
"max" : "1970-01-01T00:02:03.000Z",
"min" : "1970-01-01T00:01:44.000Z",
"watermark" : "1970-01-01T00:01:30.000Z"
},
"stateOperators" : [ {
"numRowsTotal" : 2,
"numRowsUpdated" : 2,
"memoryUsedBytes" : 2584,
"customMetrics" : {
"loadedMapCacheHitCount" : 20,
"loadedMapCacheMissCount" : 0,
"stateOnCurrentVersionSizeBytes" : 920
}
} ],
"sources" : [ {
"description" : "MemoryStream[value#1L]",
"startOffset" : 0,
"endOffset" : 1,
"numInputRows" : 2,
"inputRowsPerSecond" : 153.84615384615384,
"processedRowsPerSecond" : 3.2258064516129035
} ],
"sink" : {
"description" :
"FileSink[/private/var/folders/wn/3hpqx8015hjbmq43hmrw78z40000gn/T/stream.output-cf800c40-1e18-405e-b48f-71a08348a298]",
"numOutputRows" : -1
}
}
{
"id" : "1bbf91ac-0a24-4da7-bfe8-54ce1ac63e0f",
"runId" : "ac738c58-a976-4c87-9547-b4a1ee1c2560",
"name" : null,
"timestamp" : "2020-03-28T23:33:13.688Z",
"batchId" : 3,
"numInputRows" : 0,
"inputRowsPerSecond" : 0.0,
"processedRowsPerSecond" : 0.0,
"durationMs" : {
"addBatch" : 987,
"getBatch" : 0,
"latestOffset" : 0,
"queryPlanning" : 43,
"triggerExecution" : 1117,
"walCommit" : 44
},
"eventTime" : {
"watermark" : "1970-01-01T00:01:53.000Z"
},
"stateOperators" : [ {
"numRowsTotal" : 1,
"numRowsUpdated" : 0,
"memoryUsedBytes" : 2512,
"customMetrics" : {
"loadedMapCacheHitCount" : 30,
"loadedMapCacheMissCount" : 0,
"stateOnCurrentVersionSizeBytes" : 720
}
} ],
"sources" : [ {
"description" : "MemoryStream[value#1L]",
"startOffset" : 1,
"endOffset" : 1,
"numInputRows" : 0,
"inputRowsPerSecond" : 0.0,
"processedRowsPerSecond" : 0.0
} ],
"sink" : {
"description" :
"FileSink[/private/var/folders/wn/3hpqx8015hjbmq43hmrw78z40000gn/T/stream.output-cf800c40-1e18-405e-b48f-71a08348a298]",
"numOutputRows" : -1
}
}
{
"id" : "1bbf91ac-0a24-4da7-bfe8-54ce1ac63e0f",
"runId" : "ac738c58-a976-4c87-9547-b4a1ee1c2560",
"name" : null,
"timestamp" : "2020-03-28T23:33:14.806Z",
"batchId" : 4,
"numInputRows" : 0,
"inputRowsPerSecond" : 0.0,
"processedRowsPerSecond" : 0.0,
"durationMs" : {
"latestOffset" : 0,
"triggerExecution" : 0
},
"eventTime" : {
"watermark" : "1970-01-01T00:01:53.000Z"
},
"stateOperators" : [ {
"numRowsTotal" : 1,
"numRowsUpdated" : 0,
"memoryUsedBytes" : 2512,
"customMetrics" : {
"loadedMapCacheHitCount" : 30,
"loadedMapCacheMissCount" : 0,
"stateOnCurrentVersionSizeBytes" : 720
}
} ],
"sources" : [ {
"description" : "MemoryStream[value#1L]",
"startOffset" : 1,
"endOffset" : 1,
"numInputRows" : 0,
"inputRowsPerSecond" : 0.0,
"processedRowsPerSecond" : 0.0
} ],
"sink" : {
"description" :
"FileSink[/private/var/folders/wn/3hpqx8015hjbmq43hmrw78z40000gn/T/stream.output-cf800c40-1e18-405e-b48f-71a08348a298]",
"numOutputRows" : 0
}
}
```
> V2 suite
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]