[MediaWiki-commits] [Gerrit] Use webrequest_source text for AppSessionMetrics, mobile is ... - change (analytics...source)

2016-01-19 Thread Joal (Code Review)
Joal has submitted this change and it was merged.

Change subject: Use webrequest_source text for AppSessionMetrics, mobile is merging with text
..


Use webrequest_source text for AppSessionMetrics, mobile is merging with text

Bug: T122651
Change-Id: I39eb113335303d0df3d2aced007c6decb9c7aa8d
---
M refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
M refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
2 files changed, 13 insertions(+), 11 deletions(-)

Approvals:
  Joal: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
index 6d6cf00..9ef5b95 100644
--- a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
+++ b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
@@ -186,11 +186,11 @@
 
   /**
* Generate list of Parquet file paths over a range of dates
-   * @param webrequestMobilePath Base path to webrequest mobile parquet data
+   * @param webrequestTextPath Base path to webrequest text parquet data
* @param datesInfo Hashmap with report date related info
* @return List of path strings like [".../day=1", ".../day=2"]
*/
-  def dateRangeToPathList(webrequestMobilePath: String, datesInfo: Map[String, 
Int]): List[String] = {
+  def dateRangeToPathList(webrequestTextPath: String, datesInfo: Map[String, 
Int]): List[String] = {
 //Custom iterator for stepping through LocalDate objects
 def makeDateRange(from: LocalDate, to: LocalDate, step: Period): 
Iterator[LocalDate] =
   Iterator.iterate(from)(_.plus(step)).takeWhile(_.isBefore(to))
@@ -198,7 +198,7 @@
 val dateStart = new LocalDate(datesInfo("year"), datesInfo("month"), 
datesInfo("day"))
 val dateEnd = dateStart.plusDays(datesInfo("periodDays"))
 val dateRange = makeDateRange(dateStart, dateEnd, new Period().withDays(1))
-dateRange.toList.map(dt => 
"%s/year=%d/month=%d/day=%d".format(webrequestMobilePath, dt.getYear, 
dt.getMonthOfYear, dt.getDayOfMonth))
+dateRange.toList.map(dt => 
"%s/year=%d/month=%d/day=%d".format(webrequestTextPath, dt.getYear, 
dt.getMonthOfYear, dt.getDayOfMonth))
   }
 
   /**
@@ -211,7 +211,8 @@
*/
   def pathListToUuidDataframe(paths: List[String], sqlContext: SQLContext): 
DataFrame = {
 sqlContext.parquetFile(paths: _*)
-  .filter("is_pageview and x_analytics_map['wmfuuid'] is not null and 
x_analytics_map['wmfuuid'] != ''")
+  .filter("is_pageview and access_method = 'mobile app' " +
+"and x_analytics_map['wmfuuid'] is not null and 
x_analytics_map['wmfuuid'] != ''")
   .selectExpr("x_analytics_map['wmfuuid'] as wmfuuid", "CAST(ts AS int) as 
ts")
   }
 
@@ -349,12 +350,13 @@
 sqlContext.setConf("spark.sql.parquet.compression.codec", "snappy")
 
 // Generate a list of all parquet file paths to read given the 
webrequest base path,
-// and all dates related information
-val webrequestMobilePath = params.webrequestBasePath + 
"/webrequest_source=mobile"
+// and all dates related information.  NOTE: As of January 2016,
+// mobile web caches have been merged with text, so 
webrequest_source=text.
+val webrequestTextPath = params.webrequestBasePath + 
"/webrequest_source=text"
 // Helper hashmap with all date related information to avoid passing 
around lots of params
 val datesInfo = HashMap("year" -> params.year, "month" -> 
params.month, "day" -> params.day, "periodDays" -> params.periodDays)
 // List of path strings like [".../day=1", ".../day=2"]
-val webrequestPaths = dateRangeToPathList(webrequestMobilePath, 
datesInfo)
+val webrequestPaths = dateRangeToPathList(webrequestTextPath, 
datesInfo)
 
 // Get sessions data for all users, calculate stats for different 
metrics,
 // and get the stats in a printable string format to output
diff --git a/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala b/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
index 27064ec..254f317 100644
--- a/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
+++ b/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
@@ -34,15 +34,15 @@
 
   test("List of parquet paths is generated correctly based on the report run 
date and period") {
 val datesInfo = HashMap("year" -> 2015, "month" -> 5, "day" -> 10, 
"periodDays" -> 10)
-val webrequestMobilePath = ".../webrequest_source=mobile"
-val pathList = AppSessionMetrics.dateRangeToPathList(webrequestMobilePath, 
datesInfo)
+val 

[MediaWiki-commits] [Gerrit] Use webrequest_source text for AppSessionMetrics, mobile is ... - change (analytics...source)

2016-01-18 Thread Ottomata (Code Review)
Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/264868

Change subject: Use webrequest_source text for AppSessionMetrics, mobile is merging with text
..

Use webrequest_source text for AppSessionMetrics, mobile is merging with text

Bug: T122651
Change-Id: I39eb113335303d0df3d2aced007c6decb9c7aa8d
---
M refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
1 file changed, 3 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source refs/changes/68/264868/1

diff --git a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
index 6d6cf00..92eb4b7 100644
--- a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
+++ b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
@@ -349,8 +349,9 @@
 sqlContext.setConf("spark.sql.parquet.compression.codec", "snappy")
 
 // Generate a list of all parquet file paths to read given the 
webrequest base path,
-// and all dates related information
-val webrequestMobilePath = params.webrequestBasePath + 
"/webrequest_source=mobile"
+// and all dates related information.  NOTE: As of January 2016,
+// mobile web caches have been merged with text, so 
webrequest_source=text.
+val webrequestMobilePath = params.webrequestBasePath + 
"/webrequest_source=text"
 // Helper hashmap with all date related information to avoid passing 
around lots of params
 val datesInfo = HashMap("year" -> params.year, "month" -> 
params.month, "day" -> params.day, "periodDays" -> params.periodDays)
 // List of path strings like [".../day=1", ".../day=2"]

-- 
To view, visit https://gerrit.wikimedia.org/r/264868
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I39eb113335303d0df3d2aced007c6decb9c7aa8d
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Ottomata 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits