Joal has submitted this change and it was merged.

Change subject: Use webrequest_source text for AppSessionMetrics, mobile is merging with text
......................................................................


Use webrequest_source text for AppSessionMetrics, mobile is merging with text

Bug: T122651
Change-Id: I39eb113335303d0df3d2aced007c6decb9c7aa8d
---
M refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
M refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
2 files changed, 13 insertions(+), 11 deletions(-)

Approvals:
  Joal: Looks good to me, approved
  jenkins-bot: Verified



diff --git 
a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
 
b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
index 6d6cf00..9ef5b95 100644
--- 
a/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
+++ 
b/refinery-job/src/main/scala/org/wikimedia/analytics/refinery/job/AppSessionMetrics.scala
@@ -186,11 +186,11 @@
 
   /**
    * Generate list of Parquet file paths over a range of dates
-   * @param webrequestMobilePath Base path to webrequest mobile parquet data
+   * @param webrequestTextPath Base path to webrequest text parquet data
    * @param datesInfo Hashmap with report date related info
    * @return List of path strings like [".../day=1", ".../day=2"]
    */
-  def dateRangeToPathList(webrequestMobilePath: String, datesInfo: Map[String, 
Int]): List[String] = {
+  def dateRangeToPathList(webrequestTextPath: String, datesInfo: Map[String, 
Int]): List[String] = {
     //Custom iterator for stepping through LocalDate objects
     def makeDateRange(from: LocalDate, to: LocalDate, step: Period): 
Iterator[LocalDate] =
       Iterator.iterate(from)(_.plus(step)).takeWhile(_.isBefore(to))
@@ -198,7 +198,7 @@
     val dateStart = new LocalDate(datesInfo("year"), datesInfo("month"), 
datesInfo("day"))
     val dateEnd = dateStart.plusDays(datesInfo("periodDays"))
     val dateRange = makeDateRange(dateStart, dateEnd, new Period().withDays(1))
-    dateRange.toList.map(dt => 
"%s/year=%d/month=%d/day=%d".format(webrequestMobilePath, dt.getYear, 
dt.getMonthOfYear, dt.getDayOfMonth))
+    dateRange.toList.map(dt => 
"%s/year=%d/month=%d/day=%d".format(webrequestTextPath, dt.getYear, 
dt.getMonthOfYear, dt.getDayOfMonth))
   }
 
   /**
@@ -211,7 +211,8 @@
    */
   def pathListToUuidDataframe(paths: List[String], sqlContext: SQLContext): 
DataFrame = {
     sqlContext.parquetFile(paths: _*)
-      .filter("is_pageview and x_analytics_map['wmfuuid'] is not null and 
x_analytics_map['wmfuuid'] != ''")
+      .filter("is_pageview and access_method = 'mobile app' " +
+        "and x_analytics_map['wmfuuid'] is not null and 
x_analytics_map['wmfuuid'] != ''")
       .selectExpr("x_analytics_map['wmfuuid'] as wmfuuid", "CAST(ts AS int) as 
ts")
   }
 
@@ -349,12 +350,13 @@
         sqlContext.setConf("spark.sql.parquet.compression.codec", "snappy")
 
         // Generate a list of all parquet file paths to read given the 
webrequest base path,
-        // and all dates related information
-        val webrequestMobilePath = params.webrequestBasePath + 
"/webrequest_source=mobile"
+        // and all dates related information.  NOTE: As of January 2016,
+        // mobile web caches have been merged with text, so 
webrequest_source=text.
+        val webrequestTextPath = params.webrequestBasePath + 
"/webrequest_source=text"
         // Helper hashmap with all date related information to avoid passing 
around lots of params
         val datesInfo = HashMap("year" -> params.year, "month" -> 
params.month, "day" -> params.day, "periodDays" -> params.periodDays)
         // List of path strings like [".../day=1", ".../day=2"]
-        val webrequestPaths = dateRangeToPathList(webrequestMobilePath, 
datesInfo)
+        val webrequestPaths = dateRangeToPathList(webrequestTextPath, 
datesInfo)
 
         // Get sessions data for all users, calculate stats for different 
metrics,
         // and get the stats in a printable string format to output
diff --git 
a/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
 
b/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
index 27064ec..254f317 100644
--- 
a/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
+++ 
b/refinery-job/src/test/scala/org/wikimedia/analytics/refinery/job/AppSessionSuite.scala
@@ -34,15 +34,15 @@
 
   test("List of parquet paths is generated correctly based on the report run 
date and period") {
     val datesInfo = HashMap("year" -> 2015, "month" -> 5, "day" -> 10, 
"periodDays" -> 10)
-    val webrequestMobilePath = ".../webrequest_source=mobile"
-    val pathList = AppSessionMetrics.dateRangeToPathList(webrequestMobilePath, 
datesInfo)
+    val webrequestTextPath = ".../webrequest_source=text"
+    val pathList = AppSessionMetrics.dateRangeToPathList(webrequestTextPath, 
datesInfo)
 
     //Assert the length of the list equals report period in days
     assert(pathList.length == datesInfo("periodDays"))
 
     //Assert the paths are being generated correctly
-    assert(pathList.head == 
".../webrequest_source=mobile/year=2015/month=5/day=10")
-    assert(pathList.last == 
".../webrequest_source=mobile/year=2015/month=5/day=19")
+    assert(pathList.head == 
".../webrequest_source=text/year=2015/month=5/day=10")
+    assert(pathList.last == 
".../webrequest_source=text/year=2015/month=5/day=19")
   }
 
 }
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/264868
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I39eb113335303d0df3d2aced007c6decb9c7aa8d
Gerrit-PatchSet: 3
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: Joal <j...@wikimedia.org>
Gerrit-Reviewer: Madhuvishy <mviswanat...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to