Joal has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/405867 )
Change subject: Update camus part checker topic name normalization
......................................................................
Update camus part checker topic name normalization
Camus replaces dots with underscores in topic names when using them
as folders. This patch replicates this behavior for the partition
checker.
Bug: T171099
Change-Id: I13c728abd3c5fa0432c1b1019287062224f6d356
---
M
refinery-camus/src/main/scala/org/wikimedia/analytics/refinery/camus/CamusPartitionChecker.scala
M
refinery-camus/src/test/scala/org/wikimedia/analytics/refinery/camus/TestCamusPartitionChecker.scala
2 files changed, 14 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source
refs/changes/67/405867/1
diff --git
a/refinery-camus/src/main/scala/org/wikimedia/analytics/refinery/camus/CamusPartitionChecker.scala
b/refinery-camus/src/main/scala/org/wikimedia/analytics/refinery/camus/CamusPartitionChecker.scala
index 2a62e4d..500b68f 100644
---
a/refinery-camus/src/main/scala/org/wikimedia/analytics/refinery/camus/CamusPartitionChecker.scala
+++
b/refinery-camus/src/main/scala/org/wikimedia/analytics/refinery/camus/CamusPartitionChecker.scala
@@ -60,9 +60,11 @@
}
}
+ // Replacing dots by underscores in topic names as per
+ //
https://github.com/wikimedia/analytics-camus/blob/master/camus-etl-kafka/src/main/java/com/linkedin/camus/etl/kafka/partitioner/DefaultPartitioner.java#L67
def partitionDirectory(base: String, topic: String, year: Int, month: Int,
day: Int, hour: Int): String = {
if ((! StringUtils.isEmpty(base)) && (! StringUtils.isEmpty(topic)))
-
f"${base}%s/${topic}%s/hourly/${year}%04d/${month}%02d/${day}%02d/${hour}%02d"
+ f"${base}%s/${topic.replaceAll("\\.",
"_")}%s/hourly/${year}%04d/${month}%02d/${day}%02d/${hour}%02d"
else
throw new IllegalArgumentException("Can't make partition directory with
empty base or topic.")
}
diff --git
a/refinery-camus/src/test/scala/org/wikimedia/analytics/refinery/camus/TestCamusPartitionChecker.scala
b/refinery-camus/src/test/scala/org/wikimedia/analytics/refinery/camus/TestCamusPartitionChecker.scala
index 43bb037..6ff5f7e 100644
---
a/refinery-camus/src/test/scala/org/wikimedia/analytics/refinery/camus/TestCamusPartitionChecker.scala
+++
b/refinery-camus/src/test/scala/org/wikimedia/analytics/refinery/camus/TestCamusPartitionChecker.scala
@@ -66,6 +66,17 @@
partitionDir should equal(expectedDir)
}
+ it should "compute partition directory with dotted topic" in {
+ val base = "/test/base/folder"
+ val topic = "topic.test"
+ val (year, month, day, hour) = (2015, 9, 28, 1)
+
+ val partitionDir = CamusPartitionChecker.partitionDirectory(base, topic,
year, month, day, hour)
+ val expectedDir = "/test/base/folder/topic_test/hourly/2015/09/28/01"
+
+ partitionDir should equal(expectedDir)
+ }
+
it should "fail computing partition directory if no base" in {
val base = null
val topic = "topic"
--
To view, visit https://gerrit.wikimedia.org/r/405867
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I13c728abd3c5fa0432c1b1019287062224f6d356
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits