Joal has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/382733 )
Change subject: Correct bugs in mediawiki_history scripts
......................................................................
Correct bugs in mediawiki_history scripts
A bug was introduced in the mediawiki_history table-creation
schema, and another one in the mediawiki_history Druid
loading job.
This patch corrects both.
Change-Id: I4db0a630b3a0e7dc13c4f18158ef1b0210b447a0
---
M hive/mediawiki/history/create_mediawiki_history_table.hql
M oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
2 files changed, 17 insertions(+), 7 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery refs/changes/33/382733/1
diff --git a/hive/mediawiki/history/create_mediawiki_history_table.hql
b/hive/mediawiki/history/create_mediawiki_history_table.hql
index 74e9e5d..f707b26 100644
--- a/hive/mediawiki/history/create_mediawiki_history_table.hql
+++ b/hive/mediawiki/history/create_mediawiki_history_table.hql
@@ -1,5 +1,10 @@
-- Creates table statement for mediawiki_history table.
--
+-- WARNING: Timestamp fields are commented out in this file
+-- because our version of Hive doesn't support them.
+-- We are waiting for an upgrade to Hive 1.2 or higher
+-- before updating the fields.
+--
-- Parameters:
-- <none>
--
@@ -12,7 +17,8 @@
`wiki_db` string COMMENT
'enwiki, dewiki, eswiktionary, etc.',
`event_entity` string COMMENT
'revision, user or page',
`event_type` string COMMENT
'create, move, delete, etc. Detailed explanation in the docs under
#Event_types',
- `event_timestamp` timestamp COMMENT 'When
this event ocurred',
+ `event_timestamp` string COMMENT 'When
this event ocurred',
+ --`event_timestamp` timestamp COMMENT
'When this event ocurred',
`event_comment` string COMMENT
'Comment related to this event, sourced from log_comment, rev_comment, etc.',
`event_user_id` bigint COMMENT 'Id of
the user that caused the event',
`event_user_text` string COMMENT
'Historical text of the user that caused the event',
@@ -26,7 +32,8 @@
`event_user_is_created_by_peer` boolean COMMENT
'Whether the event_user account was created by another user',
`event_user_is_anonymous` boolean COMMENT
'Whether the event_user is not registered',
`event_user_is_bot_by_name` boolean COMMENT
'Whether the event_user\'s name matches patterns we use to identify bots',
- `event_user_creation_timestamp` timestamp COMMENT
'Registration timestamp of the user that caused the event',
+ --`event_user_creation_timestamp` timestamp COMMENT
'Registration timestamp of the user that caused the event',
+ `event_user_creation_timestamp` string COMMENT
'Registration timestamp of the user that caused the event',
`event_user_revision_count` bigint COMMENT
'Cumulative revision count per user for the current event_user_id (only
available in revision-create events so far)',
`event_user_seconds_since_previous_revision` bigint COMMENT 'In
revision events: seconds elapsed since the previous revision made by the
current event_user_id (only available in revision-create events so far)',
@@ -38,7 +45,8 @@
`page_namespace_latest` int COMMENT 'In
revision/page events: current namespace of the page',
`page_namespace_is_content_latest` boolean COMMENT 'In
revision/page events: current namespace of the page is categorized as content',
`page_is_redirect_latest` boolean COMMENT 'In
revision/page events: whether the page is currently a redirect',
- `page_creation_timestamp` timestamp COMMENT 'In
revision/page events: creation timestamp of the page',
+ --`page_creation_timestamp` timestamp COMMENT 'In
revision/page events: creation timestamp of the page',
+ `page_creation_timestamp` string COMMENT 'In
revision/page events: creation timestamp of the page',
`page_revision_count` bigint COMMENT 'In
revision/page events: Cumulative revision count per page for the current
page_id (only available in revision-create events so far)',
`page_seconds_since_previous_revision` bigint COMMENT 'In
revision/page events: seconds elapsed since the previous revision made on the
current page_id (only available in revision-create events so far)',
@@ -54,7 +62,8 @@
`user_is_created_by_peer` boolean COMMENT 'In
user events: whether the user account was created by another user',
`user_is_anonymous` boolean COMMENT 'In
user events: whether the user is not registered',
`user_is_bot_by_name` boolean COMMENT 'In
user events: whether the user\'s name matches patterns we use to identify bots',
- `user_creation_timestamp` timestamp COMMENT 'In
user events: registration timestamp of the user.',
+ --`user_creation_timestamp` timestamp COMMENT 'In
user events: registration timestamp of the user.',
+ `user_creation_timestamp` string COMMENT 'In
user events: registration timestamp of the user.',
`revision_id` bigint COMMENT 'In
revision events: id of the revision',
`revision_parent_id` bigint COMMENT 'In
revision events: id of the parent revision',
@@ -65,7 +74,8 @@
`revision_content_model` string COMMENT 'In
revision events: content model of revision',
`revision_content_format` string COMMENT 'In
revision events: content format of revision',
`revision_is_deleted` boolean COMMENT 'In
revision events: whether this revision has been deleted (moved to archive
table)',
- `revision_deleted_timestamp` timestamp COMMENT 'In
revision events: the timestamp when the revision was deleted',
+ --`revision_deleted_timestamp` timestamp COMMENT 'In
revision events: the timestamp when the revision was deleted',
+ `revision_deleted_timestamp` string COMMENT 'In
revision events: the timestamp when the revision was deleted',
`revision_is_identity_reverted` boolean COMMENT 'In
revision events: whether this revision was reverted by another future revision',
`revision_first_identity_reverting_revision_id` bigint COMMENT 'In
revision events: id of the revision that reverted this revision',
`revision_seconds_to_identity_revert` bigint COMMENT 'In
revision events: seconds elapsed between revision posting and its revert (if
there was one)',
diff --git a/oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
b/oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
index fc32a1a..d28b3a3 100644
--- a/oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
+++ b/oozie/mediawiki/history/druid/generate_json_mediawiki_history.hql
@@ -116,7 +116,7 @@
CASE WHEN event_user_is_bot_by_name THEN 1 ELSE 0 END AS
event_user_is_bot_by_name,
event_user_creation_timestamp,
event_user_revision_count,
- event_user_seconds_to_previous_revision,
+ event_user_seconds_since_previous_revision,
page_id,
page_title,
@@ -128,7 +128,7 @@
CASE WHEN page_is_redirect_latest THEN 1 ELSE 0 END AS
page_is_redirect_latest,
page_creation_timestamp,
page_revision_count,
- page_seconds_to_previous_revision,
+ page_seconds_since_previous_revision,
user_id,
user_text,
--
To view, visit https://gerrit.wikimedia.org/r/382733
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I4db0a630b3a0e7dc13c4f18158ef1b0210b447a0
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits