[GitHub] phoenix pull request #309: [Do Not Merge] PHOENIX-3817 Verify Replication us...

2018-07-19 Thread akshita-malhotra
Github user akshita-malhotra commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203943433
  
--- Diff: phoenix-core/src/it/java/org/apache/phoenix/mapreduce/VerifyReplicationToolIT.java ---
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import java.io.IOException;
+import java.sql.*;
+import java.util.*;
+
+import com.google.common.collect.Maps;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.end2end.BaseUniqueNamesOwnClusterIT;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.ReadOnlyProps;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+
+public class VerifyReplicationToolIT extends BaseUniqueNamesOwnClusterIT {
+    private static final Logger LOG = LoggerFactory.getLogger(VerifyReplicationToolIT.class);
+    private static final String CREATE_USER_TABLE = "CREATE TABLE IF NOT EXISTS %s ( " +
+            " TENANT_ID VARCHAR NOT NULL, USER_ID VARCHAR NOT NULL, AGE INTEGER " +
+            " CONSTRAINT pk PRIMARY KEY ( TENANT_ID, USER_ID ))";
+    private static final String UPSERT_USER = "UPSERT INTO %s VALUES (?, ?, ?)";
+    private static final String UPSERT_SELECT_USERS =
+            "UPSERT INTO %s SELECT TENANT_ID, USER_ID, %d FROM %s WHERE TENANT_ID = ? LIMIT %d";
+    private static final Random RANDOM = new Random();
+
+    private static int tenantNum = 0;
+    private static int userNum = 0;
+    private static String sourceTableName;
+    private static String targetTableName;
+    private List<String> sourceTenants;
+    private String sourceOnlyTenant;
+    private String sourceAndTargetTenant;
+    private String targetOnlyTenant;
+
+    @BeforeClass
+    public static void createTables() throws Exception {
+        NUM_SLAVES_BASE = 2;
+        Map<String, String> props = Maps.newHashMapWithExpectedSize(1);
+        setUpTestDriver(new ReadOnlyProps(props.entrySet().iterator()));
+        Connection conn = DriverManager.getConnection(getUrl());
+        sourceTableName = generateUniqueName();
+        targetTableName = generateUniqueName();
+        // tables will have the same schema, but a different number of regions
+        conn.createStatement().execute(String.format(CREATE_USER_TABLE, sourceTableName));
+        conn.createStatement().execute(String.format(CREATE_USER_TABLE, targetTableName));
+        conn.commit();
+    }
+
+    @Before
+    public void setupTenants() throws Exception {
+        sourceTenants = new ArrayList<>(2);
+        sourceTenants.add("tenant" + tenantNum++);
+        sourceTenants.add("tenant" + tenantNum++);
+        sourceOnlyTenant = sourceTenants.get(0);
+        sourceAndTargetTenant = sourceTenants.get(1);
+        targetOnlyTenant = "tenant" + tenantNum++;
+        upsertData();
+        split(sourceTableName, 4);
+        split(targetTableName, 2);
+        // ensure scans for each table touch multiple region servers
+        ensureRegionsOnDifferentServers(sourceTableName);
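The upsertData(), split(), and ensureRegionsOnDifferentServers() helpers referenced above are cut off by the archive. As orientation only, here is a minimal hypothetical sketch of how rows could be seeded per tenant with the UPSERT_USER statement declared above; the method name, row count, and age values are illustrative, not the PR's actual code:

    // Hypothetical sketch -- the PR's real upsertData() is truncated out of this digest.
    // Seeds users for one tenant into a table via the UPSERT_USER format string above.
    private static void upsertUsers(Connection conn, String tableName, String tenantId,
            int numUsers) throws SQLException {
        try (PreparedStatement stmt =
                conn.prepareStatement(String.format(UPSERT_USER, tableName))) {
            for (int i = 0; i < numUsers; i++) {
                stmt.setString(1, tenantId);           // TENANT_ID
                stmt.setString(2, "user" + userNum++); // USER_ID
                stmt.setInt(3, RANDOM.nextInt(99));    // AGE
                stmt.execute();
            }
        }
        conn.commit(); // Phoenix buffers mutations until commit
    }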

[GitHub] phoenix pull request #309: [Do Not Merge] PHOENIX-3817 Verify Replication us...

2018-07-19 Thread karanmehta93
Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203937295
  
--- Diff: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/VerifyReplicationTool.java ---
@@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.coprocessor.BaseScannerRegionObserver;
+import org.apache.phoenix.iterate.ResultIterator;
+import org.apache.phoenix.jdbc.PhoenixResultSet;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+
+/**
+ * Map only job that compares data across a source and target table. The target table can be
+ * on the same cluster or on a remote cluster. SQL conditions may be specified to compare only
+ * a subset of both tables.
+ */
+public class VerifyReplicationTool implements Tool {
+    private static final Logger LOG = LoggerFactory.getLogger(VerifyReplicationTool.class);
+
+    static final Option ZK_QUORUM_OPT =
+            new Option("z", "zookeeper", true, "ZooKeeper connection details (optional)");
+    static final Option TABLE_NAME_OPT =
+            new Option("t", "table", true, "Phoenix table name (required)");
+    static final Option TARGET_TABLE_NAME_OPT =
+            new Option("tt", "target-table", true, "Target Phoenix table name (optional)");
+    static final Option TARGET_ZK_QUORUM_OPT =
+            new Option("tz", "target-zookeeper", true,
+                    "Target ZooKeeper connection details (optional)");
+    static final Option CONDITIONS_OPT =
+            new Option("c", "conditions", true,
+                    "Conditions for select query WHERE clause (optional)");
+    static final Option TIMESTAMP =
+            new Option("ts", "timestamp", true,
+                    "Timestamp in millis used to compare the two tables.  Defaults to current time minus 60 seconds");
+
+    static final Option HELP_OPT = new Option("h", "help", false, "Show this help and quit");
+
+    private Configuration conf;
+
+    private String zkQuorum;
+    private String tableName;
+    private String targetTableName;
+    private String targetZkQuorum;
+    private String sqlConditions;
+    private long timestamp;
+
+    VerifyReplicationTool(Configuration conf) {
+        this.conf = 
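The quote above cuts off at the constructor, so none of the option parsing or job setup survives in this digest. As orientation only, a minimal hypothetical launcher, assuming the conventional Tool/ToolRunner wiring that the imports suggest; the jar name, table, host, and flag values in the comment are illustrative:

    // Hypothetical sketch -- the PR's actual main() and run() are truncated here.
    // Typical invocation, using the flags declared in the Options above:
    //   hadoop jar phoenix-core.jar org.apache.phoenix.mapreduce.VerifyReplicationTool \
    //       -t USERS -tz target-zk:2181 -c "TENANT_ID = 'tenant1'"
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        System.exit(ToolRunner.run(conf, new VerifyReplicationTool(conf), args));
    }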

[GitHub] phoenix pull request #309: [Do Not Merge] PHOENIX-3817 Verify Replication us...

2018-07-19 Thread karanmehta93
Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203937052
  
--- Diff: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/VerifyReplicationTool.java ---

[GitHub] phoenix pull request #309: [Do Not Merge] PHOENIX-3817 Verify Replication us...

2018-07-19 Thread karanmehta93
Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203937013
  
--- Diff: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/VerifyReplicationTool.java ---

[GitHub] phoenix pull request #309: [Do Not Merge] PHOENIX-3817 Verify Replication us...

2018-07-19 Thread karanmehta93
Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203933573
  
--- Diff: phoenix-core/src/it/java/org/apache/phoenix/mapreduce/VerifyReplicationToolIT.java ---

[GitHub] phoenix pull request #309: [Do Not Merge] PHOENIX-3817 Verify Replication us...

2018-07-19 Thread karanmehta93
Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203933404
  
--- Diff: phoenix-core/src/it/java/org/apache/phoenix/mapreduce/VerifyReplicationToolIT.java ---

[jira] [Commented] (PHOENIX-3817) VerifyReplication using SQL

2018-07-19 Thread ASF GitHub Bot (JIRA)


[ 
https://issues.apache.org/jira/browse/PHOENIX-3817?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16550190#comment-16550190
 ] 

ASF GitHub Bot commented on PHOENIX-3817:
-

Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203932680
  
--- Diff: 
phoenix-core/src/it/java/org/apache/phoenix/mapreduce/VerifyReplicationToolIT.java
 ---
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import java.io.IOException;
+import java.sql.*;
+import java.util.*;
+
+import com.google.common.collect.Maps;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.phoenix.end2end.BaseUniqueNamesOwnClusterIT;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.apache.phoenix.util.ReadOnlyProps;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+
+public class VerifyReplicationToolIT extends BaseUniqueNamesOwnClusterIT {
+private static final Logger LOG = 
LoggerFactory.getLogger(VerifyReplicationToolIT.class);
+private static final String CREATE_USER_TABLE = "CREATE TABLE IF NOT 
EXISTS %s ( " +
+" TENANT_ID VARCHAR NOT NULL, USER_ID VARCHAR NOT NULL, AGE 
INTEGER " +
+" CONSTRAINT pk PRIMARY KEY ( TENANT_ID, USER_ID ))";
+private static final String UPSERT_USER = "UPSERT INTO %s VALUES (?, 
?, ?)";
+private static final String UPSERT_SELECT_USERS =
+"UPSERT INTO %s SELECT TENANT_ID, USER_ID, %d FROM %s WHERE 
TENANT_ID = ? LIMIT %d";
+private static final Random RANDOM = new Random();
+
+private static int tenantNum = 0;
+private static int userNum = 0;
+private static String sourceTableName;
+private static String targetTableName;
+private List sourceTenants;
+private String sourceOnlyTenant;
+private String sourceAndTargetTenant;
+private String targetOnlyTenant;
+
+@BeforeClass
+public static void createTables() throws Exception {
+NUM_SLAVES_BASE = 2;
+Map props = Maps.newHashMapWithExpectedSize(1);
+setUpTestDriver(new ReadOnlyProps(props.entrySet().iterator()));
+Connection conn = DriverManager.getConnection(getUrl());
+sourceTableName = generateUniqueName();
+targetTableName = generateUniqueName();
+// tables will have the same schema, but a different number of 
regions
+conn.createStatement().execute(String.format(CREATE_USER_TABLE, 
sourceTableName));
+conn.createStatement().execute(String.format(CREATE_USER_TABLE, 
targetTableName));
+conn.commit();
+}
+
+@Before
+public void setupTenants() throws Exception {
+sourceTenants = new ArrayList<>(2);
+sourceTenants.add("tenant" + tenantNum++);
+sourceTenants.add("tenant" + tenantNum++);
+sourceOnlyTenant = sourceTenants.get(0);
+sourceAndTargetTenant = sourceTenants.get(1);
+targetOnlyTenant = "tenant" + tenantNum++;
+upsertData();
+split(sourceTableName, 4);
+split(targetTableName, 2);
+// ensure scans for each table touch multiple region servers
+ensureRegionsOnDifferentServers(sourceTableName);
+ 

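For orientation, a minimal sketch of how the UPSERT_USER template above is
bound and executed (a hypothetical reconstruction using the test's own fields,
not the patch's exact code):

    // Hypothetical usage of the UPSERT_USER format string from the diff above.
    try (Connection conn = DriverManager.getConnection(getUrl());
         PreparedStatement stmt =
                 conn.prepareStatement(String.format(UPSERT_USER, sourceTableName))) {
        stmt.setString(1, sourceAndTargetTenant); // TENANT_ID
        stmt.setString(2, "user" + userNum++);    // USER_ID
        stmt.setInt(3, RANDOM.nextInt(99));       // AGE
        stmt.execute();
        conn.commit();
    }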

Re:PhoenixCon Archive Page

2018-07-19 Thread Clay Baenziger (BLOOMBERG/ 731 LEX)
I was able to get a pointer to Anirudha and Gabriel's PhoenixCon 2017 talk, so 
I updated the archive again in 
https://github.com/cbaenziger/phoenix-site/commit/36b428063c512fb23fcafeeb4af229488fe6c3f4

-Clay

From: dev@phoenix.apache.org  At: 07/19/18 17:11:46  To: DEV@PHOENIX.APACHE.ORG
Subject: PhoenixCon Archive Page

Hi all,

I don't know how best to recommend a change to the Phoenix-Site SVN repo 
(Phoenix JIRAs seem to be set up only against the code repo), but I have a 
patch for the PhoenixCon Archive page, including the recent PhoenixCon 2018.

Also, could any speakers from PhoenixCon 2017 please provide pointers to their 
slides?

I used OpenOffice to write this, following the format of the HBaseCon Archive 
page. I cleaned up the OpenOffice-generated HTML, converted it to XHTML, ran 
xmllint --format on it, and verified that it passes the W3C's validator tool.

I have the updated files at: 
https://github.com/cbaenziger/phoenix-site/commit/e508266a3fef0ef86127871692f7c02c256d47a8

What is the best way to get this committed, if okay?

-Clay



[GitHub] phoenix issue #308: Client-side hash aggregation

2018-07-19 Thread geraldss
Github user geraldss commented on the issue:

https://github.com/apache/phoenix/pull/308
  
@JamesRTaylor - I made the changes and they are ready for review. Thanks.


---


[jira] [Commented] (PHOENIX-3817) VerifyReplication using SQL

2018-07-19 Thread ASF GitHub Bot (JIRA)


[ 
https://issues.apache.org/jira/browse/PHOENIX-3817?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16550161#comment-16550161
 ] 

ASF GitHub Bot commented on PHOENIX-3817:
-

Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203927762
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/mapreduce/VerifyReplicationTool.java
 ---
@@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.coprocessor.BaseScannerRegionObserver;
+import org.apache.phoenix.iterate.ResultIterator;
+import org.apache.phoenix.jdbc.PhoenixResultSet;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+
+/**
+ * Map-only job that compares data across a source and target table. The 
target table can be on the
+ * same cluster or on a remote cluster. SQL conditions may be specified to 
compare only a subset of
+ * both tables.
+ */
+public class VerifyReplicationTool implements Tool {
+private static final Logger LOG = 
LoggerFactory.getLogger(VerifyReplicationTool.class);
+
+static final Option
+ZK_QUORUM_OPT =
+new Option("z", "zookeeper", true, "ZooKeeper connection 
details (optional)");
+static final Option
+TABLE_NAME_OPT =
+new Option("t", "table", true, "Phoenix table name 
(required)");
+static final Option
+TARGET_TABLE_NAME_OPT =
+new Option("tt", "target-table", true, "Target Phoenix table 
name (optional)");
+static final Option
+TARGET_ZK_QUORUM_OPT =
+new Option("tz", "target-zookeeper", true,
+"Target ZooKeeper connection details (optional)");
+static final Option
+CONDITIONS_OPT =
+new Option("c", "conditions", true,
+"Conditions for select query WHERE clause (optional)");
+static final Option TIMESTAMP =
+new Option("ts", "timestamp", true,
+"Timestamp in millis used to compare the two tables.  
Defaults to current time minus 60 seconds");
+
+static final Option HELP_OPT = new Option("h", "help", false, "Show 
this help and quit");
+
+private Configuration conf;
+
+private String zkQuorum;
+private String tableName;
+private String targetTableName;
+private String targetZkQuorum;
+private String sqlConditions;
+private long timestamp;
+
+VerifyReplicationTool(Configuration conf) {
+this.conf = 

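The message is truncated before any usage text, so here is a hedged sketch of
driving the tool through Hadoop's ToolRunner with the switches defined above
(table name, quorum, and condition values are made up; the package-private
constructor from the diff is assumed callable from a driver in the same
package):

    // Illustrative invocation only; the patch may expose main() with different wiring.
    Configuration conf = HBaseConfiguration.create();
    int exitCode = ToolRunner.run(conf, new VerifyReplicationTool(conf), new String[] {
            "-t", "USERS",                  // --table: source Phoenix table (required)
            "-tz", "remote-zk:2181:/hbase", // --target-zookeeper: target cluster (optional)
            "-c", "TENANT_ID = 'tenant1'",  // --conditions: WHERE clause filter (optional)
            "-ts", String.valueOf(System.currentTimeMillis() - 60000L) // --timestamp
    });
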
[jira] [Commented] (PHOENIX-3817) VerifyReplication using SQL

2018-07-19 Thread ASF GitHub Bot (JIRA)


[ 
https://issues.apache.org/jira/browse/PHOENIX-3817?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16550160#comment-16550160
 ] 

ASF GitHub Bot commented on PHOENIX-3817:
-

Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203927581
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/mapreduce/VerifyReplicationTool.java
 ---
@@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you maynot use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.coprocessor.BaseScannerRegionObserver;
+import org.apache.phoenix.iterate.ResultIterator;
+import org.apache.phoenix.jdbc.PhoenixResultSet;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
+import org.apache.phoenix.util.EnvironmentEdgeManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+
+/**
+ * Map-only job that compares data across a source and target table. The 
target table can be on the
+ * same cluster or on a remote cluster. SQL conditions may be specified to 
compare only a subset of
+ * both tables.
+ */
+public class VerifyReplicationTool implements Tool {
+private static final Logger LOG = 
LoggerFactory.getLogger(VerifyReplicationTool.class);
+
+static final Option
+ZK_QUORUM_OPT =
+new Option("z", "zookeeper", true, "ZooKeeper connection 
details (optional)");
+static final Option
+TABLE_NAME_OPT =
+new Option("t", "table", true, "Phoenix table name 
(required)");
+static final Option
+TARGET_TABLE_NAME_OPT =
+new Option("tt", "target-table", true, "Target Phoenix table 
name (optional)");
+static final Option
+TARGET_ZK_QUORUM_OPT =
+new Option("tz", "target-zookeeper", true,
+"Target ZooKeeper connection details (optional)");
+static final Option
+CONDITIONS_OPT =
+new Option("c", "conditions", true,
+"Conditions for select query WHERE clause (optional)");
+static final Option TIMESTAMP =
+new Option("ts", "timestamp", true,
+"Timestamp in millis used to compare the two tables.  
Defaults to current time minus 60 seconds");
+
+static final Option HELP_OPT = new Option("h", "help", false, "Show 
this help and quit");
+
+private Configuration conf;
+
+private String zkQuorum;
+private String tableName;
+private String targetTableName;
+private String targetZkQuorum;
+private String sqlConditions;
+private long timestamp;
+
+VerifyReplicationTool(Configuration conf) {
+this.conf = 

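As a reference for the switches above, a hedged sketch of assembling and
parsing them with the commons-cli classes imported in this diff (the patch's
actual option-parsing method is not shown in this hunk; args is the
command-line array):

    // Sketch only: wire the Option constants above into a parser.
    Options options = new Options();
    options.addOption(ZK_QUORUM_OPT);
    options.addOption(TABLE_NAME_OPT);
    options.addOption(TARGET_TABLE_NAME_OPT);
    options.addOption(TARGET_ZK_QUORUM_OPT);
    options.addOption(CONDITIONS_OPT);
    options.addOption(TIMESTAMP);
    options.addOption(HELP_OPT);
    CommandLineParser parser = new PosixParser();
    CommandLine cmdLine = parser.parse(options, args); // throws ParseException on bad input
    if (cmdLine.hasOption(HELP_OPT.getOpt())) {
        new HelpFormatter().printHelp("VerifyReplicationTool", options);
    }
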
[jira] [Commented] (PHOENIX-3817) VerifyReplication using SQL

2018-07-19 Thread ASF GitHub Bot (JIRA)


[ 
https://issues.apache.org/jira/browse/PHOENIX-3817?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16550154#comment-16550154
 ] 

ASF GitHub Bot commented on PHOENIX-3817:
-

Github user karanmehta93 commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/309#discussion_r203927054
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/PhoenixMapReduceUtil.java
 ---
@@ -157,6 +192,192 @@ public static void setOutput(final Job job, final 
String tableName,final String
 
PhoenixConfigurationUtil.setUpsertColumnNames(configuration,columns.split(","));
 }
 
+/**
+ * Generate a query plan for a MapReduce job query.
+ * @param configuration The MapReduce job configuration
+ * @return Query plan for the MapReduce job
+ * @throws SQLException If the plan cannot be generated
+ */
+public static QueryPlan getQueryPlan(final Configuration configuration)
+throws SQLException {
+return getQueryPlan(configuration, false);
+}
+
+/**
+ * Generate a query plan for a MapReduce job query
+ * @param configuration The MapReduce job configuration
+ * @param isTargetConnection Whether the query plan is for the target 
HBase cluster
+ * @return Query plan for the MapReduce job
+ * @throws SQLException If the plan cannot be generated
+ */
+public static QueryPlan getQueryPlan(final Configuration configuration,
+boolean isTargetConnection) throws SQLException {
+Preconditions.checkNotNull(configuration);
+final String txnScnValue = 
configuration.get(PhoenixConfigurationUtil.TX_SCN_VALUE);
+final String currentScnValue = 
configuration.get(PhoenixConfigurationUtil.CURRENT_SCN_VALUE);
+final Properties overridingProps = new Properties();
+if(txnScnValue==null && currentScnValue!=null) {
+overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, 
currentScnValue);
+}
+final Connection connection;
+final String selectStatement;
+if (isTargetConnection) {
+String targetTable = 
PhoenixConfigurationUtil.getInputTargetTableName(configuration);
+if (!Strings.isNullOrEmpty(targetTable)) {
+// different table on same cluster
+connection = 
ConnectionUtil.getInputConnection(configuration, overridingProps);
+selectStatement = 
PhoenixConfigurationUtil.getSelectStatement(configuration, true);
+} else {
+// same table on different cluster
+connection =
+
ConnectionUtil.getTargetInputConnection(configuration, overridingProps);
+selectStatement = 
PhoenixConfigurationUtil.getSelectStatement(configuration);
+}
+} else {
+connection = ConnectionUtil.getInputConnection(configuration, 
overridingProps);
+selectStatement = 
PhoenixConfigurationUtil.getSelectStatement(configuration);
+}
+Preconditions.checkNotNull(selectStatement);
+final Statement statement = connection.createStatement();
+final PhoenixStatement pstmt = 
statement.unwrap(PhoenixStatement.class);
+// Optimize the query plan so that we potentially use secondary 
indexes
+final QueryPlan queryPlan = pstmt.optimizeQuery(selectStatement);
+final Scan scan = queryPlan.getContext().getScan();
+// since we can't set a scn on connections with txn set TX_SCN 
attribute so that the max time range is set by BaseScannerRegionObserver
+if (txnScnValue!=null) {
+scan.setAttribute(BaseScannerRegionObserver.TX_SCN, 
Bytes.toBytes(Long.valueOf(txnScnValue)));
+}
+// Initialize the query plan so it sets up the parallel scans
+queryPlan.iterator(MapReduceParallelScanGrouper.getInstance());
+return queryPlan;
+}
+
+/**
+ * Generates the input splits for a MapReduce job.
+ * @param qplan Query plan for the job
+ * @param splits The key range splits for the job
+ * @param config The job configuration
+ * @return Input splits for the job
+ * @throws IOException If the region information for the splits cannot 
be retrieved
+ */
+public static List<InputSplit> generateSplits(final QueryPlan qplan,
+final List<KeyRange> splits, Configuration config) throws 
IOException {
+Preconditions.checkNotNull(qplan);
+Preconditions.checkNotNull(splits);
+
+// Get the RegionSizeCalculator
+org.apache.hadoop.hbase.client.Connection connection = 
ConnectionFactory.createConnection(config);
--- End diff --

Any particular reason why you removed the try-with-resources block here?
I added it because of a memory leak that I had found here: 

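The pattern being asked about, as a minimal sketch (illustration only, not the
patch's code):

    // Scoping the HBase connection in try-with-resources closes it even when
    // computing region sizes or building splits throws, avoiding the leak the
    // reviewer mentions (an unclosed Connection pins threads and a ZooKeeper
    // session).
    try (org.apache.hadoop.hbase.client.Connection connection =
            ConnectionFactory.createConnection(config)) {
        // get the RegionSizeCalculator and generate the input splits here
    }
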
[GitHub] phoenix pull request #308: Client-side hash aggregation

2018-07-19 Thread geraldss
Github user geraldss commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/308#discussion_r203926206
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/iterate/ClientHashAggregatingResultIterator.java
 ---
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.iterate;
+
+import static org.apache.phoenix.query.QueryConstants.AGG_TIMESTAMP;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN_FAMILY;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.aggregator.Aggregator;
+import org.apache.phoenix.expression.aggregator.Aggregators;
+import org.apache.phoenix.schema.tuple.MultiKeyValueTuple;
+import org.apache.phoenix.schema.tuple.Tuple;
+import org.apache.phoenix.util.KeyValueUtil;
+import org.apache.phoenix.util.TupleUtil;
+
+/**
+ * 
+ * This class implements client-side hash aggregation in memory.
+ * Issue https://issues.apache.org/jira/browse/PHOENIX-4751.
+ * 
+ */
+public class ClientHashAggregatingResultIterator
+implements AggregatingResultIterator {
+
+private static final int HASH_AGG_INIT_SIZE = 64*1024;
+private static final byte[] UNITIALIZED_KEY_BUFFER = new byte[0];
+private final ResultIterator resultIterator;
+private final Aggregators aggregators;
+private final List<Expression> groupByExpressions;
+private final int thresholdBytes;
+private HashMap<ImmutableBytesWritable, Aggregator[]> hash;
+private List<ImmutableBytesWritable> keyList;
+private Iterator<ImmutableBytesWritable> keyIterator;
+
+public ClientHashAggregatingResultIterator(ResultIterator 
resultIterator, Aggregators aggregators, List<Expression> groupByExpressions, 
int thresholdBytes) {
+Objects.requireNonNull(resultIterator);
+Objects.requireNonNull(aggregators);
+Objects.requireNonNull(groupByExpressions);
+this.resultIterator = resultIterator;
+this.aggregators = aggregators;
+this.groupByExpressions = groupByExpressions;
+this.thresholdBytes = thresholdBytes;
+}
+
+@Override
+public Tuple next() throws SQLException {
+if (keyIterator == null) {
+hash = populateHash();
+keyList = sortKeys();
+keyIterator = keyList.iterator();
+}
+
+if (!keyIterator.hasNext()) {
+return null;
+}
+
+ImmutableBytesWritable key = keyIterator.next();
+Aggregator[] rowAggregators = hash.get(key);
+byte[] value = aggregators.toBytes(rowAggregators);
+Tuple tuple = wrapKeyValueAsResult(KeyValueUtil.newKeyValue(key, 
SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length));
+return tuple;
+}
+
+@Override
+public void close() throws SQLException {
+keyIterator = null;
+keyList = null;
+hash = null;
+resultIterator.close();
+}
+
+@Override
+public Aggregator[] aggregate(Tuple result) {
+Aggregator[] rowAggregators = aggregators.getAggregators();
+aggregators.reset(rowAggregators);
+aggregators.aggregate(rowAggregators, result);
+return rowAggregators;
+}
+
+@Override
+public void explain(List<String> planSteps) {
+resultIterator.explain(planSteps);
+}
+
+@Override
+public String 

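The diff is truncated before populateHash() and sortKeys(); the following is an
assumed reconstruction of the core hashing loop, for orientation only.
getGroupByKey and newRowAggregators are hypothetical stand-ins for however the
patch evaluates the group-by key and allocates per-key aggregator state:

    // Assumed sketch, not the patch: fold each input tuple into its key's slot.
    private HashMap<ImmutableBytesWritable, Aggregator[]> populateHash() throws SQLException {
        HashMap<ImmutableBytesWritable, Aggregator[]> map = new HashMap<>(HASH_AGG_INIT_SIZE);
        for (Tuple tuple = resultIterator.next(); tuple != null; tuple = resultIterator.next()) {
            ImmutableBytesWritable key = getGroupByKey(tuple); // hypothetical helper
            Aggregator[] rowAggregators = map.get(key);
            if (rowAggregators == null) {
                rowAggregators = newRowAggregators();          // hypothetical helper
                map.put(key, rowAggregators);
            }
            aggregators.aggregate(rowAggregators, tuple);
        }
        return map;
    }
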
[GitHub] phoenix pull request #308: Client-side hash aggregation

2018-07-19 Thread geraldss
Github user geraldss commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/308#discussion_r203926179
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/iterate/ClientHashAggregatingResultIterator.java
 ---
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.iterate;
+
+import static org.apache.phoenix.query.QueryConstants.AGG_TIMESTAMP;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN_FAMILY;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.aggregator.Aggregator;
+import org.apache.phoenix.expression.aggregator.Aggregators;
+import org.apache.phoenix.schema.tuple.MultiKeyValueTuple;
+import org.apache.phoenix.schema.tuple.Tuple;
+import org.apache.phoenix.util.KeyValueUtil;
+import org.apache.phoenix.util.TupleUtil;
+
+/**
+ * 
+ * This class implements client-side hash aggregation in memory.
+ * Issue https://issues.apache.org/jira/browse/PHOENIX-4751.
+ * 
+ */
+public class ClientHashAggregatingResultIterator
+implements AggregatingResultIterator {
+
+private static final int HASH_AGG_INIT_SIZE = 64*1024;
+private static final byte[] UNITIALIZED_KEY_BUFFER = new byte[0];
+private final ResultIterator resultIterator;
+private final Aggregators aggregators;
+private final List<Expression> groupByExpressions;
+private final int thresholdBytes;
+private HashMap<ImmutableBytesWritable, Aggregator[]> hash;
+private List<ImmutableBytesWritable> keyList;
+private Iterator<ImmutableBytesWritable> keyIterator;
+
+public ClientHashAggregatingResultIterator(ResultIterator 
resultIterator, Aggregators aggregators, List<Expression> groupByExpressions, 
int thresholdBytes) {
+Objects.requireNonNull(resultIterator);
+Objects.requireNonNull(aggregators);
+Objects.requireNonNull(groupByExpressions);
+this.resultIterator = resultIterator;
+this.aggregators = aggregators;
+this.groupByExpressions = groupByExpressions;
+this.thresholdBytes = thresholdBytes;
+}
+
+@Override
+public Tuple next() throws SQLException {
+if (keyIterator == null) {
+hash = populateHash();
+keyList = sortKeys();
+keyIterator = keyList.iterator();
+}
+
+if (!keyIterator.hasNext()) {
+return null;
+}
+
+ImmutableBytesWritable key = keyIterator.next();
+Aggregator[] rowAggregators = hash.get(key);
+byte[] value = aggregators.toBytes(rowAggregators);
+Tuple tuple = wrapKeyValueAsResult(KeyValueUtil.newKeyValue(key, 
SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length));
+return tuple;
+}
+
+@Override
+public void close() throws SQLException {
+keyIterator = null;
+keyList = null;
+hash = null;
+resultIterator.close();
+}
+
+@Override
+public Aggregator[] aggregate(Tuple result) {
+Aggregator[] rowAggregators = aggregators.getAggregators();
+aggregators.reset(rowAggregators);
+aggregators.aggregate(rowAggregators, result);
+return rowAggregators;
+}
+
+@Override
+public void explain(List<String> planSteps) {
+resultIterator.explain(planSteps);
+}
+
+@Override
+public String 

[GitHub] phoenix pull request #308: Client-side hash aggregation

2018-07-19 Thread geraldss
Github user geraldss commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/308#discussion_r203926229
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/execute/ClientAggregatePlan.java 
---
@@ -135,17 +142,24 @@ public ResultIterator iterator(ParallelScanGrouper 
scanGrouper, Scan scan) throw
 aggResultIterator = new 
ClientUngroupedAggregatingResultIterator(LookAheadResultIterator.wrap(iterator),
 serverAggregators);
 aggResultIterator = new 
UngroupedAggregatingResultIterator(LookAheadResultIterator.wrap(aggResultIterator),
 clientAggregators);
 } else {
-if (!groupBy.isOrderPreserving()) {
-int thresholdBytes = 
context.getConnection().getQueryServices().getProps().getInt(
-QueryServices.SPOOL_THRESHOLD_BYTES_ATTRIB, 
QueryServicesOptions.DEFAULT_SPOOL_THRESHOLD_BYTES);
-List<Expression> keyExpressions = 
groupBy.getKeyExpressions();
+List<Expression> keyExpressions = groupBy.getKeyExpressions();
+if (groupBy.isOrderPreserving()) {
+aggResultIterator = new 
ClientGroupedAggregatingResultIterator(LookAheadResultIterator.wrap(iterator), 
serverAggregators, keyExpressions);
--- End diff --

Done.

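For context on the branch above: when the GROUP BY keys form a prefix of the
row key, groups arrive contiguously and can be aggregated streaming-fashion;
otherwise the client must sort (the old spooling path) or, with this change,
hash. An illustration with made-up queries, assuming PK = (TENANT_ID, USER_ID):

    // Order-preserving: TENANT_ID is a PK prefix, so no client-side sort or hash.
    String orderPreserving = "SELECT TENANT_ID, COUNT(*) FROM USERS GROUP BY TENANT_ID";
    // Not order-preserving: AGE is not a PK prefix; the client must sort or hash.
    String needsSortOrHash = "SELECT AGE, COUNT(*) FROM USERS GROUP BY AGE";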

---


[jira] [Commented] (PHOENIX-3817) VerifyReplication using SQL

2018-07-19 Thread Akshita Malhotra (JIRA)


[ 
https://issues.apache.org/jira/browse/PHOENIX-3817?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16549976#comment-16549976
 ] 

Akshita Malhotra commented on PHOENIX-3817:
---

[~gjacoby] Added support for the SCN setting in the latest patch

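For reference, a minimal sketch of how an SCN is applied when opening a Phoenix
connection (the patch's actual plumbing goes through the MapReduce job
configuration, as in the PhoenixMapReduceUtil diff earlier in this thread;
jdbcUrl and timestamp below are placeholders):

    // Open a connection that reads as of a fixed timestamp (SCN).
    Properties props = new Properties();
    props.setProperty(PhoenixRuntime.CURRENT_SCN_ATTRIB, Long.toString(timestamp));
    Connection conn = DriverManager.getConnection(jdbcUrl, props);
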
> VerifyReplication using SQL
> ---
>
> Key: PHOENIX-3817
> URL: https://issues.apache.org/jira/browse/PHOENIX-3817
> Project: Phoenix
>  Issue Type: Improvement
>Reporter: Alex Araujo
>Assignee: Akshita Malhotra
>Priority: Minor
> Fix For: 4.15.0
>
> Attachments: PHOENIX-3817.v1.patch, PHOENIX-3817.v2.patch, 
> PHOENIX-3817.v3.patch, PHOENIX-3817.v4.patch, PHOENIX-3817.v5.patch, 
> PHOENIX-3817.v6.patch
>
>
> Certain use cases may copy or replicate a subset of a table to a different 
> table or cluster. For example, application topologies may map data for 
> specific tenants to different peer clusters.
> It would be useful to have a Phoenix VerifyReplication tool that accepts an 
> SQL query, a target table, and an optional target cluster. The tool would 
> compare data returned by the query on the different tables and update various 
> result counters (similar to HBase's VerifyReplication).

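A hedged sketch of the counter pattern described above, mirroring HBase's
VerifyReplication (the enum, the rowsMatch flag, and the mapper context are
illustrative stand-ins, not names from the patch):

    // Inside the comparison Mapper: classify each compared row and bump a counter.
    public enum Counter { GOODROWS, BADROWS, ONLY_IN_SOURCE, ONLY_IN_TARGET }

    if (rowsMatch) {
        context.getCounter(Counter.GOODROWS).increment(1);
    } else {
        context.getCounter(Counter.BADROWS).increment(1);
    }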


--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Updated] (PHOENIX-3817) VerifyReplication using SQL

2018-07-19 Thread Akshita Malhotra (JIRA)


 [ 
https://issues.apache.org/jira/browse/PHOENIX-3817?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Akshita Malhotra updated PHOENIX-3817:
--
Attachment: PHOENIX-3817.v6.patch

> VerifyReplication using SQL
> ---
>
> Key: PHOENIX-3817
> URL: https://issues.apache.org/jira/browse/PHOENIX-3817
> Project: Phoenix
>  Issue Type: Improvement
>Reporter: Alex Araujo
>Assignee: Akshita Malhotra
>Priority: Minor
> Fix For: 4.15.0
>
> Attachments: PHOENIX-3817.v1.patch, PHOENIX-3817.v2.patch, 
> PHOENIX-3817.v3.patch, PHOENIX-3817.v4.patch, PHOENIX-3817.v5.patch, 
> PHOENIX-3817.v6.patch
>
>
> Certain use cases may copy or replicate a subset of a table to a different 
> table or cluster. For example, application topologies may map data for 
> specific tenants to different peer clusters.
> It would be useful to have a Phoenix VerifyReplication tool that accepts an 
> SQL query, a target table, and an optional target cluster. The tool would 
> compare data returned by the query on the different tables and update various 
> result counters (similar to HBase's VerifyReplication).



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[GitHub] phoenix pull request #308: Client-side hash aggregation

2018-07-19 Thread JamesRTaylor
Github user JamesRTaylor commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/308#discussion_r203867850
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/iterate/ClientHashAggregatingResultIterator.java
 ---
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.iterate;
+
+import static org.apache.phoenix.query.QueryConstants.AGG_TIMESTAMP;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN_FAMILY;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.aggregator.Aggregator;
+import org.apache.phoenix.expression.aggregator.Aggregators;
+import org.apache.phoenix.schema.tuple.MultiKeyValueTuple;
+import org.apache.phoenix.schema.tuple.Tuple;
+import org.apache.phoenix.util.KeyValueUtil;
+import org.apache.phoenix.util.TupleUtil;
+
+/**
+ * 
+ * This class implements client-side hash aggregation in memory.
+ * Issue https://issues.apache.org/jira/browse/PHOENIX-4751.
+ * 
+ */
+public class ClientHashAggregatingResultIterator
+implements AggregatingResultIterator {
+
+private static final int HASH_AGG_INIT_SIZE = 64*1024;
+private static final byte[] UNITIALIZED_KEY_BUFFER = new byte[0];
+private final ResultIterator resultIterator;
+private final Aggregators aggregators;
+private final List<Expression> groupByExpressions;
+private final int thresholdBytes;
+private HashMap<ImmutableBytesWritable, Aggregator[]> hash;
+private List<ImmutableBytesWritable> keyList;
+private Iterator<ImmutableBytesWritable> keyIterator;
+
+public ClientHashAggregatingResultIterator(ResultIterator 
resultIterator, Aggregators aggregators, List<Expression> groupByExpressions, 
int thresholdBytes) {
+Objects.requireNonNull(resultIterator);
+Objects.requireNonNull(aggregators);
+Objects.requireNonNull(groupByExpressions);
+this.resultIterator = resultIterator;
+this.aggregators = aggregators;
+this.groupByExpressions = groupByExpressions;
+this.thresholdBytes = thresholdBytes;
+}
+
+@Override
+public Tuple next() throws SQLException {
+if (keyIterator == null) {
+hash = populateHash();
+keyList = sortKeys();
+keyIterator = keyList.iterator();
+}
+
+if (!keyIterator.hasNext()) {
+return null;
+}
+
+ImmutableBytesWritable key = keyIterator.next();
+Aggregator[] rowAggregators = hash.get(key);
+byte[] value = aggregators.toBytes(rowAggregators);
+Tuple tuple = wrapKeyValueAsResult(KeyValueUtil.newKeyValue(key, 
SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length));
+return tuple;
+}
+
+@Override
+public void close() throws SQLException {
+keyIterator = null;
+keyList = null;
+hash = null;
+resultIterator.close();
+}
+
+@Override
+public Aggregator[] aggregate(Tuple result) {
+Aggregator[] rowAggregators = aggregators.getAggregators();
+aggregators.reset(rowAggregators);
+aggregators.aggregate(rowAggregators, result);
+return rowAggregators;
+}
+
+@Override
+public void explain(List<String> planSteps) {
+resultIterator.explain(planSteps);
+}
+
+@Override
+public 

[GitHub] phoenix pull request #308: Client-side hash aggregation

2018-07-19 Thread JamesRTaylor
Github user JamesRTaylor commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/308#discussion_r203882988
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/iterate/ClientHashSizeException.java
 ---
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.phoenix.iterate;
+
+/**
+ * Thrown by {@link 
org.apache.phoenix.iterate.ClientHashAggregatingResultIterator } when
+ * hash size exceeds memory threshold.
+ * 
+ */
+public class ClientHashSizeException extends RuntimeException {
--- End diff --

You won't need this, as an InsufficientMemoryException will be thrown if you 
go above the specified memory limit (based on existing Phoenix config 
properties), and it will be unwound to become a SQLException with the code 
SQLExceptionCode.INSUFFICIENT_MEMORY.

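A sketch of the accounting being described, using Phoenix's client-side memory
manager (wiring assumed from Phoenix internals; estimatedHashSizeBytes is a
placeholder for however the iterator sizes its hash):

    // Allocate tracked memory for the hash map. Exceeding the configured limit
    // throws InsufficientMemoryException, which is unwound into a SQLException
    // with SQLExceptionCode.INSUFFICIENT_MEMORY.
    MemoryManager mm = context.getConnection().getQueryServices().getMemoryManager();
    MemoryChunk chunk = mm.allocate(estimatedHashSizeBytes);
    try {
        // populate the hash, calling chunk.resize(newSizeBytes) as it grows
    } finally {
        chunk.close(); // release the tracked allocation
    }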

---


[GitHub] phoenix pull request #308: Client-side hash aggregation

2018-07-19 Thread JamesRTaylor
Github user JamesRTaylor commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/308#discussion_r203868079
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/iterate/ClientHashAggregatingResultIterator.java
 ---
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.iterate;
+
+import static org.apache.phoenix.query.QueryConstants.AGG_TIMESTAMP;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN_FAMILY;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.aggregator.Aggregator;
+import org.apache.phoenix.expression.aggregator.Aggregators;
+import org.apache.phoenix.schema.tuple.MultiKeyValueTuple;
+import org.apache.phoenix.schema.tuple.Tuple;
+import org.apache.phoenix.util.KeyValueUtil;
+import org.apache.phoenix.util.TupleUtil;
+
+/**
+ * 
+ * This class implements client-side hash aggregation in memory.
+ * Issue https://issues.apache.org/jira/browse/PHOENIX-4751.
+ * 
+ */
+public class ClientHashAggregatingResultIterator
+implements AggregatingResultIterator {
+
+private static final int HASH_AGG_INIT_SIZE = 64*1024;
+private static final byte[] UNITIALIZED_KEY_BUFFER = new byte[0];
+private final ResultIterator resultIterator;
+private final Aggregators aggregators;
+private final List<Expression> groupByExpressions;
+private final int thresholdBytes;
+private HashMap<ImmutableBytesWritable, Aggregator[]> hash;
+private List<ImmutableBytesWritable> keyList;
+private Iterator<ImmutableBytesWritable> keyIterator;
+
+public ClientHashAggregatingResultIterator(ResultIterator 
resultIterator, Aggregators aggregators, List<Expression> groupByExpressions, 
int thresholdBytes) {
+Objects.requireNonNull(resultIterator);
+Objects.requireNonNull(aggregators);
+Objects.requireNonNull(groupByExpressions);
+this.resultIterator = resultIterator;
+this.aggregators = aggregators;
+this.groupByExpressions = groupByExpressions;
+this.thresholdBytes = thresholdBytes;
+}
+
+@Override
+public Tuple next() throws SQLException {
+if (keyIterator == null) {
+hash = populateHash();
+keyList = sortKeys();
+keyIterator = keyList.iterator();
+}
+
+if (!keyIterator.hasNext()) {
+return null;
+}
+
+ImmutableBytesWritable key = keyIterator.next();
+Aggregator[] rowAggregators = hash.get(key);
+byte[] value = aggregators.toBytes(rowAggregators);
+Tuple tuple = wrapKeyValueAsResult(KeyValueUtil.newKeyValue(key, 
SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length));
+return tuple;
+}
+
+@Override
+public void close() throws SQLException {
+keyIterator = null;
+keyList = null;
+hash = null;
+resultIterator.close();
+}
+
+@Override
+public Aggregator[] aggregate(Tuple result) {
+Aggregator[] rowAggregators = aggregators.getAggregators();
+aggregators.reset(rowAggregators);
+aggregators.aggregate(rowAggregators, result);
+return rowAggregators;
+}
+
+@Override
+public void explain(List<String> planSteps) {
+resultIterator.explain(planSteps);
+}
+
+@Override
+public 

[GitHub] phoenix pull request #308: Client-side hash aggregation

2018-07-19 Thread JamesRTaylor
Github user JamesRTaylor commented on a diff in the pull request:

https://github.com/apache/phoenix/pull/308#discussion_r203879864
  
--- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/execute/ClientAggregatePlan.java 
---
@@ -135,17 +142,24 @@ public ResultIterator iterator(ParallelScanGrouper 
scanGrouper, Scan scan) throw
 aggResultIterator = new 
ClientUngroupedAggregatingResultIterator(LookAheadResultIterator.wrap(iterator),
 serverAggregators);
 aggResultIterator = new 
UngroupedAggregatingResultIterator(LookAheadResultIterator.wrap(aggResultIterator),
 clientAggregators);
 } else {
-if (!groupBy.isOrderPreserving()) {
-int thresholdBytes = 
context.getConnection().getQueryServices().getProps().getInt(
-QueryServices.SPOOL_THRESHOLD_BYTES_ATTRIB, 
QueryServicesOptions.DEFAULT_SPOOL_THRESHOLD_BYTES);
-List<Expression> keyExpressions = 
groupBy.getKeyExpressions();
+List<Expression> keyExpressions = groupBy.getKeyExpressions();
+if (groupBy.isOrderPreserving()) {
+aggResultIterator = new 
ClientGroupedAggregatingResultIterator(LookAheadResultIterator.wrap(iterator), 
serverAggregators, keyExpressions);
--- End diff --

Pass the context through to ClientGroupedAggregatingResultIterator here too, as 
you'll need it to get the memory manager. 


---


[jira] [Updated] (PHOENIX-3534) Support multi region SYSTEM.CATALOG table

2018-07-19 Thread Thomas D'Silva (JIRA)


 [ 
https://issues.apache.org/jira/browse/PHOENIX-3534?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Thomas D'Silva updated PHOENIX-3534:

Fix Version/s: (was: 5.0.0)
   5.1.0

> Support multi region SYSTEM.CATALOG table
> -
>
> Key: PHOENIX-3534
> URL: https://issues.apache.org/jira/browse/PHOENIX-3534
> Project: Phoenix
>  Issue Type: Bug
>Reporter: James Taylor
>Assignee: Thomas D'Silva
>Priority: Major
> Fix For: 4.15.0, 5.1.0
>
> Attachments: PHOENIX-3534-v2.patch, PHOENIX-3534-v3.patch, 
> PHOENIX-3534.patch
>
>
> Currently Phoenix requires that the SYSTEM.CATALOG table be a single region, 
> because server-side row locks are held for operations that impact a table and 
> all of its views. For example, adding/removing a column from a base table 
> pushes this change to all views.
> As an alternative to making the SYSTEM.CATALOG transactional (PHOENIX-2431), 
> when a new table is created we can do a lazy cleanup of any rows that may be 
> left over from a failed DDL call (kudos to [~lhofhansl] for coming up with 
> this idea). To implement this efficiently, we'd need to also do PHOENIX-2051 
> so that we can efficiently find derived views.
> The implementation would rely on an optimistic concurrency model based on 
> checking our sequence numbers for each table/view before/after updating. Each 
> table/view row would be individually locked for its change (metadata for a 
> view or table cannot span regions due to our split policy), with the sequence 
> number being incremented under lock and then returned to the client.
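
(A rough sketch of the optimistic flow described above. The two helper names are hypothetical; only MutationCode.CONCURRENT_TABLE_MUTATION is an existing Phoenix code.)

{code:java}
// Sketch: tag each metadata mutation with the sequence number the client
// last saw; the server bumps it under a short per-row lock. A mismatch means
// another client changed the table/view first, so refresh and retry.
long seqNum = view.getSequenceNumber();
MetaDataMutationResult result = sendAddColumn(view, column, seqNum); // hypothetical helper
if (result.getMutationCode() == MutationCode.CONCURRENT_TABLE_MUTATION) {
    // Another client changed this view since seqNum was read.
    refreshCacheAndRetry(view.getName());                            // hypothetical helper
}
{code}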



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[jira] [Assigned] (PHOENIX-4799) Write cells using checkAndMutate to prevent conflicting changes

2018-07-19 Thread Thomas D'Silva (JIRA)


 [ 
https://issues.apache.org/jira/browse/PHOENIX-4799?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Thomas D'Silva reassigned PHOENIX-4799:
---

Assignee: Thomas D'Silva

> Write cells using checkAndMutate to prevent conflicting changes
> ---
>
> Key: PHOENIX-4799
> URL: https://issues.apache.org/jira/browse/PHOENIX-4799
> Project: Phoenix
>  Issue Type: Sub-task
>Reporter: Thomas D'Silva
>Assignee: Thomas D'Silva
>Priority: Major
>
> In order to prevent race conditions when multiple clients try to mutate the 
> same column, do a checkAndMutate with the name of the column being 
> added/dropped before sending the mutation request to the server. Also:
>  1. When a view is created, do a checkAndMutate with the columns in the 
> view's WHERE clause.
>  2. When an index on a view is created, do a checkAndMutate with the indexed 
> columns.
>  
> To prevent a race condition in the DROP TABLE CASCADE case, when a table is 
> dropped, do a checkAndMutate with the row key of the base table name. When a 
> view is created, it also does a checkAndMutate with the same row key. 
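
(A minimal sketch of the guard described above, using the HBase 1.x client API. The row-key construction is simplified, and MUTEX_QUALIFIER/dummyValue are illustrative names, not the actual patch.)

{code:java}
// Sketch: atomically claim a per-column marker cell so that concurrent
// ADD/DROP COLUMN callers cannot both proceed. checkAndPut with a null
// expected value succeeds only if the cell is still absent.
byte[] rowKey = ByteUtil.concat(
        SchemaUtil.getTableKey(tenantId, schemaName, tableName),
        QueryConstants.SEPARATOR_BYTE_ARRAY, Bytes.toBytes(columnName));
Put put = new Put(rowKey);
put.addColumn(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, MUTEX_QUALIFIER, dummyValue);
boolean won = sysCatTable.checkAndPut(rowKey,
        QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, MUTEX_QUALIFIER,
        null /* expect the cell to be absent */, put);
if (!won) {
    // Someone else is mutating the same column: surface a retryable error.
    throw new ConcurrentTableMutationException(schemaName, tableName);
}
{code}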



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


PhoenixCon Archive Page

2018-07-19 Thread Clay Baenziger (BLOOMBERG/ 731 LEX)
Hi all,

I don't know how best to recommend a change to the Phoenix-Site SVN repo 
(Phoenix JIRAs seem to be filed only against the code repo), but I have a patch 
for the PhoenixCon Archive page, including the recent PhoenixCon 2018.

Also, could any speakers from PhoenixCon 2017 please provide pointers to their 
slides?

I used OpenOffice to write this, following the format of the HBaseCon Archive 
page. I cleaned up the OpenOffice-generated HTML, moved it to XHTML, ran 
xmllint --format on it, and verified it passes the W3C's validator tool.

I have the updated files at: 
https://github.com/cbaenziger/phoenix-site/commit/e508266a3fef0ef86127871692f7c02c256d47a8

What is the best way to get this committed, if okay?

-Clay

[jira] [Commented] (PHOENIX-3534) Support multi region SYSTEM.CATALOG table

2018-07-19 Thread Hudson (JIRA)


[ 
https://issues.apache.org/jira/browse/PHOENIX-3534?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16549799#comment-16549799
 ] 

Hudson commented on PHOENIX-3534:
-

FAILURE: Integrated in Jenkins build Phoenix-4.x-HBase-1.3 #165 (See 
[https://builds.apache.org/job/Phoenix-4.x-HBase-1.3/165/])
PHOENIX-3534 Support multi region SYSTEM.CATALOG table (Thomas D'Silva) (tdsilva: rev 93fdd5bad22cde313c9f34fe7448dca44377a27c)
* (add) phoenix-core/src/it/java/org/apache/phoenix/end2end/SplitSystemCatalogIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/compile/UpsertCompiler.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/index/IndexMetadataIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/util/MetaDataUtil.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/query/ConnectionlessQueryServicesImpl.java
* (edit) phoenix-protocol/src/main/PTable.proto
* (add) phoenix-core/src/test/java/org/apache/phoenix/coprocessor/MetaDataEndpointImplTest.java
* (add) phoenix-core/src/it/java/org/apache/phoenix/end2end/SplitSystemCatalogTests.java
* (add) phoenix-core/src/main/java/org/apache/phoenix/coprocessor/TableInfo.java
* (edit) pom.xml
* (delete) phoenix-core/src/it/java/org/apache/phoenix/end2end/SaltedViewIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/compile/CreateTableCompiler.java
* (edit) phoenix-protocol/src/main/MetaDataService.proto
* (edit) phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/MetaDataProtos.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/schema/PTable.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/rpc/UpdateCacheIT.java
* (edit) phoenix-core/src/test/java/org/apache/phoenix/execute/CorrelatePlanTest.java
* (add) phoenix-core/src/it/java/org/apache/phoenix/end2end/StatsEnabledSplitSystemCatalogIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/query/ConnectionQueryServicesImpl.java
* (add) phoenix-core/src/main/java/org/apache/phoenix/schema/ParentTableNotFoundException.java
* (add) phoenix-core/src/main/java/org/apache/phoenix/compile/ColumnNameTrackingExpressionCompiler.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/TenantSpecificViewIndexIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/replication/SystemCatalogWALEntryFilter.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/QueryDatabaseMetaDataIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/query/DelegateConnectionQueryServices.java
* (add) phoenix-core/src/main/java/org/apache/phoenix/coprocessor/WhereConstantParser.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/AlterMultiTenantTableWithViewsIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/schema/DelegateColumn.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/schema/PColumnImpl.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/AlterTableIT.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixDriverIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/util/SchemaUtil.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/AlterTableWithViewsIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/util/PhoenixRuntime.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/coprocessor/StatisticsCollectionRunTrackerIT.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/ViewIT.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/replication/SystemCatalogWALEntryFilterIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/jdbc/PhoenixStatement.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/schema/TableProperty.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/util/IndexUtil.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/query/QueryConstants.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/index/ViewIndexIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/jdbc/PhoenixConnection.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/MigrateSystemTablesToSystemNamespaceIT.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataEndpointImpl.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PTableProtos.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataProtocol.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/parse/DropTableStatement.java
* (edit) phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataSplitPolicy.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/index/DropColumnIT.java
* (edit) phoenix-core/src/it/java/org/apache/phoenix/end2end/SystemCatalogCreationOnConnectionIT.java
* (edit) 

[jira] [Commented] (PHOENIX-3534) Support multi region SYSTEM.CATALOG table

2018-07-19 Thread ASF GitHub Bot (JIRA)


[ 
https://issues.apache.org/jira/browse/PHOENIX-3534?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16549791#comment-16549791
 ] 

ASF GitHub Bot commented on PHOENIX-3534:
-

Github user twdsilva closed the pull request at:

https://github.com/apache/phoenix/pull/303


> Support multi region SYSTEM.CATALOG table
> -
>
> Key: PHOENIX-3534
> URL: https://issues.apache.org/jira/browse/PHOENIX-3534
> Project: Phoenix
>  Issue Type: Bug
>Reporter: James Taylor
>Assignee: Thomas D'Silva
>Priority: Major
> Fix For: 5.0.0, 4.15.0
>
> Attachments: PHOENIX-3534-v2.patch, PHOENIX-3534-v3.patch, 
> PHOENIX-3534.patch
>
>
> Currently Phoenix requires that the SYSTEM.CATALOG table be a single region, 
> because server-side row locks are held for operations that impact a table and 
> all of its views. For example, adding/removing a column from a base table 
> pushes this change to all views.
> As an alternative to making the SYSTEM.CATALOG transactional (PHOENIX-2431), 
> when a new table is created we can do a lazy cleanup of any rows that may be 
> left over from a failed DDL call (kudos to [~lhofhansl] for coming up with 
> this idea). To implement this efficiently, we'd need to also do PHOENIX-2051 
> so that we can efficiently find derived views.
> The implementation would rely on an optimistic concurrency model based on 
> checking our sequence numbers for each table/view before/after updating. Each 
> table/view row would be individually locked for its change (metadata for a 
> view or table cannot span regions due to our split policy), with the sequence 
> number being incremented under lock and then returned to the client.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)


[GitHub] phoenix pull request #303: PHOENIX-3534 Support multi region SYSTEM.CATALOG ...

2018-07-19 Thread twdsilva
Github user twdsilva closed the pull request at:

https://github.com/apache/phoenix/pull/303


---


[jira] [Commented] (PHOENIX-4797) file not found or file exists exception when creating a global index with the -snapshot option

2018-07-19 Thread James Taylor (JIRA)


[ 
https://issues.apache.org/jira/browse/PHOENIX-4797?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16549346#comment-16549346
 ] 

James Taylor commented on PHOENIX-4797:
---

[~karanmehta93] - please commit this to all branches, as we’ll do a 4.14.1 
release and this will be a good one to have in it.

> file not found or file exists exception when creating a global index with 
> the -snapshot option
> -
>
> Key: PHOENIX-4797
> URL: https://issues.apache.org/jira/browse/PHOENIX-4797
> Project: Phoenix
>  Issue Type: Bug
>Affects Versions: 4.13.2-cdh5.11.2
>Reporter: sailingYang
>Priority: Major
>
> When using IndexTool with the -snapshot option, if the MapReduce job creates 
> multiple mappers, the job fails with an HDFS file-not-found or 
> file-already-exists exception, because every mapper uses the same snapshot 
> restore working directory.
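(One plausible fix, sketched below: derive a unique restore directory per mapper so region clones cannot collide. The config key shown is illustrative; the copySnapshotForScanner() call matches the one in the stack trace that follows.)

{code:java}
// Sketch: suffix the shared restore dir with a UUID per mapper/attempt so
// each task restores the snapshot into its own directory.
Path baseDir = new Path(conf.get("phoenix.index.snapshot.restore.dir", // illustrative key
        "/tmp/index-snapshot-dir/restore-dir"));
Path restoreDir = new Path(baseDir, UUID.randomUUID().toString());
RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
{code}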
> {code:java}
> Error: java.io.IOException: java.util.concurrent.ExecutionException: 
> java.io.IOException: The specified region already exists on disk: 
> hdfs://m12v1.mlamp.cn:8020/tmp/index-snapshot-dir/restore-dir/e738c85b-2394-43fc-b9de-b8280bc329ca/data/default/SCOPA.CETUS_EVENT_ZY_SCOPA_31_0516_TRAIN_EVENT/2ab2c1d73d2e31bb5a5e2b394da564f8
> at 
> org.apache.hadoop.hbase.util.ModifyRegionUtils.createRegions(ModifyRegionUtils.java:186)
> at 
> org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper.cloneHdfsRegions(RestoreSnapshotHelper.java:578)
> at 
> org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper.restoreHdfsRegions(RestoreSnapshotHelper.java:249)
> at 
> org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper.restoreHdfsRegions(RestoreSnapshotHelper.java:171)
> at 
> org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper.copySnapshotForScanner(RestoreSnapshotHelper.java:814)
> at 
> org.apache.phoenix.iterate.TableSnapshotResultIterator.init(TableSnapshotResultIterator.java:77)
> at 
> org.apache.phoenix.iterate.TableSnapshotResultIterator.<init>(TableSnapshotResultIterator.java:73)
> at 
> org.apache.phoenix.mapreduce.PhoenixRecordReader.initialize(PhoenixRecordReader.java:126)
> at 
> org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:548)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:786)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:415)
> at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
> Caused by: java.util.concurrent.ExecutionException: java.io.IOException: The 
> specified region already exists on disk: 
> hdfs://m12v1.mlamp.cn:8020/tmp/index-snapshot-dir/restore-dir/e738c85b-2394-43fc-b9de-b8280bc329ca/data/default/SCOPA.CETUS_EVENT_ZY_SCOPA_31_0516_TRAIN_EVENT/2ab2c1d73d2e31bb5a5e2b394da564f8
> at java.util.concurrent.FutureTask.report(FutureTask.java:122)
> at java.util.concurrent.FutureTask.get(FutureTask.java:188)
> at 
> org.apache.hadoop.hbase.util.ModifyRegionUtils.createRegions(ModifyRegionUtils.java:180)
> ... 15 more
> Caused by: java.io.IOException: The specified region already exists on disk: 
> hdfs://m12v1.mlamp.cn:8020/tmp/index-snapshot-dir/restore-dir/e738c85b-2394-43fc-b9de-b8280bc329ca/data/default/SCOPA.CETUS_EVENT_ZY_SCOPA_31_0516_TRAIN_EVENT/2ab2c1d73d2e31bb5a5e2b394da564f8
> at 
> org.apache.hadoop.hbase.regionserver.HRegionFileSystem.createRegionOnFileSystem(HRegionFileSystem.java:877)
> at 
> org.apache.hadoop.hbase.regionserver.HRegion.createHRegion(HRegion.java:6252)
> at 
> org.apache.hadoop.hbase.util.ModifyRegionUtils.createRegion(ModifyRegionUtils.java:205)
> at 
> org.apache.hadoop.hbase.util.ModifyRegionUtils$1.call(ModifyRegionUtils.java:173)
> at 
> org.apache.hadoop.hbase.util.ModifyRegionUtils$1.call(ModifyRegionUtils.java:170)
> at java.util.concurrent.FutureTask.run(FutureTask.java:262)
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
> at java.util.concurrent.FutureTask.run(FutureTask.java:262)
> at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
> at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
> at java.lang.Thread.run(Thread.java:745)
> 2018-06-28 15:01:55 70909 [main] INFO org.apache.hadoop.mapreduce.Job - Task 
> Id : attempt_1530004808977_0011_m_01_0, Status : FAILED
> Error: java.io.IOException: java.util.concurrent.ExecutionException: 
> java.io.IOException: The specified region already exists on disk: 
>