PHOENIX-1248 is marked as resolved. Are you using a version of Phoenix before this fix?
On Sun, Jul 26, 2015 at 7:22 AM, Riesland, Zack <zack.riesl...@sensus.com> wrote: > Thanks James, > > I am not able to use salt_buckets because I need to import data to this > table from Hive via CsvBulkLoader and that doesn't work if the table is > created with 'salt_buckets': > https://issues.apache.org/jira/browse/PHOENIX-1248 > > > ------------------------------ > *From:* James Taylor [jamestay...@apache.org] > *Sent:* Saturday, July 25, 2015 1:23 PM > *To:* user > *Cc:* Haisty, Geoffrey > *Subject:* Re: Exception from RowCounter > > Hi Zack, > If you find an issue, the best way for us is to come up with the smallest > set of data that demonstrates the issue and a small script that consistently > reproduces it. It's just not feasible for us to work from a billion row > data set. > > RowCounter is not a Phoenix API, so you'll need to go to the HBase > email lists for help on that. > > From a schema design perspective, it seems a bit unwieldy what you're > doing. Why not just salt the data table instead of manually salting it and > managing that yourself? > > Thanks, > James > > On Sat, Jul 25, 2015 at 4:04 AM, Riesland, Zack <zack.riesl...@sensus.com> > wrote: > >> I decided to start from scratch with my table schema in an attempt to get >> a better distribution across my regions/region servers. 
>> >> >> >> So, I created a table like this: >> >> >> >> CREATE TABLE fma.er_keyed_gz_hashed_indexed_meterkey_immutable ( >> >> hashed_key varchar not null, >> >> meter_key varchar , >> >> … >> >> endpoint_id integer, >> >> sample_point integer not null, >> >> … >> >> CONSTRAINT pk_fma_er_keyed_gz_hashed_indexed_meterkey_immutable >> PRIMARY KEY (hashed_key, sample_point) >> >> ) >> >> COMPRESSION='GZ' >> >> SPLIT ON ('0-', '1-', '2-', '3-', '4-', '5-', '6-', '7-', '8-', '9-', >> '10-', '11-', '12-', '13-', '14-', '15-', '16-', '17-', '18-', '19-', >> '20-', '21-', '22-', '23-', '24-', '25-', '26-', '27-', '28-', '29-', >> '30-', '31-', '32-', '33-', '34-', '35-', '36-', '37-', '38-', '39-', >> '40-', '41-', '42-', '43-', '44-', '45-', '46-', '47-', '48-', '49-', >> '50-', '51-', '52-', '53-', '54-', '55-', '56-', '57-', '58-', '59-', >> '60-', '61-', '62-', '63-', '64-', '65-', '66-', '67-', '68-', '69-', >> '70-', '71-', '72-', '73-', '74-', '75-', '76-', '77-', '78-', '79-', >> '80-', '81-', '82-', '83-', '84-', '85-', '86-', '87-', '88-', '89-', >> '90-', '91-', '92-', '93-', '94-', '95-', '96-', '97-', '98-', '99-', >> '100-', '101-', '102-', '103-', '104-', '105-', '106-', '107-', '108-', >> '109-', '110-', '111-', '112-', '113-', '114-', '115-', '116-', '117-', >> '118-', '119-', '120-', '121-', '122-', '123-', '124-', '125-', '126-', >> '127-', '128-', '129-', '130-', '131-', '132-', '133-', '134-', '135-', >> '136-', '137-', '138-', '139-', '140-', '141-', '142-', '143-', '144-', >> '145-', '146-', '147-', '148-', '149-', '150-', '151-', '152-', '153-', >> '154-', '155-', '156-', '157-', '158-', '159-', '160-', '161-', '162-', >> '163-', '164-', '165-', '166-', '167-', '168-', '169-', '170-', '171-', >> '172-', '173-', '174-', '175-', '176-', '177-', '178-', '179-', '180-', >> '181-', '182-', '183-', '184-', '185-', '186-', '187-', '188-', '189-', >> '190-', '191-', '192-', '193-', '194-', '195-', '196-', '197-', '198-', >> '199-', '200-', '201-', '202-', 
'203-', '204-', '205-', '206-', '207-', >> '208-', '209-', '210-', '211-', '212-', '213-', '214-', '215-', '216-', >> '217-', '218-', '219-', '220-', '221-', '222-', '223-', '224-', '225-', >> '226-', '227-', '228-', '229-', '230-', '231-', '232-', '233-', '234-', >> '235-', '236-', '237-', '238-', '239-', '240-', '241-', '242-', '243-', >> '244-', '245-', '246-', '247-', '248-', '249-', '250-', '251-', '252-', >> '253-', '254-', '255-', '256-', '257-', '258-', '259-', '260-', '261-', >> '262-', '263-', '264-', '265-', '266-', '267-', '268-', '269-', '270-', >> '271-', '272-', '273-', '274-', '275-', '276-', '277-', '278-', '279-', >> '280-', '281-', '282-', '283-', '284-', '285-', '286-', '287-', '288-', >> '289-', '290-', '291-', '292-', '293-', '294-', '295-', '296-', '297-', >> '298-', '299-', '300-', '301-', '302-', '303-', '304-', '305-', '306-', >> '307-', '308-', '309-', '310-', '311-', '312-', '313-', '314-', '315-', >> '316-', '317-', '318-', '319-', '320-', '321-', '322-', '323-', '324-', >> '325-', '326-', '327-', '328-', '329-', '330-', '331-', '332-', '333-', >> '334-', '335-', '336-', '337-', '338-', '339-', '340-', '341-', '342-', >> '343-', '344-', '345-', '346-', '347-', '348-', '349-', '350-', '351-', >> '352-', '353-', '354-', '355-', '356-', '357-', '358-', '359-', '360-', >> '361-', '362-', '363-', '364-', '365-', '366-', '367-', '368-', '369-', >> '370-', '371-', '372-', '373-', '374-', '375-', '376-', '377-', '378-', >> '379-', '380-', '381-', '382-', '383-', '384-', '385-', '386-', '387-', >> '388-', '389-', '390-', '391-', '392-', '393-', '394-', '395-', '396-', >> '397-', '398-', '399-', '400-', '401-', '402-', '403-', '404-', '405-', >> '406-', '407-', '408-', '409-', '410-', '411-', '412-', '413-', '414-', >> '415-', '416-', '417-', '418-', '419-', '420-', '421-', '422-', '423-', >> '424-', '425-', '426-', '427-', '428-', '429-', '430-', '431-', '432-', >> '433-', '434-', '435-', '436-', '437-', '438-', '439-', '440-', '441-', >> 
'442-', '443-', '444-', '445-', '446-', '447-', '448-', '449-', '450-', >> '451-', '452-', '453-', '454-', '455-', '456-', '457-', '458-', '459-', >> '460-', '461-', '462-', '463-', '464-', '465-', '466-', '467-', '468-', >> '469-', '470-', '471-', '472-', '473-', '474-', '475-', '476-', '477-', >> '478-', '479-', '480-', '481-', '482-', '483-', '484-', '485-', '486-', >> '487-', '488-', '489-', '490-', '491-', '492-', '493-', '494-', '495-', >> '496-', '497-', '498-', '499-', '500-', '501-', '502-', '503-', '504-', >> '505-', '506-', '507-', '508-', '509-', '510-', '511-') >> >> >> >> The hashed key is the endpoint_id % 511, which is why the splitting is >> done this way. >> >> >> >> I also added 2 secondary indexes, because I need to be able to query >> based on meter_key or endpoint_id + sample_point: >> >> >> >> CREATE INDEX fma_er_keyed_gz_hashed_indexed_endpoint_include_sample_point >> on fma.er_keyed_gz_hashed_indexed_meterkey_immutable (endpoint_id) include >> (sample_point) SALT_BUCKETS = 256; --256 is max >> >> >> >> CREATE INDEX fma_er_keyed_gz_hashed_indexed_meterkey on >> fma.er_keyed_gz_hashed_indexed_meterkey_immutable (meter_key) SALT_BUCKETS >> = 256; --256 is max >> >> >> >> This all seemed to work well. >> >> >> >> Then I use the bulk import from CSV tool and imported about 1 billion >> rows. >> >> >> >> This also seemed to work. >> >> >> >> I can query by hashed_key and get immediate results. >> >> >> >> If I query by endpoint_id or meter_key, it is MUCH slower (clue 1 that >> there's a problem), but it gives me results eventually. >> >> >> >> However, when I try to get a count, it returns 0: >> >> >> >> >> >> > select count(*) from fma.er_keyed_gz_hashed_indexed_meterkey_immutable; >> >> +------------+ >> >> | COUNT(1) | >> >> +------------+ >> >> | 0 | >> >> +------------+ >> >> 1 row selected (0.075 seconds) >> >> >> >> And when I try to do a RowCounter job, it fails completely (see below). 
>> >> >> >> Can anyone help me diagnose what is going on here? >> >> >> >> This is a cluster with 6 large region servers, that each have about 600 >> regions (from this and other tables). >> >> >> >> This table has about 12 columns and 1 billion rows currently. >> >> >> >> Thanks! >> >> >> >> # hbase org.apache.hadoop.hbase.mapreduce.RowCounter >> fma.er_keyed_gz_hashed_indexed_meterkey_immutable; >> >> >> >> >> >> 2015-07-25 06:53:36,425 DEBUG [main] util.RegionSizeCalculator: Region >> sizes calculated >> >> 2015-07-25 06:53:36,427 WARN [main] hbase.HBaseConfiguration: Config >> option "hbase.regionserver.lease.period" is deprecated. Instead, use >> "hbase.client.scanner.timeout.period" >> >> 2015-07-25 06:53:36,468 WARN [main] >> client.ConnectionManager$HConnectionImplementation: Encountered problems >> when prefetch hbase:meta table: >> >> org.apache.hadoop.hbase.TableNotFoundException: Cannot find row in >> hbase:meta for table: fma.er_keyed_gz_hashed_indexed_meterkey_immutable, >> row=fma.er_keyed_gz_hashed_indexed_meterkey_immutable,,99999999999999 >> >> at >> org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:164) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.prefetchRegionCache(ConnectionManager.java:1222) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegionInMeta(ConnectionManager.java:1286) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1135) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1118) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1075) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.getRegionLocation(ConnectionManager.java:909) >> >> at >> 
org.apache.hadoop.hbase.client.HTable.getRegionLocation(HTable.java:528) >> >> at >> org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:165) >> >> at >> org.apache.hadoop.mapreduce.JobSubmitter.writeNewSplits(JobSubmitter.java:597) >> >> at >> org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:614) >> >> at >> org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:492) >> >> at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1296) >> >> at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1293) >> >> at java.security.AccessController.doPrivileged(Native Method) >> >> at javax.security.auth.Subject.doAs(Subject.java:415) >> >> at >> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628) >> >> at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293) >> >> at >> org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1314) >> >> at >> org.apache.hadoop.hbase.mapreduce.RowCounter.main(RowCounter.java:191) >> >> 2015-07-25 06:53:36,471 INFO [main] mapreduce.JobSubmitter: Cleaning up >> the staging area /user/root/.staging/job_1437395072897_1775 >> >> Exception in thread "main" >> org.apache.hadoop.hbase.TableNotFoundException: >> fma.er_keyed_gz_hashed_indexed_meterkey_immutable >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegionInMeta(ConnectionManager.java:1319) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1135) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1118) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1075) >> >> at >> org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.getRegionLocation(ConnectionManager.java:909) >> >> at >> 
org.apache.hadoop.hbase.client.HTable.getRegionLocation(HTable.java:528) >> >> at >> org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:165) >> >> at >> org.apache.hadoop.mapreduce.JobSubmitter.writeNewSplits(JobSubmitter.java:597) >> >> at >> org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:614) >> >> at >> org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:492) >> >> at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1296) >> >> at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1293) >> >> at java.security.AccessController.doPrivileged(Native Method) >> >> at javax.security.auth.Subject.doAs(Subject.java:415) >> >> at >> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628) >> >> at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293) >> >> at >> org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1314) >> >> at >> org.apache.hadoop.hbase.mapreduce.RowCounter.main(RowCounter.java:191) >> > >