Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCount.java Sat Sep 20 17:34:39 2014 @@ -46,15 +46,7 @@ public class VectorUDAFCount extends Vec private static final long serialVersionUID = 1L; - transient private long value; - transient private boolean isNull; - - public void initIfNull() { - if (isNull) { - isNull = false; - value = 0; - } - } + transient private long count; @Override public int getVariableSize() { @@ -63,8 +55,7 @@ public class VectorUDAFCount extends Vec @Override public void reset() { - isNull = true; - value = 0L; + count = 0L; } } @@ -131,8 +122,7 @@ public class VectorUDAFCount extends Vec aggregationBufferSets, aggregateIndex, i); - myagg.initIfNull(); - myagg.value++; + myagg.count++; } } @@ -148,8 +138,7 @@ public class VectorUDAFCount extends Vec aggregationBufferSets, aggregateIndex, i); - myagg.initIfNull(); - myagg.value++; + myagg.count++; } } } @@ -168,8 +157,7 @@ public class VectorUDAFCount extends Vec aggregationBufferSets, aggregateIndex, j); - myagg.initIfNull(); - myagg.value++; + myagg.count++; } } } @@ -191,17 +179,15 @@ public class VectorUDAFCount extends Vec Aggregation myagg = (Aggregation)agg; - myagg.initIfNull(); - if (inputVector.isRepeating) { if (inputVector.noNulls || !inputVector.isNull[0]) { - myagg.value += batchSize; + myagg.count += batchSize; } return; } if (inputVector.noNulls) { - myagg.value += batchSize; + myagg.count += batchSize; return; } else if (!batch.selectedInUse) { @@ -221,7 +207,7 @@ public class VectorUDAFCount extends Vec for (int j=0; j< batchSize; ++j) { int i = selected[j]; if (!isNull[i]) { - myagg.value += 1; + myagg.count += 1; } } } @@ -233,7 +219,7 @@ public class VectorUDAFCount extends Vec for (int i=0; i< batchSize; ++i) { if (!isNull[i]) { - myagg.value += 1; + myagg.count += 1; } } } @@ -251,14 +237,9 @@ public class VectorUDAFCount extends Vec @Override public Object evaluateOutput(AggregationBuffer agg) throws HiveException { - Aggregation myagg = (Aggregation) agg; - if (myagg.isNull) { - return null; - } - else { - result.set (myagg.value); + Aggregation myagg = (Aggregation) agg; + result.set (myagg.count); return result; - } } @Override
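The VectorUDAFCount change above (and the matching VectorUDAFCountStar change below) replaces the isNull/value pair in the aggregation buffer with a single count field, so a buffer that has aggregated no rows now evaluates to 0 rather than NULL. A minimal sketch of the resulting buffer behaviour, as a hypothetical standalone class rather than code from the patch:

    // Simplified model of the post-patch COUNT aggregation buffer:
    // there is no null flag, so a freshly reset buffer evaluates to 0.
    final class CountBuffer {
      private long count;                      // running count for the group

      void reset()           { count = 0L; }
      void countRow()        { count++; }      // one non-null input row
      void countBatch(int n) { count += n; }   // repeating / no-null fast path
      long evaluate()        { return count; } // always a number, never null
    }
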
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountStar.java Sat Sep 20 17:34:39 2014 @@ -44,8 +44,7 @@ public class VectorUDAFCountStar extends private static final long serialVersionUID = 1L; - transient private long value; - transient private boolean isNull; + transient private long count; @Override public int getVariableSize() { @@ -54,8 +53,7 @@ public class VectorUDAFCountStar extends @Override public void reset() { - isNull = true; - value = 0L; + count = 0L; } } @@ -95,8 +93,7 @@ public class VectorUDAFCountStar extends for (int i=0; i < batchSize; ++i) { Aggregation myAgg = getCurrentAggregationBuffer( aggregationBufferSets, aggregateIndex, i); - myAgg.isNull = false; - ++myAgg.value; + ++myAgg.count; } } @@ -111,8 +108,7 @@ public class VectorUDAFCountStar extends } Aggregation myagg = (Aggregation)agg; - myagg.isNull = false; - myagg.value += batchSize; + myagg.count += batchSize; } @Override @@ -128,14 +124,9 @@ public class VectorUDAFCountStar extends @Override public Object evaluateOutput(AggregationBuffer agg) throws HiveException { - Aggregation myagg = (Aggregation) agg; - if (myagg.isNull) { - return null; - } - else { - result.set (myagg.value); + Aggregation myagg = (Aggregation) agg; + result.set (myagg.count); return result; - } } @Override Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java Sat Sep 20 17:34:39 2014 @@ -305,6 +305,28 @@ public class AcidUtils { } /** + * Is the given directory in ACID format? + * @param directory the partition directory to check + * @param conf the query configuration + * @return true, if it is an ACID directory + * @throws IOException + */ + public static boolean isAcid(Path directory, + Configuration conf) throws IOException { + FileSystem fs = directory.getFileSystem(conf); + for(FileStatus file: fs.listStatus(directory)) { + String filename = file.getPath().getName(); + if (filename.startsWith(BASE_PREFIX) || + filename.startsWith(DELTA_PREFIX)) { + if (file.isDir()) { + return true; + } + } + } + return false; + } + + /** * Get the ACID state of the given directory. It finds the minimal set of * base and diff directories. 
Note that because major compactions don't * preserve the history, we can't use a base directory that includes a Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Sat Sep 20 17:34:39 2014 @@ -122,24 +122,7 @@ public class BucketizedHiveInputFormat<K public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { init(job); - Path[] dirs = FileInputFormat.getInputPaths(job); - if (dirs.length == 0) { - // on tez we're avoiding to duplicate the file info in FileInputFormat. - if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { - try { - List<Path> paths = Utilities.getInputPathsTez(job, mrwork); - dirs = paths.toArray(new Path[paths.size()]); - if (dirs.length == 0) { - // if we still don't have any files it's time to fail. - throw new IOException("No input paths specified in job"); - } - } catch (Exception e) { - throw new IOException("Could not create input paths", e); - } - } else { - throw new IOException("No input paths specified in job"); - } - } + Path[] dirs = getInputPaths(job); JobConf newjob = new JobConf(job); ArrayList<InputSplit> result = new ArrayList<InputSplit>(); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Sat Sep 20 17:34:39 2014 @@ -33,6 +33,7 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -264,8 +265,8 @@ public class CombineHiveInputFormat<K ex /** * Create Hive splits based on CombineFileSplit. 
*/ - @Override - public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + private InputSplit[] getCombineSplits(JobConf job, + int numSplits) throws IOException { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS); init(job); @@ -274,17 +275,6 @@ public class CombineHiveInputFormat<K ex mrwork.getAliasToWork(); CombineFileInputFormatShim combine = ShimLoader.getHadoopShims() .getCombineFileInputFormat(); - - // on tez we're avoiding duplicating path info since the info will go over - // rpc - if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { - try { - List<Path> dirs = Utilities.getInputPathsTez(job, mrwork); - Utilities.setInputPaths(job, dirs); - } catch (Exception e) { - throw new IOException("Could not create input paths", e); - } - } InputSplit[] splits = null; if (combine == null) { @@ -327,13 +317,6 @@ public class CombineHiveInputFormat<K ex // ignore } FileSystem inpFs = path.getFileSystem(job); - if (inputFormatClass.isAssignableFrom(OrcInputFormat.class)) { - if (inpFs.exists(new Path(path, OrcRecordUpdater.ACID_FORMAT))) { - throw new IOException("CombineHiveInputFormat is incompatible " + - " with ACID tables. Please set hive.input.format=" + - "org.apache.hadoop.hive.ql.io.HiveInputFormat"); - } - } // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not, // we use a configuration variable for the same @@ -461,6 +444,84 @@ public class CombineHiveInputFormat<K ex return result.toArray(new CombineHiveInputSplit[result.size()]); } + /** + * Create Hive splits based on CombineFileSplit. + */ + @Override + public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + init(job); + Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases(); + Map<String, Operator<? 
extends OperatorDesc>> aliasToWork = + mrwork.getAliasToWork(); + + ArrayList<InputSplit> result = new ArrayList<InputSplit>(); + + Path[] paths = getInputPaths(job); + + List<Path> nonCombinablePaths = new ArrayList<Path>(paths.length / 2); + List<Path> combinablePaths = new ArrayList<Path>(paths.length / 2); + + for (Path path : paths) { + + PartitionDesc part = + HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, path, + IOPrepareCache.get().allocatePartitionDescMap()); + + // Use HiveInputFormat if any of the paths is not splittable + Class inputFormatClass = part.getInputFileFormatClass(); + String inputFormatClassName = inputFormatClass.getName(); + InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); + if (inputFormat instanceof AvoidSplitCombination && + ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, job)) { + if (LOG.isDebugEnabled()) { + LOG.debug("The split [" + path + + "] is being parked for HiveInputFormat.getSplits"); + } + nonCombinablePaths.add(path); + } else { + combinablePaths.add(path); + } + } + + // Store the previous value for the path specification + String oldPaths = job.get(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname); + if (LOG.isDebugEnabled()) { + LOG.debug("The received input paths are: [" + oldPaths + + "] against the property " + + HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname); + } + + // Process the normal splits + if (nonCombinablePaths.size() > 0) { + FileInputFormat.setInputPaths(job, nonCombinablePaths.toArray + (new Path[nonCombinablePaths.size()])); + InputSplit[] splits = super.getSplits(job, numSplits); + for (InputSplit split : splits) { + result.add(split); + } + } + + // Process the combine splits + if (combinablePaths.size() > 0) { + FileInputFormat.setInputPaths(job, combinablePaths.toArray + (new Path[combinablePaths.size()])); + InputSplit[] splits = getCombineSplits(job, numSplits); + for (InputSplit split : splits) { + result.add(split); + } + } + + // Restore the old path information back + // This is just to prevent incompatibilities with previous versions Hive + // if some application depends on the original value being set. + if (oldPaths != null) { + job.set(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname, oldPaths); + } + LOG.info("Number of all splits " + result.size()); + return result.toArray(new InputSplit[result.size()]); + } + private void processPaths(JobConf job, CombineFileInputFormatShim combine, List<InputSplitShim> iss, Path... 
path) throws IOException { JobConf currJob = new JobConf(job); @@ -635,4 +696,12 @@ public class CombineHiveInputFormat<K ex return s.toString(); } } + + /** + * This is a marker interface that is used to identify the formats where + * combine split generation is not applicable + */ + public interface AvoidSplitCombination { + boolean shouldSkipCombine(Path path, Configuration conf) throws IOException; + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Sat Sep 20 17:34:39 2014 @@ -295,11 +295,7 @@ public class HiveInputFormat<K extends W } } - public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { - PerfLogger perfLogger = PerfLogger.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS); - init(job); - + Path[] getInputPaths(JobConf job) throws IOException { Path[] dirs = FileInputFormat.getInputPaths(job); if (dirs.length == 0) { // on tez we're avoiding to duplicate the file info in FileInputFormat. @@ -314,6 +310,14 @@ public class HiveInputFormat<K extends W throw new IOException("No input paths specified in job"); } } + return dirs; + } + + public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + PerfLogger perfLogger = PerfLogger.getPerfLogger(); + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS); + init(job); + Path[] dirs = getInputPaths(job); JobConf newjob = new JobConf(job); List<InputSplit> result = new ArrayList<InputSplit>(); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java Sat Sep 20 17:34:39 2014 @@ -21,6 +21,8 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.EnumSet; +import javax.annotation.Nullable; + interface CompressionCodec { public enum Modifier { @@ -62,6 +64,6 @@ interface CompressionCodec { * @param modifiers compression modifiers * @return codec for use after optional modification */ - CompressionCodec modify(EnumSet<Modifier> modifiers); + CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Sat Sep 20 17:34:39 2014 @@ -24,6 +24,8 @@ import java.util.Arrays; import java.util.HashMap; import 
java.util.List; import java.util.Map; +import java.util.TreeMap; +import java.util.NavigableMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -46,6 +48,7 @@ import org.apache.hadoop.hive.ql.exec.ve import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; @@ -99,7 +102,8 @@ import com.google.common.util.concurrent */ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, InputFormatChecker, VectorizedInputFormatInterface, - AcidInputFormat<NullWritable, OrcStruct> { + AcidInputFormat<NullWritable, OrcStruct>, + CombineHiveInputFormat.AvoidSplitCombination { private static final Log LOG = LogFactory.getLog(OrcInputFormat.class); static final HadoopShims SHIMS = ShimLoader.getHadoopShims(); @@ -125,6 +129,12 @@ public class OrcInputFormat implements */ private static final double MIN_INCLUDED_LOCATION = 0.80; + @Override + public boolean shouldSkipCombine(Path path, + Configuration conf) throws IOException { + return AcidUtils.isAcid(path, conf); + } + private static class OrcRecordReader implements org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct>, StatsProvidingRecordReader { @@ -610,7 +620,7 @@ public class OrcInputFormat implements private final FileSystem fs; private final FileStatus file; private final long blockSize; - private final BlockLocation[] locations; + private final TreeMap<Long, BlockLocation> locations; private final FileInfo fileInfo; private List<StripeInformation> stripes; private ReaderImpl.FileMetaInfo fileMetaInfo; @@ -630,7 +640,7 @@ public class OrcInputFormat implements this.file = file; this.blockSize = file.getBlockSize(); this.fileInfo = fileInfo; - locations = SHIMS.getLocations(fs, file); + locations = SHIMS.getLocationsWithOffset(fs, file); this.isOriginal = isOriginal; this.deltas = deltas; this.hasBase = hasBase; @@ -641,8 +651,8 @@ public class OrcInputFormat implements } void schedule() throws IOException { - if(locations.length == 1 && file.getLen() < context.maxSize) { - String[] hosts = locations[0].getHosts(); + if(locations.size() == 1 && file.getLen() < context.maxSize) { + String[] hosts = locations.firstEntry().getValue().getHosts(); synchronized (context.splits) { context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(), hosts, fileMetaInfo, isOriginal, hasBase, deltas)); @@ -690,15 +700,22 @@ public class OrcInputFormat implements void createSplit(long offset, long length, ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException { String[] hosts; - if ((offset % blockSize) + length <= blockSize) { + Map.Entry<Long, BlockLocation> startEntry = locations.floorEntry(offset); + BlockLocation start = startEntry.getValue(); + if (offset + length <= start.getOffset() + start.getLength()) { // handle the single block case - hosts = locations[(int) (offset / blockSize)].getHosts(); + hosts = start.getHosts(); } else { + Map.Entry<Long, BlockLocation> endEntry = locations.floorEntry(offset + length); + BlockLocation end = endEntry.getValue(); + //get the submap + NavigableMap<Long, BlockLocation> navigableMap = locations.subMap(startEntry.getKey(), + true, endEntry.getKey(), true); // Calculate the number of 
bytes in the split that are local to each // host. Map<String, LongWritable> sizes = new HashMap<String, LongWritable>(); long maxSize = 0; - for(BlockLocation block: locations) { + for (BlockLocation block : navigableMap.values()) { long overlap = getOverlap(offset, length, block.getOffset(), block.getLength()); if (overlap > 0) { @@ -711,6 +728,9 @@ public class OrcInputFormat implements val.set(val.get() + overlap); maxSize = Math.max(maxSize, val.get()); } + } else { + throw new IOException("File " + file.getPath().toString() + + " should have had overlap on block starting at " + block.getOffset()); } } // filter the list of locations to those that have at least 80% of the @@ -718,7 +738,7 @@ public class OrcInputFormat implements long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION); List<String> hostList = new ArrayList<String>(); // build the locations in a predictable order to simplify testing - for(BlockLocation block: locations) { + for(BlockLocation block: navigableMap.values()) { for(String host: block.getHosts()) { if (sizes.containsKey(host)) { if (sizes.get(host).get() >= threshold) { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Sep 20 17:34:39 2014 @@ -77,6 +77,7 @@ import com.google.common.collect.Compari class RecordReaderImpl implements RecordReader { private static final Log LOG = LogFactory.getLog(RecordReaderImpl.class); + private static final boolean isLogTraceEnabled = LOG.isTraceEnabled(); private final FSDataInputStream file; private final long firstRow; @@ -3133,9 +3134,9 @@ class RecordReaderImpl implements Record // find the next row rowInStripe += 1; advanceToNextRow(rowInStripe + rowBaseInStripe); - if (LOG.isDebugEnabled()) { - LOG.debug("row from " + reader.path); - LOG.debug("orc row = " + result); + if (isLogTraceEnabled) { + LOG.trace("row from " + reader.path); + LOG.trace("orc row = " + result); } return result; } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Sat Sep 20 17:34:39 2014 @@ -485,6 +485,7 @@ class WriterImpl implements Writer, Memo modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY); break; default: + LOG.warn("Missing ORC compression modifiers for " + kind); modifiers = null; break; } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java 
(original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java Sat Sep 20 17:34:39 2014 @@ -24,6 +24,8 @@ import java.util.zip.DataFormatException import java.util.zip.Deflater; import java.util.zip.Inflater; +import javax.annotation.Nullable; + import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType; import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim; @@ -130,7 +132,12 @@ class ZlibCodec implements CompressionCo } @Override - public CompressionCodec modify(EnumSet<Modifier> modifiers) { + public CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers) { + + if (modifiers == null) { + return this; + } + int l = this.level; int s = this.strategy; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java Sat Sep 20 17:34:39 2014 @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.lockmgr import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData; import org.apache.hadoop.hive.ql.metadata.*; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.KeeperException; @@ -73,31 +74,6 @@ public class ZooKeeperHiveLockManager im } /** - * @param conf The hive configuration - * Get the quorum server address from the configuration. The format is: - * host1:port, host2:port.. - **/ - @VisibleForTesting - static String getQuorumServers(HiveConf conf) { - String[] hosts = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM).split(","); - String port = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT); - StringBuilder quorum = new StringBuilder(); - for(int i=0; i<hosts.length; i++) { - quorum.append(hosts[i].trim()); - if (!hosts[i].contains(":")) { - // if the hostname doesn't contain a port, add the configured port to hostname - quorum.append(":"); - quorum.append(port); - } - - if (i != hosts.length-1) - quorum.append(","); - } - - return quorum.toString(); - } - - /** * @param ctx The lock manager context (containing the Hive configuration file) * Start the ZooKeeper client based on the zookeeper cluster specified in the conf. 
**/ @@ -105,7 +81,7 @@ public class ZooKeeperHiveLockManager im this.ctx = ctx; HiveConf conf = ctx.getConf(); sessionTimeout = conf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT); - quorumServers = ZooKeeperHiveLockManager.getQuorumServers(conf); + quorumServers = ZooKeeperHiveHelper.getQuorumServers(conf); sleepTime = conf.getTimeVar( HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES, TimeUnit.MILLISECONDS); @@ -146,7 +122,7 @@ public class ZooKeeperHiveLockManager im return; } - zooKeeper = new ZooKeeper(quorumServers, sessionTimeout, new DummyWatcher()); + zooKeeper = new ZooKeeper(quorumServers, sessionTimeout, new ZooKeeperHiveHelper.DummyWatcher()); } /** @@ -517,8 +493,8 @@ public class ZooKeeperHiveLockManager im ZooKeeper zkpClient = null; try { int sessionTimeout = conf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT); - String quorumServers = getQuorumServers(conf); - Watcher dummyWatcher = new DummyWatcher(); + String quorumServers = ZooKeeperHiveHelper.getQuorumServers(conf); + Watcher dummyWatcher = new ZooKeeperHiveHelper.DummyWatcher(); zkpClient = new ZooKeeper(quorumServers, sessionTimeout, dummyWatcher); String parent = conf.getVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_NAMESPACE); List<HiveLock> locks = getLocks(conf, zkpClient, null, parent, false, false); @@ -629,7 +605,8 @@ public class ZooKeeperHiveLockManager im if (fetchData) { try { - data = new HiveLockObjectData(new String(zkpClient.getData(curChild, new DummyWatcher(), null))); + data = new HiveLockObjectData(new String(zkpClient.getData(curChild, + new ZooKeeperHiveHelper.DummyWatcher(), null))); data.setClientIp(clientIp); } catch (Exception e) { LOG.error("Error in getting data for " + curChild, e); @@ -789,11 +766,6 @@ public class ZooKeeperHiveLockManager im return null; } - public static class DummyWatcher implements Watcher { - public void process(org.apache.zookeeper.WatchedEvent event) { - } - } - @Override public void prepareRetry() throws LockException { try { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java Sat Sep 20 17:34:39 2014 @@ -193,11 +193,12 @@ public class StatsOptimizer implements T } SelectOperator selOp = (SelectOperator)tsOp.getChildren().get(0); for(ExprNodeDesc desc : selOp.getConf().getColList()) { - if (!(desc instanceof ExprNodeColumnDesc)) { + if (!((desc instanceof ExprNodeColumnDesc) || (desc instanceof ExprNodeConstantDesc))) { // Probably an expression, cant handle that return null; } } + Map<String, ExprNodeDesc> exprMap = selOp.getColumnExprMap(); // Since we have done an exact match on TS-SEL-GBY-RS-GBY-SEL-FS // we need not to do any instanceof checks for following. 
GroupByOperator gbyOp = (GroupByOperator)selOp.getChildren().get(0); @@ -215,6 +216,12 @@ public class StatsOptimizer implements T return null; } + for(ExprNodeDesc desc : selOp.getConf().getColList()) { + if (!(desc instanceof ExprNodeColumnDesc)) { + // Probably an expression, cant handle that + return null; + } + } FileSinkOperator fsOp = (FileSinkOperator)(selOp.getChildren().get(0)); if (fsOp.getChildOperators() != null && fsOp.getChildOperators().size() > 0) { // looks like a subq plan. @@ -236,22 +243,28 @@ public class StatsOptimizer implements T GenericUDAFResolver udaf = FunctionRegistry.getGenericUDAFResolver(aggr.getGenericUDAFName()); if (udaf instanceof GenericUDAFSum) { - if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){ + ExprNodeDesc desc = aggr.getParameters().get(0); + String constant; + if (desc instanceof ExprNodeConstantDesc) { + constant = ((ExprNodeConstantDesc) desc).getValue().toString(); + } else if (desc instanceof ExprNodeColumnDesc && exprMap.get(((ExprNodeColumnDesc)desc).getColumn()) instanceof ExprNodeConstantDesc) { + constant = ((ExprNodeConstantDesc)exprMap.get(((ExprNodeColumnDesc)desc).getColumn())).getValue().toString(); + } else { return null; } Long rowCnt = getRowCnt(pctx, tsOp, tbl); if(rowCnt == null) { return null; } - oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0)) - .getValue().toString()).multiply(HiveDecimal.create(rowCnt))); + oneRow.add(HiveDecimal.create(constant).multiply(HiveDecimal.create(rowCnt))); ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( PrimitiveCategory.DECIMAL)); } else if (udaf instanceof GenericUDAFCount) { Long rowCnt = 0L; - if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof - ExprNodeConstantDesc)) { + if (aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof + ExprNodeConstantDesc || ((aggr.getParameters().get(0) instanceof ExprNodeColumnDesc) && + exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()) instanceof ExprNodeConstantDesc)) { // Its either count (*) or count(1) case rowCnt = getRowCnt(pctx, tsOp, tbl); if(rowCnt == null) { @@ -259,12 +272,7 @@ public class StatsOptimizer implements T } } else { // Its count(col) case - if (!(aggr.getParameters().get(0) instanceof ExprNodeColumnDesc)) { - // this is weird, we got expr or something in there, bail out - Log.debug("Unexpected expression : " + aggr.getParameters().get(0)); - return null; - } - ExprNodeColumnDesc desc = (ExprNodeColumnDesc)aggr.getParameters().get(0); + ExprNodeColumnDesc desc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()); String colName = desc.getColumn(); StatType type = getType(desc.getTypeString()); if(!tbl.isPartitioned()) { @@ -330,7 +338,7 @@ public class StatsOptimizer implements T ois.add(PrimitiveObjectInspectorFactory. 
getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG)); } else if (udaf instanceof GenericUDAFMax) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0); + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()); String colName = colDesc.getColumn(); StatType type = getType(colDesc.getTypeString()); if(!tbl.isPartitioned()) { @@ -419,7 +427,7 @@ public class StatsOptimizer implements T } } } else if (udaf instanceof GenericUDAFMin) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0); + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()); String colName = colDesc.getColumn(); StatType type = getType(colDesc.getTypeString()); if (!tbl.isPartitioned()) { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Sat Sep 20 17:34:39 2014 @@ -115,6 +115,10 @@ public abstract class BaseSemanticAnalyz protected LineageInfo linfo; protected TableAccessInfo tableAccessInfo; protected ColumnAccessInfo columnAccessInfo; + /** + * Columns accessed by updates + */ + protected ColumnAccessInfo updateColumnAccessInfo; public boolean skipAuthorization() { @@ -866,6 +870,14 @@ public abstract class BaseSemanticAnalyz this.columnAccessInfo = columnAccessInfo; } + public ColumnAccessInfo getUpdateColumnAccessInfo() { + return updateColumnAccessInfo; + } + + public void setUpdateColumnAccessInfo(ColumnAccessInfo updateColumnAccessInfo) { + this.updateColumnAccessInfo = updateColumnAccessInfo; + } + protected LinkedHashMap<String, String> extractPartitionSpecs(Tree partspec) throws SemanticException { LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>(); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java Sat Sep 20 17:34:39 2014 @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.parse; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -54,4 +56,21 @@ public class ColumnAccessInfo { } return mapping; } + + /** + * Strip a virtual column out of the set of columns. This is useful in cases where we do not + * want to be checking against the user reading virtual columns, namely update and delete. 
+ * @param vc + */ + public void stripVirtualColumn(VirtualColumn vc) { + for (Map.Entry<String, Set<String>> e : tableToColumnAccessMap.entrySet()) { + for (String columnName : e.getValue()) { + if (vc.getName().equalsIgnoreCase(columnName)) { + e.getValue().remove(columnName); + break; + } + } + } + + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Sat Sep 20 17:34:39 2014 @@ -1145,7 +1145,10 @@ public class DDLSemanticAnalyzer extends } } - inputs.add(new ReadEntity(getTable(tableName))); + Table tbl = getTable(tableName, false); + if (tbl != null) { + inputs.add(new ReadEntity(getTable(tableName))); + } DropIndexDesc dropIdxDesc = new DropIndexDesc(indexName, tableName); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Sat Sep 20 17:34:39 2014 @@ -1530,8 +1530,8 @@ principalSpecification principalName @init {pushMsg("user|group|role name", state);} @after {popMsg(state);} - : KW_USER identifier -> ^(TOK_USER identifier) - | KW_GROUP identifier -> ^(TOK_GROUP identifier) + : KW_USER principalIdentifier -> ^(TOK_USER principalIdentifier) + | KW_GROUP principalIdentifier -> ^(TOK_GROUP principalIdentifier) | KW_ROLE identifier -> ^(TOK_ROLE identifier) ; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Sat Sep 20 17:34:39 2014 @@ -536,6 +536,13 @@ functionIdentifier identifier ; +principalIdentifier +@init { gParent.pushMsg("identifier for principal spec", state); } +@after { gParent.popMsg(state); } + : identifier + | QuotedIdentifier + ; + nonReserved : KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | 
KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_AN ALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION | KW_VALUES Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sat Sep 20 17:34:39 2014 @@ -47,7 +47,6 @@ import org.apache.hadoop.fs.FSDataOutput import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; @@ -805,7 +804,8 @@ public class SemanticAnalyzer extends Ba return PlanUtils.stripQuotes(expr.getText()); case HiveParser.KW_FALSE: - return "FALSE"; + // UDFToBoolean casts any non-empty string to true, so set this to false + return ""; case HiveParser.KW_TRUE: return "TRUE"; @@ -813,6 +813,10 @@ public class SemanticAnalyzer extends Ba case HiveParser.MINUS: return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0)); + case HiveParser.TOK_NULL: + // Hive's text input will translate this as a null + return "\\N"; + 
default: throw new SemanticException("Expression of type " + expr.getText() + " not supported in insert/values"); @@ -5866,7 +5870,7 @@ public class SemanticAnalyzer extends Ba if (!isNonNativeTable) { AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass()); if (acidOp != AcidUtils.Operation.NOT_ACID) { - checkIfAcidAndOverwriting(qb, table_desc); + checkAcidConstraints(qb, table_desc); } ltd = new LoadTableDesc(queryTmpdir,table_desc, dpCtx, acidOp); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -5973,7 +5977,7 @@ public class SemanticAnalyzer extends Ba dest_part.isStoredAsSubDirectories(), conf); AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass()); if (acidOp != AcidUtils.Operation.NOT_ACID) { - checkIfAcidAndOverwriting(qb, table_desc); + checkAcidConstraints(qb, table_desc); } ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -6233,15 +6237,25 @@ public class SemanticAnalyzer extends Ba return output; } - // Check if we are overwriting any tables. If so, throw an exception as that is not allowed - // when using an Acid compliant txn manager and operating on an acid table. - private void checkIfAcidAndOverwriting(QB qb, TableDesc tableDesc) throws SemanticException { + // Check constraints on acid tables. This includes + // * no insert overwrites + // * no use of vectorization + // * turns off reduce deduplication optimization, as that sometimes breaks acid + // This method assumes you have already decided that this is an Acid write. Don't call it if + // that isn't true. + private void checkAcidConstraints(QB qb, TableDesc tableDesc) throws SemanticException { String tableName = tableDesc.getTableName(); if (!qb.getParseInfo().isInsertIntoTable(tableName)) { LOG.debug("Couldn't find table " + tableName + " in insertIntoTable"); throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg()); } - + if (conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) { + LOG.info("Turning off vectorization for acid write operation"); + conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false); + } + LOG.info("Modifying config values for ACID write"); + conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, false); + conf.setBoolVar(ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES, true); } /** Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Sat Sep 20 17:34:39 2014 @@ -751,12 +751,10 @@ public final class TypeCheckProcFactory if (myt.getCategory() == Category.LIST) { // Only allow integer index for now - if (!(children.get(1) instanceof ExprNodeConstantDesc) - || !(((ExprNodeConstantDesc) children.get(1)).getTypeInfo() - .equals(TypeInfoFactory.intTypeInfo))) { + if (!FunctionRegistry.implicitConvertible(children.get(1).getTypeInfo(), + TypeInfoFactory.intTypeInfo)) { throw new SemanticException(SemanticAnalyzer.generateErrorMessage( - expr, - ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg())); + expr, 
ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg())); } // Calculate TypeInfo @@ -764,14 +762,8 @@ public final class TypeCheckProcFactory desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry .getGenericUDFForIndex(), children); } else if (myt.getCategory() == Category.MAP) { - // Only allow constant map key for now - if (!(children.get(1) instanceof ExprNodeConstantDesc)) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage( - expr, - ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg())); - } - if (!(((ExprNodeConstantDesc) children.get(1)).getTypeInfo() - .equals(((MapTypeInfo) myt).getMapKeyTypeInfo()))) { + if (!FunctionRegistry.implicitConvertible(children.get(1).getTypeInfo(), + ((MapTypeInfo) myt).getMapKeyTypeInfo())) { throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE .getMsg(expr)); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java Sat Sep 20 17:34:39 2014 @@ -28,11 +28,13 @@ import org.apache.hadoop.hive.ql.io.Acid import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.session.SessionState; import java.io.IOException; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -127,7 +129,9 @@ public class UpdateDeleteSemanticAnalyze try { mTable = db.getTable(tableName[0], tableName[1]); } catch (HiveException e) { - throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e); + LOG.error("Failed to find table " + getDotName(tableName) + " got exception " + + e.getMessage()); + throw new SemanticException(ErrorMsg.INVALID_TABLE, getDotName(tableName)); } List<FieldSchema> partCols = mTable.getPartCols(); @@ -148,6 +152,8 @@ public class UpdateDeleteSemanticAnalyze rewrittenQueryStr.append(" select ROW__ID"); Map<Integer, ASTNode> setColExprs = null; + Map<String, ASTNode> setCols = null; + Set<String> setRCols = new HashSet<String>(); if (updating()) { // An update needs to select all of the columns, as we rewrite the entire row. Also, // we need to figure out which columns we are going to replace. We won't write the set @@ -160,7 +166,7 @@ public class UpdateDeleteSemanticAnalyze // Get the children of the set clause, each of which should be a column assignment List<? 
extends Node> assignments = setClause.getChildren(); - Map<String, ASTNode> setCols = new HashMap<String, ASTNode>(assignments.size()); + setCols = new HashMap<String, ASTNode>(assignments.size()); setColExprs = new HashMap<Integer, ASTNode>(assignments.size()); for (Node a : assignments) { ASTNode assignment = (ASTNode)a; @@ -173,6 +179,8 @@ public class UpdateDeleteSemanticAnalyze assert colName.getToken().getType() == HiveParser.Identifier : "Expected column name"; + addSetRCols((ASTNode) assignment.getChildren().get(1), setRCols); + String columnName = colName.getText(); // Make sure this isn't one of the partitioning columns, that's not supported. @@ -323,6 +331,28 @@ public class UpdateDeleteSemanticAnalyze WriteEntity.WriteType.UPDATE); } } + + // For updates, we need to set the column access info so that it contains information on + // the columns we are updating. + if (updating()) { + ColumnAccessInfo cai = new ColumnAccessInfo(); + for (String colName : setCols.keySet()) { + cai.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName); + } + setUpdateColumnAccessInfo(cai); + + // Add the setRCols to the input list + for (String colName : setRCols) { + columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), + colName); + } + } + + // We need to weed ROW__ID out of the input column info, as it doesn't make any sense to + // require the user to have authorization on that column. + if (columnAccessInfo != null) { + columnAccessInfo.stripVirtualColumn(VirtualColumn.ROWID); + } } private String operation() { @@ -342,4 +372,22 @@ public class UpdateDeleteSemanticAnalyze } return false; } + + // This method find any columns on the right side of a set statement (thus rcols) and puts them + // in a set so we can add them to the list of input cols to check. + private void addSetRCols(ASTNode node, Set<String> setRCols) { + + // See if this node is a TOK_TABLE_OR_COL. If so, find the value and put it in the list. 
If + // not, recurse on any children + if (node.getToken().getType() == HiveParser.TOK_TABLE_OR_COL) { + ASTNode colName = (ASTNode)node.getChildren().get(0); + assert colName.getToken().getType() == HiveParser.Identifier : + "Expected column name"; + setRCols.add(colName.getText()); + } else if (node.getChildren() != null) { + for (Node n : node.getChildren()) { + addSetRCols(node, setRCols); + } + } + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java Sat Sep 20 17:34:39 2014 @@ -21,6 +21,9 @@ package org.apache.hadoop.hive.ql.proces import java.util.Arrays; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; @@ -31,6 +34,7 @@ import org.apache.hadoop.hive.ql.session import com.google.common.base.Joiner; class CommandUtil { + public static final Log LOG = LogFactory.getLog(CommandUtil.class); /** * Authorize command of given type and arguments @@ -47,14 +51,19 @@ class CommandUtil { // ss can be null in unit tests return null; } - if (ss.isAuthorizationModeV2()) { + + if (ss.isAuthorizationModeV2() && + HiveConf.getBoolVar(ss.getConf(), HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + String errMsg = "Error authorizing command " + command; try { authorizeCommandThrowEx(ss, type, command); // authorized to perform action return null; } catch (HiveAuthzPluginException e) { + LOG.error(errMsg, e); return CommandProcessorResponse.create(e); } catch (HiveAccessControlException e) { + LOG.error(errMsg, e); return CommandProcessorResponse.create(e); } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java Sat Sep 20 17:34:39 2014 @@ -310,9 +310,12 @@ public class AuthorizationUtils { return HivePrivObjectActionType.INSERT; case INSERT_OVERWRITE: return HivePrivObjectActionType.INSERT_OVERWRITE; + case UPDATE: + return HivePrivObjectActionType.UPDATE; + case DELETE: + return HivePrivObjectActionType.DELETE; default: - // Ignore other types for purposes of authorization, we are interested only - // in INSERT vs INSERT_OVERWRITE as of now + // Ignore other types for purposes of authorization break; } } Modified: 
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java Sat Sep 20 17:34:39 2014 @@ -81,7 +81,7 @@ public class HivePrivilegeObject impleme GLOBAL, DATABASE, TABLE_OR_VIEW, PARTITION, COLUMN, LOCAL_URI, DFS_URI, COMMAND_PARAMS, FUNCTION } ; public enum HivePrivObjectActionType { - OTHER, INSERT, INSERT_OVERWRITE + OTHER, INSERT, INSERT_OVERWRITE, UPDATE, DELETE }; private final HivePrivilegeObjectType type; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java Sat Sep 20 17:34:39 2014 @@ -118,6 +118,7 @@ public class Operation2Privilege { private static SQLPrivTypeGrant[] ADMIN_PRIV_AR = arr(SQLPrivTypeGrant.ADMIN_PRIV); private static SQLPrivTypeGrant[] INS_NOGRANT_AR = arr(SQLPrivTypeGrant.INSERT_NOGRANT); private static SQLPrivTypeGrant[] DEL_NOGRANT_AR = arr(SQLPrivTypeGrant.DELETE_NOGRANT); + private static SQLPrivTypeGrant[] UPD_NOGRANT_AR = arr(SQLPrivTypeGrant.UPDATE_NOGRANT); private static SQLPrivTypeGrant[] OWNER_INS_SEL_DEL_NOGRANT_AR = arr(SQLPrivTypeGrant.OWNER_PRIV, SQLPrivTypeGrant.INSERT_NOGRANT, @@ -287,8 +288,14 @@ public class Operation2Privilege { op2Priv.put(HiveOperationType.QUERY, arr( new PrivRequirement(SEL_NOGRANT_AR, IOType.INPUT), - new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, null), - new PrivRequirement(DEL_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.INSERT_OVERWRITE) + new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.INSERT), + new PrivRequirement( + arr(SQLPrivTypeGrant.INSERT_NOGRANT, SQLPrivTypeGrant.DELETE_NOGRANT), + IOType.OUTPUT, + HivePrivObjectActionType.INSERT_OVERWRITE), + new PrivRequirement(DEL_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.DELETE), + new PrivRequirement(UPD_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.UPDATE), + new PrivRequirement(INS_NOGRANT_AR, IOType.OUTPUT, HivePrivObjectActionType.OTHER) ) ); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- 
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java Sat Sep 20 17:34:39 2014 @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.Database; @@ -53,6 +54,8 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilege; @@ -455,4 +458,23 @@ public class SQLAuthorizationUtils { return hivePrincipals; } + /** + * Change the session context based on configuration to aid in testing of sql + * std auth + * + * @param ctx + * @param conf + * @return + */ + static HiveAuthzSessionContext applyTestSettings(HiveAuthzSessionContext ctx, HiveConf conf) { + if (conf.getBoolVar(ConfVars.HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE) + && ctx.getClientType() == CLIENT_TYPE.HIVECLI) { + // create new session ctx object with HS2 as client type + HiveAuthzSessionContext.Builder ctxBuilder = new HiveAuthzSessionContext.Builder(ctx); + ctxBuilder.setClientType(CLIENT_TYPE.HIVESERVER2); + return ctxBuilder.build(); + } + return ctx; + } + } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAccessController.java Sat Sep 20 17:34:39 2014 @@ -90,42 +90,11 @@ public class SQLStdHiveAccessController HiveAuthenticationProvider authenticator, HiveAuthzSessionContext ctx) throws HiveAuthzPluginException { this.metastoreClientFactory = metastoreClientFactory; this.authenticator = authenticator; - this.sessionCtx = applyTestSettings(ctx, conf); - - assertHiveCliAuthDisabled(conf); - initUserRoles(); + this.sessionCtx = SQLAuthorizationUtils.applyTestSettings(ctx, conf); LOG.info("Created SQLStdHiveAccessController for session context : " + sessionCtx); } /** - * Change the session context based on configuration to aid in testing of sql std auth - * @param ctx - * @param conf - * @return - */ - private 
HiveAuthzSessionContext applyTestSettings(HiveAuthzSessionContext ctx, HiveConf conf) { - if(conf.getBoolVar(ConfVars.HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE) && - ctx.getClientType() == CLIENT_TYPE.HIVECLI - ){ - // create new session ctx object with HS2 as client type - HiveAuthzSessionContext.Builder ctxBuilder = new HiveAuthzSessionContext.Builder(ctx); - ctxBuilder.setClientType(CLIENT_TYPE.HIVESERVER2); - return ctxBuilder.build(); - } - return ctx; - } - - private void assertHiveCliAuthDisabled(HiveConf conf) throws HiveAuthzPluginException { - if (sessionCtx.getClientType() == CLIENT_TYPE.HIVECLI - && conf.getBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - throw new HiveAuthzPluginException( - "SQL standards based authorization should not be enabled from hive cli" - + "Instead the use of storage based authorization in hive metastore is reccomended. Set " - + ConfVars.HIVE_AUTHORIZATION_ENABLED.varname + "=false to disable authz within cli"); - } - } - - /** * (Re-)initialize currentRoleNames if necessary. * @throws HiveAuthzPluginException */ Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java Sat Sep 20 17:34:39 2014 @@ -25,12 +25,15 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrincipal; @@ -44,16 +47,30 @@ public class SQLStdHiveAuthorizationVali private final HiveConf conf; private final HiveAuthenticationProvider authenticator; private final SQLStdHiveAccessControllerWrapper privController; + private final HiveAuthzSessionContext ctx; public static final Log LOG = LogFactory.getLog(SQLStdHiveAuthorizationValidator.class); public SQLStdHiveAuthorizationValidator(HiveMetastoreClientFactory metastoreClientFactory, HiveConf conf, HiveAuthenticationProvider authenticator, - SQLStdHiveAccessControllerWrapper privilegeManager) { + 
SQLStdHiveAccessControllerWrapper privilegeManager, HiveAuthzSessionContext ctx) + throws HiveAuthzPluginException { this.metastoreClientFactory = metastoreClientFactory; this.conf = conf; this.authenticator = authenticator; this.privController = privilegeManager; + this.ctx = SQLAuthorizationUtils.applyTestSettings(ctx, conf); + assertHiveCliAuthDisabled(conf); + } + + private void assertHiveCliAuthDisabled(HiveConf conf) throws HiveAuthzPluginException { + if (ctx.getClientType() == CLIENT_TYPE.HIVECLI + && conf.getBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + throw new HiveAuthzPluginException( + "SQL standards based authorization should not be enabled from hive cli" + + "Instead the use of storage based authorization in hive metastore is reccomended. Set " + + ConfVars.HIVE_AUTHORIZATION_ENABLED.varname + "=false to disable authz within cli"); + } } @Override Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java Sat Sep 20 17:34:39 2014 @@ -37,7 +37,7 @@ public class SQLStdHiveAuthorizerFactory return new HiveAuthorizerImpl( privilegeManager, new SQLStdHiveAuthorizationValidator(metastoreClientFactory, conf, authenticator, - privilegeManager) + privilegeManager, ctx) ); } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Sat Sep 20 17:34:39 2014 @@ -519,16 +519,17 @@ public class SessionState { */ private Path createRootHDFSDir(HiveConf conf) throws IOException { Path rootHDFSDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR)); - FsPermission expectedHDFSDirPermission = new FsPermission("777"); + FsPermission writableHDFSDirPermission = new FsPermission((short)00733); FileSystem fs = rootHDFSDirPath.getFileSystem(conf); if (!fs.exists(rootHDFSDirPath)) { - Utilities.createDirsWithPermission(conf, rootHDFSDirPath, expectedHDFSDirPermission, true); + Utilities.createDirsWithPermission(conf, rootHDFSDirPath, writableHDFSDirPermission, true); } FsPermission currentHDFSDirPermission = fs.getFileStatus(rootHDFSDirPath).getPermission(); LOG.debug("HDFS root scratch dir: " + rootHDFSDirPath + ", permission: " + currentHDFSDirPermission); - // If the root HDFS scratch dir already exists, make sure the permissions are 777. - if (!expectedHDFSDirPermission.equals(fs.getFileStatus(rootHDFSDirPath).getPermission())) { + // If the root HDFS scratch dir already exists, make sure it is writeable. 
+ if (!((currentHDFSDirPermission.toShort() & writableHDFSDirPermission + .toShort()) == writableHDFSDirPermission.toShort())) { throw new RuntimeException("The root scratch dir: " + rootHDFSDirPath + " on HDFS should be writable. Current permissions are: " + currentHDFSDirPermission); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java Sat Sep 20 17:34:39 2014 @@ -26,9 +26,11 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.IntWritable; /** * GenericUDFIndex. @@ -36,11 +38,10 @@ import org.apache.hadoop.hive.serde2.obj */ @Description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ") public class GenericUDFIndex extends GenericUDF { + private transient MapObjectInspector mapOI; - private boolean mapKeyPreferWritable; private transient ListObjectInspector listOI; - private transient PrimitiveObjectInspector indexOI; - private transient ObjectInspector returnOI; + private transient ObjectInspectorConverters.Converter converter; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -66,21 +67,22 @@ public class GenericUDFIndex extends Gen } // index has to be a primitive - if (arguments[1] instanceof PrimitiveObjectInspector) { - indexOI = (PrimitiveObjectInspector) arguments[1]; - } else { + if (!(arguments[1] instanceof PrimitiveObjectInspector)) { throw new UDFArgumentTypeException(1, "Primitive Type is expected but " + arguments[1].getTypeName() + "\" is found"); } - + PrimitiveObjectInspector inputOI = (PrimitiveObjectInspector) arguments[1]; + ObjectInspector returnOI; + ObjectInspector indexOI; if (mapOI != null) { + indexOI = ObjectInspectorConverters.getConvertedOI( + inputOI, mapOI.getMapKeyObjectInspector()); returnOI = mapOI.getMapValueObjectInspector(); - ObjectInspector keyOI = mapOI.getMapKeyObjectInspector(); - mapKeyPreferWritable = ((PrimitiveObjectInspector) keyOI) - .preferWritable(); } else { + indexOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector; returnOI = listOI.getListElementObjectInspector(); } + converter = ObjectInspectorConverters.getConverter(inputOI, indexOI); return returnOI; } @@ -88,35 +90,16 @@ public class GenericUDFIndex extends Gen @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { assert (arguments.length == 2); - Object main = arguments[0].get(); Object index = arguments[1].get(); 
+ Object indexObject = converter.convert(index); + if (indexObject == null) { + return null; + } if (mapOI != null) { - - Object indexObject; - if (mapKeyPreferWritable) { - indexObject = indexOI.getPrimitiveWritableObject(index); - } else { - indexObject = indexOI.getPrimitiveJavaObject(index); - } - return mapOI.getMapValueElement(main, indexObject); - - } else { - - assert (listOI != null); - int intIndex = 0; - try { - intIndex = PrimitiveObjectInspectorUtils.getInt(index, indexOI); - } catch (NullPointerException e) { - // If index is null, we should return null. - return null; - } catch (NumberFormatException e) { - // If index is not a number, we should return null. - return null; - } - return listOI.getListElement(main, intIndex); - + return mapOI.getMapValueElement(arguments[0].get(), indexObject); } + return listOI.getListElement(arguments[0].get(), ((IntWritable)indexObject).get()); } @Override Modified: hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original) +++ hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Sat Sep 20 17:34:39 2014 @@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec; import java.lang.reflect.Method; import java.util.ArrayList; -import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -80,7 +79,7 @@ public class TestFunctionRegistry extend } private void implicit(TypeInfo a, TypeInfo b, boolean convertible) { - assertEquals(convertible, FunctionRegistry.implicitConvertable(a,b)); + assertEquals(convertible, FunctionRegistry.implicitConvertible(a, b)); } public void testImplicitConversion() { Modified: hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (original) +++ hive/branches/spark/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java Sat Sep 20 17:34:39 2014 @@ -18,17 +18,24 @@ package org.apache.hadoop.hive.ql.exec; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.io.IOContext; import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.CollectDesc; @@ -42,6 +49,10 @@ import org.apache.hadoop.hive.ql.plan.Pl import org.apache.hadoop.hive.ql.plan.ScriptDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.processors.CommandProcessor; +import 
org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -49,8 +60,14 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TextInputFormat; +import org.junit.Test; /** * TestOperators. @@ -274,7 +291,7 @@ public class TestOperators extends TestC cd, sop); op.initialize(new JobConf(TestOperators.class), - new ObjectInspector[] {r[0].oi}); + new ObjectInspector[]{r[0].oi}); // evaluate on row for (int i = 0; i < 5; i++) { @@ -379,4 +396,82 @@ public class TestOperators extends TestC throw (e); } } + + @Test + public void testFetchOperatorContextQuoting() throws Exception { + JobConf conf = new JobConf(); + ArrayList<Path> list = new ArrayList<Path>(); + list.add(new Path("hdfs://nn.example.com/fi\tl\\e\t1")); + list.add(new Path("hdfs://nn.example.com/file\t2")); + list.add(new Path("file:/file3")); + FetchOperator.setFetchOperatorContext(conf, list); + String[] parts = + conf.get(FetchOperator.FETCH_OPERATOR_DIRECTORY_LIST).split("\t"); + assertEquals(3, parts.length); + assertEquals("hdfs://nn.example.com/fi\\tl\\\\e\\t1", parts[0]); + assertEquals("hdfs://nn.example.com/file\\t2", parts[1]); + assertEquals("file:/file3", parts[2]); + } + + /** + * A custom input format that checks to make sure that the fetch operator + * sets the required attributes. + */ + public static class CustomInFmt extends TextInputFormat { + + @Override + public InputSplit[] getSplits(JobConf job, int splits) throws IOException { + + // ensure that the table properties were copied + assertEquals("val1", job.get("myprop1")); + assertEquals("val2", job.get("myprop2")); + + // ensure that both of the partitions are in the complete list. 
+ String[] dirs = job.get("hive.complete.dir.list").split("\t"); + assertEquals(2, dirs.length); + assertEquals(true, dirs[0].endsWith("/state=CA")); + assertEquals(true, dirs[1].endsWith("/state=OR")); + return super.getSplits(job, splits); + } + } + + @Test + public void testFetchOperatorContext() throws Exception { + HiveConf conf = new HiveConf(); + conf.set("hive.support.concurrency", "false"); + SessionState.start(conf); + String cmd = "create table fetchOp (id int, name string) " + + "partitioned by (state string) " + + "row format delimited fields terminated by '|' " + + "stored as " + + "inputformat 'org.apache.hadoop.hive.ql.exec.TestOperators$CustomInFmt' " + + "outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' " + + "tblproperties ('myprop1'='val1', 'myprop2' = 'val2')"; + Driver driver = new Driver(); + driver.init(); + CommandProcessorResponse response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + List<Object> result = new ArrayList<Object>(); + + cmd = "load data local inpath '../data/files/employee.dat' " + + "overwrite into table fetchOp partition (state='CA')"; + driver.init(); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + + cmd = "load data local inpath '../data/files/employee2.dat' " + + "overwrite into table fetchOp partition (state='OR')"; + driver.init(); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + + cmd = "select * from fetchOp"; + driver.init(); + driver.setMaxRows(500); + response = driver.run(cmd); + assertEquals(0, response.getResponseCode()); + driver.getResults(result); + assertEquals(20, result.size()); + driver.close(); + } }
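
A note on the createRootHDFSDir change above: instead of requiring the HDFS root scratch directory to be exactly 777, the session now only checks that the directory's current permission bits contain at least the bits of 733 (rwx for the owner, wx for group and others). The following is a minimal, self-contained sketch of that containment test; the class and method names are illustrative only and are not part of the patch.

import org.apache.hadoop.fs.permission.FsPermission;

public class ScratchDirPermissionCheck {

  // True when every bit demanded by 'required' is already set in 'current'.
  static boolean isWritableEnough(FsPermission current, FsPermission required) {
    return (current.toShort() & required.toShort()) == required.toShort();
  }

  public static void main(String[] args) {
    FsPermission required = new FsPermission((short) 00733);
    // 777 contains all of 733's bits, so it passes; 700 does not.
    System.out.println(isWritableEnough(new FsPermission((short) 00777), required)); // true
    System.out.println(isWritableEnough(new FsPermission((short) 00700), required)); // false
  }
}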
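The new testFetchOperatorContextQuoting above pins down how FetchOperator.setFetchOperatorContext serialises the list of input directories: the paths are joined with a tab character, so any backslash or tab inside a path has to be escaped first (backslash before tab, or the two escapes would collide). The escaping routine itself lives inside FetchOperator and is not shown in this diff; the helper below is a hypothetical sketch that reproduces exactly the strings the test asserts on.

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class FetchDirListEscaping {

  // Double every backslash first, then turn literal tabs into "\t".
  static String escape(String path) {
    return path.replace("\\", "\\\\").replace("\t", "\\t");
  }

  // Join the escaped paths with a literal tab, as the directory-list property does.
  static String join(List<String> paths) {
    return paths.stream().map(FetchDirListEscaping::escape)
        .collect(Collectors.joining("\t"));
  }

  public static void main(String[] args) {
    List<String> paths = Arrays.asList(
        "hdfs://nn.example.com/fi\tl\\e\t1",
        "hdfs://nn.example.com/file\t2",
        "file:/file3");
    // Splitting this output on "\t" yields the three escaped entries the test expects.
    System.out.println(join(paths));
  }
}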
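The GenericUDFIndex rewrite above replaces the hand-rolled writable/java handling of the index argument with a standard ObjectInspectorConverters.Converter: for maps the index is converted to the map's key type, for lists it is coerced to an IntWritable, and a null conversion result short-circuits to a null return. The sketch below shows that coercion for a string-typed index, using the usual primitive object-inspector factory; it is an assumption-laden illustration, not code taken from the patch.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class IndexConverterSketch {
  public static void main(String[] args) {
    // Converter from a Java String index to the IntWritable the list branch expects.
    ObjectInspectorConverters.Converter converter =
        ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableIntObjectInspector);

    Object good = converter.convert("2");    // IntWritable holding 2
    Object bad = converter.convert("nope");  // expected to be null, which evaluate() maps to a null result
    System.out.println(good + " / " + bad);
  }
}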
