[ 
https://issues.apache.org/jira/browse/HAWQ-44?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15002775#comment-15002775
 ] 

ASF GitHub Bot commented on HAWQ-44:
------------------------------------

Github user shivzone commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/92#discussion_r44705379
  
    --- Diff: 
pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FragmentsStats.java ---
    @@ -0,0 +1,226 @@
    +package org.apache.hawq.pxf.api;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.codehaus.jackson.map.ObjectMapper;
    +
    +import java.io.IOException;
    +
    +/**
    + * FragmentsStats holds statistics for a given path.
    + */
    +public class FragmentsStats {
    +
    +    /**
    +     * Default fragment size. Assuming a fragment is equivalent to a block 
in
    +     * HDFS, we guess a full fragment size is 64MB.
    +     */
    +    public static final long DEFAULT_FRAGMENT_SIZE = 67108864L;
    +
    +    private static Log Log = LogFactory.getLog(FragmentsStats.class);
    +
    +    // number of fragments
    +    private long fragmentsNumber;
    +    // first fragment size
    +    private SizeAndUnit firstFragmentSize;
    +    // total fragments size
    +    private SizeAndUnit totalSize;
    +
    +    /**
    +     * Enum to represent unit (Bytes/KB/MB/GB/TB)
    +     */
    +    public enum SizeUnit {
    +        /**
    +         * Byte
    +         */
    +        B,
    +        /**
    +         * KB
    +         */
    +        KB,
    +        /**
    +         * MB
    +         */
    +        MB,
    +        /**
    +         * GB
    +         */
    +        GB,
    +        /**
    +         * TB
    +         */
    +        TB;
    +    };
    +
    +    /**
    +     * Container for size and unit
    +     */
    +    public class SizeAndUnit {
    +        long size;
    +        SizeUnit unit;
    +
    +        /**
    +         * Default constructor.
    +         */
    +        public SizeAndUnit() {
    +            this.size = 0;
    +            this.unit = SizeUnit.B;
    +        }
    +
    +        /**
    +         * Constructor.
    +         *
    +         * @param size size
    +         * @param unit unit
    +         */
    +        public SizeAndUnit(long size, SizeUnit unit) {
    +            this.size = size;
    +            this.unit = unit;
    +        }
    +
    +        /**
    +         * Returns size.
    +         *
    +         * @return size
    +         */
    +        public long getSize() {
    +            return this.size;
    +        }
    +
    +        /**
    +         * Returns unit (Byte/KB/MB/etc.).
    +         *
    +         * @return unit
    +         */
    +        public SizeUnit getUnit() {
    +            return this.unit;
    +        }
    +
    +        @Override
    +        public String toString() {
    +            return size + "" + unit;
    +        }
    +    }
    +
    +    /**
    +     * Constructs a FragmentsStats.
    +     *
    +     * @param fragmentsNumber number of fragments
    +     * @param firstFragmentSize first fragment size (in bytes)
    +     * @param totalSize total size (in bytes)
    +     */
    +    public FragmentsStats(long fragmentsNumber, long firstFragmentSize,
    +                          long totalSize) {
    +        this.setFragmentsNumber(fragmentsNumber);
    +        this.setFirstFragmentSize(firstFragmentSize);
    +        this.setTotalSize(totalSize);
    +    }
    +
    +    /**
    +     * Given a {@link FragmentsStats}, serialize it in JSON to be used as 
the
    +     * result string for HAWQ. An example result is as follows:
    +     * <code>{"PXFFragmentsStats":{"fragmentsNumber"
    +     * :3,"firstFragmentSize":67108864,"totalSize":200000000}}</code>
    +     *
    +     * @param stats the data to be serialized
    +     * @return the result in json format
    +     * @throws IOException if converting to JSON format failed
    +     */
    +    public static String dataToJSON(FragmentsStats stats) throws 
IOException {
    +        ObjectMapper mapper = new ObjectMapper();
    +        // mapper serializes all members of the class by default
    +        return "{\"PXFFragmentsStats\":" + mapper.writeValueAsString(stats)
    +                + "}";
    +    }
    +
    +    /**
    +     * Given a stats structure, convert it to be readable. Intended for
    +     * debugging purposes only.
    +     *
    +     * @param stats the data to be stringified
    +     * @param datapath the data path part of the original URI (e.g., table 
name,
    +     *            *.csv, etc.)
    +     * @return the stringified data
    +     */
    +    public static String dataToString(FragmentsStats stats, String 
datapath) {
    +        return "Statistics information for \"" + datapath + "\" "
    +                + " Number of Fragments: " + stats.fragmentsNumber
    +                + ", first Fragment size: " + stats.firstFragmentSize
    +                + ", total size: " + stats.totalSize;
    +    }
    +
    +    /**
    +     * Returns number of fragments for a given data source.
    +     *
    +     * @return number of fragments
    +     */
    +    public long getFragmentsNumber() {
    +        return fragmentsNumber;
    +    }
    +
    +    private void setFragmentsNumber(long fragmentsNumber) {
    +        this.fragmentsNumber = fragmentsNumber;
    +    }
    +
    +    /**
    +     * Returns the size of the first fragment, scaled to the unit it carries.
    +     *
    +     * @return first fragment size and its unit
    +     */
    +    public SizeAndUnit getFirstFragmentSize() {
    +        return firstFragmentSize;
    +    }
    +
    +    private void setFirstFragmentSize(long firstFragmentSize) {
    +        this.firstFragmentSize = setSizeAndUnit(firstFragmentSize);
    +    }
    +
    +    /**
    +     * Returns the total size of a given source. Usually it means the
    +     * aggregation of all its fragments size.
    +     *
    +     * @return total size
    +     */
    +    public SizeAndUnit getTotalSize() {
    +        return totalSize;
    +    }
    +
    +    private void setTotalSize(long totalSize) {
    +        this.totalSize = setSizeAndUnit(totalSize);
    +    }
    +
    +    private SizeAndUnit setSizeAndUnit(long originalSize) {
    +        final long THRESHOLD = Integer.MAX_VALUE / 2;
    +        int orderOfMagnitude = 0;
    +        SizeAndUnit sizeAndUnit = new SizeAndUnit();
    +        sizeAndUnit.size = originalSize;
    +
    +        while (sizeAndUnit.size > THRESHOLD) {
    +            sizeAndUnit.size /= 1024;
    +            orderOfMagnitude++;
    +        }
    +
    +        sizeAndUnit.unit = getSizeUnit(orderOfMagnitude);
    +        return sizeAndUnit;
    +    }
    +
    +    private SizeUnit getSizeUnit(int orderOfMagnitude) {
    +        switch (orderOfMagnitude) {
    --- End diff --
    
    Missing `break` for all the cases in this switch statement.


> Advanced statistics for PXF tables
> ----------------------------------
>
>                 Key: HAWQ-44
>                 URL: https://issues.apache.org/jira/browse/HAWQ-44
>             Project: Apache HAWQ
>          Issue Type: New Feature
>          Components: PXF
>            Reporter: Noa Horn
>            Assignee: Noa Horn
>              Labels: Performance
>
> PXF will get full statistics on a table using sampling.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to