[
https://issues.apache.org/jira/browse/HAWQ-44?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15002773#comment-15002773
]
ASF GitHub Bot commented on HAWQ-44:
------------------------------------
Github user shivzone commented on a diff in the pull request:
https://github.com/apache/incubator-hawq/pull/92#discussion_r44705317
--- Diff:
pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FragmentsStats.java ---
@@ -0,0 +1,226 @@
+package org.apache.hawq.pxf.api;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.codehaus.jackson.map.ObjectMapper;
+
+import java.io.IOException;
+
+/**
+ * FragmentsStats holds statistics for a given path.
+ */
+public class FragmentsStats {
+
+ /**
+ * Default fragment size. Assuming a fragment is equivalent to a block in
+ * HDFS, we guess a full fragment size is 64MB.
+ */
+ public static final long DEFAULT_FRAGMENT_SIZE = 67108864L;
+
+ private static Log Log = LogFactory.getLog(FragmentsStats.class);
+
+ // number of fragments
+ private long fragmentsNumber;
+ // first fragment size
+ private SizeAndUnit firstFragmentSize;
+ // total fragments size
+ private SizeAndUnit totalSize;
+
+ /**
+ * Enum to represent the unit a size is expressed in
+ * (Bytes/KB/MB/GB/TB).
+ */
+ public enum SizeUnit {
+ /**
+ * Bytes
+ */
+ B,
+ /**
+ * Kilobytes
+ */
+ KB,
+ /**
+ * Megabytes
+ */
+ MB,
+ /**
+ * Gigabytes
+ */
+ GB,
+ /**
+ * Terabytes
+ */
+ TB;
+ };
+
+ /**
+ * Container pairing a numeric size with the {@link SizeUnit} that the
+ * size is expressed in.
+ */
+ public class SizeAndUnit {
+ long size;
+ SizeUnit unit;
+
+ /**
+ * Default constructor. Initializes the size to 0 and the unit to
+ * {@link SizeUnit#B}.
+ */
+ public SizeAndUnit() {
+ this.size = 0;
+ this.unit = SizeUnit.B;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param size size, expressed in the given unit
+ * @param unit unit of the size
+ */
+ public SizeAndUnit(long size, SizeUnit unit) {
+ this.size = size;
+ this.unit = unit;
+ }
+
+ /**
+ * Returns the size. The value is to be read together with
+ * {@link #getUnit()}.
+ *
+ * @return size
+ */
+ public long getSize() {
+ return this.size;
+ }
+
+ /**
+ * Returns the unit of the size (B/KB/MB/GB/TB).
+ *
+ * @return unit
+ */
+ public SizeUnit getUnit() {
+ return this.unit;
+ }
+
+ @Override
+ public String toString() {
+ return size + "" + unit; // size directly followed by the unit name, no separator
+ }
+ }
+
+ /**
+ * Constructs a FragmentsStats. The two sizes are given in bytes and are
+ * stored as {@link SizeAndUnit} values, which may be scaled down to a
+ * larger unit.
+ *
+ * @param fragmentsNumber number of fragments
+ * @param firstFragmentSize first fragment size (in bytes)
+ * @param totalSize total size (in bytes)
+ */
+ public FragmentsStats(long fragmentsNumber, long firstFragmentSize,
+ long totalSize) {
+ this.setFragmentsNumber(fragmentsNumber);
+ this.setFirstFragmentSize(firstFragmentSize);
+ this.setTotalSize(totalSize);
+ }
+
+ /**
+ * Given a {@link FragmentsStats}, serialize it in JSON to be used as the
+ * result string for HAWQ. The serialized object is wrapped under the
+ * key "PXFFragmentsStats". An example result is as follows:
+ * <code>{"PXFFragmentsStats":{"fragmentsNumber"
+ * :3,"firstFragmentSize":67108864,"totalSize":200000000}}</code>
+ * <p>
+ * NOTE(review): the getters of this class expose firstFragmentSize and
+ * totalSize as {@link SizeAndUnit} objects, so the real output
+ * presumably nests a size and a unit for them instead of the plain
+ * numbers shown in the example - confirm against the mapper's output.
+ *
+ * @param stats the data to be serialized
+ * @return the result in JSON format
+ * @throws IOException if converting to JSON format failed
+ */
+ public static String dataToJSON(FragmentsStats stats) throws
IOException {
+ ObjectMapper mapper = new ObjectMapper();
+ // mapper serializes the properties of the class by default (presumably through its public getters - confirm)
+ return "{\"PXFFragmentsStats\":" + mapper.writeValueAsString(stats)
+ + "}";
+ }
+
+ /**
+ * Given a stats structure, convert it to a human-readable string.
+ * Intended for debugging purposes only.
+ *
+ * @param stats the data to be stringified
+ * @param datapath the data path part of the original URI (e.g., table
+ * name, *.csv, etc.)
+ * @return the stringified data
+ */
+ public static String dataToString(FragmentsStats stats, String
datapath) {
+ return "Statistics information for \"" + datapath + "\" "
+ + " Number of Fragments: " + stats.fragmentsNumber
+ + ", first Fragment size: " + stats.firstFragmentSize
+ + ", total size: " + stats.totalSize;
+ }
+
+ /**
+ * Returns the number of fragments for a given data source.
+ *
+ * @return number of fragments
+ */
+ public long getFragmentsNumber() {
+ return fragmentsNumber;
+ }
+
+ private void setFragmentsNumber(long fragmentsNumber) { // stores the fragment count as given
+ this.fragmentsNumber = fragmentsNumber;
+ }
+
+ /**
+ * Returns the size of the first fragment together with its unit. The
+ * size is in bytes only as long as it did not exceed the scaling
+ * threshold when it was stored; otherwise it is expressed in a larger
+ * unit, as reported by {@link SizeAndUnit#getUnit()}.
+ *
+ * @return first fragment size and its unit
+ */
+ public SizeAndUnit getFirstFragmentSize() {
+ return firstFragmentSize;
+ }
+
+ private void setFirstFragmentSize(long firstFragmentSize) { // converts the byte count into a size-and-unit pair
+ this.firstFragmentSize = setSizeAndUnit(firstFragmentSize);
+ }
+
+ /**
+ * Returns the total size of a given source, together with the unit it
+ * is expressed in. Usually it means the aggregation of the sizes of all
+ * its fragments.
+ *
+ * @return total size and its unit
+ */
+ public SizeAndUnit getTotalSize() {
+ return totalSize;
+ }
+
+ private void setTotalSize(long totalSize) { // converts the byte count into a size-and-unit pair
+ this.totalSize = setSizeAndUnit(totalSize);
+ }
+
+ private SizeAndUnit setSizeAndUnit(long originalSize) { // builds a size-and-unit pair from a raw size, scaling it down while it is too large
+ final long THRESHOLD = Integer.MAX_VALUE / 2; // sizes above this value get divided down to the next unit
+ int orderOfMagnitude = 0; // counts how many times the size was divided by 1024
+ SizeAndUnit sizeAndUnit = new SizeAndUnit();
+ sizeAndUnit.size = originalSize;
+
+ while (sizeAndUnit.size > THRESHOLD) {
+ sizeAndUnit.size /= 1024; // integer division: the remainder is discarded
+ orderOfMagnitude++;
+ }
+
+ sizeAndUnit.unit = getSizeUnit(orderOfMagnitude); // unit is chosen from the number of divisions performed
+ return sizeAndUnit;
+ }
+
+ private SizeUnit getSizeUnit(int orderOfMagnitude) {
+ switch (orderOfMagnitude) {
+ case 0:
+ return SizeUnit.B;
--- End diff --
no break statement
> Advanced statistics for PXF tables
> ----------------------------------
>
> Key: HAWQ-44
> URL: https://issues.apache.org/jira/browse/HAWQ-44
> Project: Apache HAWQ
> Issue Type: New Feature
> Components: PXF
> Reporter: Noa Horn
> Assignee: Noa Horn
> Labels: Performance
>
> PXF will get full statistics on a table using sampling.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)