spark git commit: [SPARK-10863][SPARKR] Method coltypes() (New version)

2015-11-10 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 7c4ade0d7 -> d2405cb5e


[SPARK-10863][SPARKR] Method coltypes() (New version)

This is a follow up on PR #8984, as the corresponding branch for such PR was 
damaged.

Author: Oscar D. Lara Yejas 

Closes #9579 from olarayej/SPARK-10863_NEW14.

(cherry picked from commit 47735cdc2a878cfdbe76316d3ff8314a45dabf54)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2405cb5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2405cb5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2405cb5

Branch: refs/heads/branch-1.6
Commit: d2405cb5e1e219ca38ed5debf360191df84dd94b
Parents: 7c4ade0
Author: Oscar D. Lara Yejas 
Authored: Tue Nov 10 11:07:57 2015 -0800
Committer: Shivaram Venkataraman 
Committed: Tue Nov 10 11:08:08 2015 -0800

--
 R/pkg/DESCRIPTION|  1 +
 R/pkg/NAMESPACE  |  6 ++---
 R/pkg/R/DataFrame.R  | 49 +++
 R/pkg/R/generics.R   |  4 +++
 R/pkg/R/schema.R | 15 +--
 R/pkg/R/types.R  | 43 ++
 R/pkg/inst/tests/test_sparkSQL.R | 24 -
 7 files changed, 124 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d2405cb5/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 3d6edb7..369714f 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -34,4 +34,5 @@ Collate:
 'serialize.R'
 'sparkR.R'
 'stats.R'
+'types.R'
 'utils.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/d2405cb5/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 56b8ed0..52fd6c9 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -23,9 +23,11 @@ export("setJobGroup",
 exportClasses("DataFrame")
 
 exportMethods("arrange",
+  "as.data.frame",
   "attach",
   "cache",
   "collect",
+  "coltypes",
   "columns",
   "count",
   "cov",
@@ -262,6 +264,4 @@ export("structField",
"structType",
"structType.jobj",
"structType.structField",
-   "print.structType")
-
-export("as.data.frame")
+   "print.structType")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/d2405cb5/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index e9013aa..cc86806 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2152,3 +2152,52 @@ setMethod("with",
 newEnv <- assignNewEnv(data)
 eval(substitute(expr), envir = newEnv, enclos = newEnv)
   })
+
+#' Returns the column types of a DataFrame.
+#' 
+#' @name coltypes
+#' @title Get column types of a DataFrame
+#' @family dataframe_funcs
+#' @param x (DataFrame)
+#' @return value (character) A character vector with the column types of the given DataFrame
+#' @rdname coltypes
+#' @examples \dontrun{
+#' irisDF <- createDataFrame(sqlContext, iris)
+#' coltypes(irisDF)
+#' }
+setMethod("coltypes",
+  signature(x = "DataFrame"),
+  function(x) {
+# Get the data types of the DataFrame by invoking dtypes() function
+types <- sapply(dtypes(x), function(x) {x[[2]]})
+
+# Map Spark data types into R's data types using DATA_TYPES environment
+rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) {
+
+  # Check for primitive types
+  type <- PRIMITIVE_TYPES[[x]]
+
+  if (is.null(type)) {
+# Check for complex types
+for (t in names(COMPLEX_TYPES)) {
+  if (substring(x, 1, nchar(t)) == t) {
+type <- COMPLEX_TYPES[[t]]
+break
+  }
+}
+
+if (is.null(type)) {
+  stop(paste("Unsupported data type: ", x))
+}
+  }
+  type
+})
+
+# Find which types don't have mapping to R
+naIndices <- which(is.na(rTypes))
+
+# Assign the original scala data types to the unmatched ones
+rTypes[naIndices] <- types[naIndices]
+
+rTypes
+  })
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/d2405cb5/R/pkg/R/generics.R

spark git commit: [SPARK-10863][SPARKR] Method coltypes() (New version)

2015-11-10 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master e0701c756 -> 47735cdc2


[SPARK-10863][SPARKR] Method coltypes() (New version)

This is a follow up on PR #8984, as the corresponding branch for such PR was 
damaged.

Author: Oscar D. Lara Yejas 

Closes #9579 from olarayej/SPARK-10863_NEW14.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47735cdc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47735cdc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47735cdc

Branch: refs/heads/master
Commit: 47735cdc2a878cfdbe76316d3ff8314a45dabf54
Parents: e0701c7
Author: Oscar D. Lara Yejas 
Authored: Tue Nov 10 11:07:57 2015 -0800
Committer: Shivaram Venkataraman 
Committed: Tue Nov 10 11:07:57 2015 -0800

--
 R/pkg/DESCRIPTION|  1 +
 R/pkg/NAMESPACE  |  6 ++---
 R/pkg/R/DataFrame.R  | 49 +++
 R/pkg/R/generics.R   |  4 +++
 R/pkg/R/schema.R | 15 +--
 R/pkg/R/types.R  | 43 ++
 R/pkg/inst/tests/test_sparkSQL.R | 24 -
 7 files changed, 124 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/47735cdc/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 3d6edb7..369714f 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -34,4 +34,5 @@ Collate:
 'serialize.R'
 'sparkR.R'
 'stats.R'
+'types.R'
 'utils.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/47735cdc/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 56b8ed0..52fd6c9 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -23,9 +23,11 @@ export("setJobGroup",
 exportClasses("DataFrame")
 
 exportMethods("arrange",
+  "as.data.frame",
   "attach",
   "cache",
   "collect",
+  "coltypes",
   "columns",
   "count",
   "cov",
@@ -262,6 +264,4 @@ export("structField",
"structType",
"structType.jobj",
"structType.structField",
-   "print.structType")
-
-export("as.data.frame")
+   "print.structType")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/47735cdc/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index e9013aa..cc86806 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2152,3 +2152,52 @@ setMethod("with",
 newEnv <- assignNewEnv(data)
 eval(substitute(expr), envir = newEnv, enclos = newEnv)
   })
+
+#' Returns the column types of a DataFrame.
+#' 
+#' @name coltypes
+#' @title Get column types of a DataFrame
+#' @family dataframe_funcs
+#' @param x (DataFrame)
+#' @return value (character) A character vector with the column types of the given DataFrame
+#' @rdname coltypes
+#' @examples \dontrun{
+#' irisDF <- createDataFrame(sqlContext, iris)
+#' coltypes(irisDF)
+#' }
+setMethod("coltypes",
+  signature(x = "DataFrame"),
+  function(x) {
+# Get the data types of the DataFrame by invoking dtypes() function
+types <- sapply(dtypes(x), function(x) {x[[2]]})
+
+# Map Spark data types into R's data types using DATA_TYPES environment
+rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) {
+
+  # Check for primitive types
+  type <- PRIMITIVE_TYPES[[x]]
+
+  if (is.null(type)) {
+# Check for complex types
+for (t in names(COMPLEX_TYPES)) {
+  if (substring(x, 1, nchar(t)) == t) {
+type <- COMPLEX_TYPES[[t]]
+break
+  }
+}
+
+if (is.null(type)) {
+  stop(paste("Unsupported data type: ", x))
+}
+  }
+  type
+})
+
+# Find which types don't have mapping to R
+naIndices <- which(is.na(rTypes))
+
+# Assign the original scala data types to the unmatched ones
+rTypes[naIndices] <- types[naIndices]
+
+rTypes
+  })
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/47735cdc/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index efef7d6..89731af 100644
--- a/R/pkg/R/generics.R
+++