Hi,
I want the temporary model data to be stored in HDFS rather than the local /tmp, so I modified the code like this:
class ALSModel(
    override val rank: Int,
    override val userFeatures: RDD[(Int, Array[Double])],
    override val productFeatures: RDD[(Int, Array[Double])],
    val userStringIntMap: BiMap[String, Int],
    val itemStringIntMap: BiMap[String, Int])
  extends MatrixFactorizationModel(rank, userFeatures, productFeatures)
  with PersistentModel[ALSAlgorithmParams] {

  // Persist every model component under hdfs://predictionspark:9000/tmp/<id>/
  // instead of the local /tmp.
  def save(id: String, params: ALSAlgorithmParams,
    sc: SparkContext): Boolean = {
    sc.parallelize(Seq(rank))
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/rank")
    userFeatures
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/userFeatures")
    productFeatures
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/productFeatures")
    sc.parallelize(Seq(userStringIntMap))
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/userStringIntMap")
    sc.parallelize(Seq(itemStringIntMap))
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/itemStringIntMap")
    true
  }

  override def toString = {
    s"userFeatures: [${userFeatures.count()}]" +
    s"(${userFeatures.take(2).toList}...)" +
    s" productFeatures: [${productFeatures.count()}]" +
    s"(${productFeatures.take(2).toList}...)" +
    s" userStringIntMap: [${userStringIntMap.size}]" +
    s"(${userStringIntMap.take(2)}...)" +
    s" itemStringIntMap: [${itemStringIntMap.size}]" +
    s"(${itemStringIntMap.take(2)}...)"
  }
}

object ALSModel
  extends PersistentModelLoader[ALSAlgorithmParams, ALSModel] {

  // Load the components back from the same HDFS paths.
  def apply(id: String, params: ALSAlgorithmParams,
    sc: Option[SparkContext]) = {
    new ALSModel(
      rank = sc.get
        .objectFile[Int](s"hdfs://predictionspark:9000/tmp/${id}/rank").first,
      userFeatures = sc.get
        .objectFile(s"hdfs://predictionspark:9000/tmp/${id}/userFeatures"),
      productFeatures = sc.get
        .objectFile(s"hdfs://predictionspark:9000/tmp/${id}/productFeatures"),
      userStringIntMap = sc.get
        .objectFile[BiMap[String, Int]](
          s"hdfs://predictionspark:9000/tmp/${id}/userStringIntMap").first,
      itemStringIntMap = sc.get
        .objectFile[BiMap[String, Int]](
          s"hdfs://predictionspark:9000/tmp/${id}/itemStringIntMap").first)
  }
}
It works.
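For reference, reading the files back from spark-shell to double-check the round trip would look roughly like this (just a sketch; the engine instance id is a placeholder, and the element types have to match what save() wrote above):

  val base = "hdfs://predictionspark:9000/tmp/SOME_ENGINE_INSTANCE_ID"
  // rank was written as a single-element RDD, the feature vectors as (Int, Array[Double]) pairs
  val rank = sc.objectFile[Int](s"$base/rank").first()
  val userFeatures = sc.objectFile[(Int, Array[Double])](s"$base/userFeatures")
  println(s"rank = $rank, userFeatures count = ${userFeatures.count()}")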
But why does pio-env.sh say:
# HADOOP_CONF_DIR: You must configure this if you intend to run PredictionIO
# with Hadoop 2.
# HADOOP_CONF_DIR=/opt/hadoop
I didn't set this, and it still works. Can someone explain why? And what exactly is
HADOOP_CONF_DIR? Is it the directory containing all the configuration files from the Hadoop server's etc/?
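For what it's worth, this is how I peeked at which filesystem Spark treats as the default (a minimal check from pio-shell / spark-shell; my understanding, which may be wrong, is that HADOOP_CONF_DIR should point at the directory holding core-site.xml etc. so that fs.defaultFS is picked up):

  // The Hadoop configuration Spark picked up is exposed on the SparkContext.
  // Without HADOOP_CONF_DIR this typically prints file:///, i.e. the local filesystem;
  // with it set it should print the namenode URI.
  println(sc.hadoopConfiguration.get("fs.defaultFS"))

  // If it printed hdfs://predictionspark:9000, I could presumably drop the scheme and
  // host from my save/load paths, e.g.:
  // userFeatures.saveAsObjectFile(s"/tmp/${id}/userFeatures")

I guess my hard-coded hdfs://predictionspark:9000 prefix works without HADOOP_CONF_DIR simply because the namenode is spelled out in every path, but I'd like to confirm that.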