Hello!
What is the throughput of Hudi? I currently use spark to insert 10,000 records 
(300 bytes each), which takes one minute. Is it too slow?
my program:
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

object HudiDataGen {
    def main(args: Array[String]): Unit = {
        import org.apache.hudi.DataSourceWriteOptions._
        import org.apache.hudi.QuickstartUtils._
        import org.apache.hudi.config.HoodieWriteConfig._
        import org.apache.spark.sql.SaveMode._

        import scala.collection.JavaConversions._

        //初始化
        val conf = new SparkConf().setAppName("HudiTest")
                .setMaster("local[*]")
        conf.set("spark.serializer", 
"org.apache.spark.serializer.KryoSerializer") //使用Kryo序列化库
        val sc = new SparkContext(conf)
        val spark = new SQLContext(sc)

        //设置表名、基本路径和数据生成器来为本指南生成记录。
        val tableName = Constant.tableName
        //        val basePath = Constant.hdfsPath
        val basePath = args(0)
        //        val basePath = "file:///e:/hudi_cow_table"
        val count = args(1)
        for (i <- 1 to count.toInt) {
            println("start:" + System.currentTimeMillis())
            val dataGen = new DataGenerator
            //生成一些新的行程样本,将其加载到DataFrame中,然后将DataFrame写入Hudi数据集中,如下所示。
            val inserts = convertToStringList(dataGen.generateInserts(10000))
            //        println("insert:"+System.currentTimeMillis())
            println("start insert:" + System.currentTimeMillis())
            val df = spark.read.json(spark.sparkContext.parallelize(inserts, 
32))
            df.write.format("org.apache.hudi").
                    options(getQuickstartWriteConfigs).
                    option(PRECOMBINE_FIELD_OPT_KEY, "ts").
                    option(RECORDKEY_FIELD_OPT_KEY, "uuid").
                    option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
                    option(TABLE_NAME, tableName).
                    //                option(STORAGE_TYPE_OPT_KEY, 
"MERGE_ON_READ").
                    mode(Append).
                    save(basePath);
            println("finish" + i + " " + System.currentTimeMillis())
        }

    }
}


[email protected]

Reply via email to