Hello, it seems that column metadata is not propagated when using Dataset.map(). Is there a workaround?
Below are the steps to reproduce:

import org.apache.spark.sql.{Dataset, Row}
import org.apache.spark.sql.types.{DoubleType, MetadataBuilder, StructField, StructType}
import spark.implicits._

val columnName = "col1"

// Attach a single key/value pair of metadata to the only column of the schema.
val meta = new MetadataBuilder().putString("foo", "bar").build()
val schema = StructType(Array(StructField(columnName, DoubleType, nullable = true, metadata = meta)))

/** Prints the schema of `d`, followed by the metadata of each field rendered as JSON.
  *
  * @param d the dataset whose schema and per-field metadata are printed
  */
def printSchema(d: Dataset[_]): Unit = {
  d.printSchema()
  d.schema.fields.foreach { field =>
    println(s"metadata for '${field.name}': ${field.metadata.json}")
  }
}

// The trailing null widens the element type to Any; each element is wrapped in a one-column Row.
val rows = spark.sparkContext.parallelize(Seq[Any](1.0, 5.0, 3.0, 2.0, 6.0, null).map(Row(_)))
val df = spark.createDataFrame(rows, schema)

printSchema(df)                          // metadata printed correctly
printSchema(df.select(columnName))       // metadata printed correctly
// NOTE(review): map() presumably derives its output schema from the result encoder rather than
// from the input column, which would explain the lost metadata - confirm against the Dataset docs.
printSchema(df.map(r => r.getDouble(0))) // metadata is missing

Thank you,
-Matthew