[
https://issues.apache.org/jira/browse/SPARK-17573?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jianfei Wang updated SPARK-17573:
---------------------------------
Description:
I find that there are many places in Spark where we don't close the input/output
streams manually. If so, there will be potential "OOM" errors and some other
errors,
such as:
{code}
private[sql] def bytesToRow(bytes: Array[Byte], schema: StructType): Row = {
val bis = new ByteArrayInputStream(bytes)
val dis = new DataInputStream(bis)
val num = SerDe.readInt(dis)
Row.fromSeq((0 until num).map { i =>
doConversion(SerDe.readObject(dis), schema.fields(i).dataType)
})
}
private[sql] def rowToRBytes(row: Row): Array[Byte] = {
val bos = new ByteArrayOutputStream()
val dos = new DataOutputStream(bos)
val cols = (0 until row.length).map(row(_).asInstanceOf[Object]).toArray
SerDe.writeObject(dos, cols)
bos.toByteArray()
}
override def deserialize(storageFormat: Array[Byte]): MaxValue = {
val in = new ByteArrayInputStream(storageFormat)
val stream = new DataInputStream(in)
val isValueSet = stream.readBoolean()
val value = stream.readInt()
new MaxValue(value, isValueSet)
}
{code}
was:
I found that there are many places in Spark where we don't close the
input/output streams manually. If so, there will be potential "OOM" errors and
some other errors,
such as:
{code}
private[sql] def bytesToRow(bytes: Array[Byte], schema: StructType): Row = {
val bis = new ByteArrayInputStream(bytes)
val dis = new DataInputStream(bis)
val num = SerDe.readInt(dis)
Row.fromSeq((0 until num).map { i =>
doConversion(SerDe.readObject(dis), schema.fields(i).dataType)
})
}
private[sql] def rowToRBytes(row: Row): Array[Byte] = {
val bos = new ByteArrayOutputStream()
val dos = new DataOutputStream(bos)
val cols = (0 until row.length).map(row(_).asInstanceOf[Object]).toArray
SerDe.writeObject(dos, cols)
bos.toByteArray()
}
override def deserialize(storageFormat: Array[Byte]): MaxValue = {
val in = new ByteArrayInputStream(storageFormat)
val stream = new DataInputStream(in)
val isValueSet = stream.readBoolean()
val value = stream.readInt()
new MaxValue(value, isValueSet)
}
{code}
> Why don't we close the input/output Streams
> -------------------------------------------
>
> Key: SPARK-17573
> URL: https://issues.apache.org/jira/browse/SPARK-17573
> Project: Spark
> Issue Type: Improvement
> Components: SQL
> Affects Versions: 2.0.0
> Reporter: Jianfei Wang
> Labels: performance
>
> I find that there are many places in Spark where we don't close the
> input/output streams manually. If so, there will be potential "OOM" errors and
> some other errors,
> such as:
> {code}
> private[sql] def bytesToRow(bytes: Array[Byte], schema: StructType): Row = {
> val bis = new ByteArrayInputStream(bytes)
> val dis = new DataInputStream(bis)
> val num = SerDe.readInt(dis)
> Row.fromSeq((0 until num).map { i =>
> doConversion(SerDe.readObject(dis), schema.fields(i).dataType)
> })
> }
> private[sql] def rowToRBytes(row: Row): Array[Byte] = {
> val bos = new ByteArrayOutputStream()
> val dos = new DataOutputStream(bos)
> val cols = (0 until row.length).map(row(_).asInstanceOf[Object]).toArray
> SerDe.writeObject(dos, cols)
> bos.toByteArray()
> }
> override def deserialize(storageFormat: Array[Byte]): MaxValue = {
> val in = new ByteArrayInputStream(storageFormat)
> val stream = new DataInputStream(in)
> val isValueSet = stream.readBoolean()
> val value = stream.readInt()
> new MaxValue(value, isValueSet)
> }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]