[ https://issues.apache.org/jira/browse/STORM-1220?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15021154#comment-15021154 ]
ASF GitHub Bot commented on STORM-1220: --------------------------------------- Github user arunmahadevan commented on a diff in the pull request: https://github.com/apache/storm/pull/894#discussion_r45563324 --- Diff: external/storm-kafka/src/jvm/storm/kafka/StringScheme.java --- @@ -20,23 +20,27 @@ import backtype.storm.spout.Scheme; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; +import backtype.storm.utils.Utils; -import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.List; public class StringScheme implements Scheme { - + private static final Charset UTF8_CHARSET = StandardCharsets.UTF_8; public static final String STRING_SCHEME_KEY = "str"; - public List<Object> deserialize(byte[] bytes) { + public List<Object> deserialize(ByteBuffer bytes) { return new Values(deserializeString(bytes)); } - public static String deserializeString(byte[] string) { - try { - return new String(string, "UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); + public static String deserializeString(ByteBuffer string) { + if (!string.hasArray()) { --- End diff -- I think this should be `if (string.hasArray())` > Avoid double copying in the Kafka spout > --------------------------------------- > > Key: STORM-1220 > URL: https://issues.apache.org/jira/browse/STORM-1220 > Project: Apache Storm > Issue Type: Bug > Reporter: Haohui Mai > Assignee: Haohui Mai > > Currently the Kafka spout takes a {{ByteBuffer}} from Kafka. However, the > serialization scheme takes a {{byte[]}} array as input. Therefore the current > implementation copies the {{ByteBuffer}} to a new {{byte[]}} array in order > to hook everything together. > This JIRA proposes to change the interfaces of the serialization scheme to avoid > copying the data twice in the spout. -- This message was sent by Atlassian JIRA (v6.3.4#6332)