Kungu, can you please open a JIRA and attach this patch there?
More details here: http://wiki.apache.org/hadoop/HowToContribute Thanks! Arun On Feb 20, 2012, at 1:23 PM, 坤谷 wrote: > Hi experts, > > I have a question on IFileOutputStream. > I guess when writing key/value length (1 or 2 bytes) to IFile, this function is > inefficient: > public void write(byte[] b, int off, int len) throws IOException { > sum.update(b, off,len); > out.write(b,off,len); > } > I made the following changes and saw some improvement under certain circumstances. > Waiting for your professional answer. > > Regards, > Kungu > > > > @@ -36,43 +36,128 @@ > private final DataChecksum sum; > private byte[] barray; > private boolean closed = false; > + > + /** > + * The internal buffer where data is stored. > + */ > + protected byte buf[]; > > /** > - * Create a checksum output stream that writes > - * the bytes to the given stream. > - * @param out > + * The number of valid bytes in the buffer. This value is always > + * in the range <tt>0</tt> through <tt>buf.length</tt>; elements > + * <tt>buf[0]</tt> through <tt>buf[count-1]</tt> contain valid > + * byte data. > */ > + protected int count; > + > + /** > + * Creates a new checksum buffered output stream to write data to > + * the specified underlying output stream. > + * > + * @param out the underlying output stream. > + */ > public IFileOutputStream(OutputStream out) { > - super(out); > - sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, > - Integer.MAX_VALUE); > - barray = new byte[sum.getChecksumSize()]; > + this(out, 8192); > } > - > + > + /** > + * Creates a new buffered output stream to write data to the > + * specified underlying output stream with the specified buffer > + * size. > + * > + * @param out the underlying output stream. > + * @param size the buffer size. > + * @exception IllegalArgumentException if size <= 0. 
> + */ > + public IFileOutputStream(OutputStream out, int size) { > + super(out); > + if (size <= 0) { > + throw new IllegalArgumentException("Buffer size <= 0"); > + } > + buf = new byte[size]; > + > + sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, > + Integer.MAX_VALUE); > + barray = new byte[sum.getChecksumSize()]; > + } > + > + /** Flush the internal buffer */ > + private void flushBuffer() throws IOException { > + if (count > 0) { > + sum.update(buf, 0, count); > + out.write(buf, 0, count); > + count = 0; > + } > + } > + > + /** > + * Writes the specified byte to this buffered output stream. > + * > + * @param b the byte to be written. > + * @exception IOException if an I/O error occurs. > + */ > + public void write(int b) throws IOException { > + if (count >= buf.length) { > + flushBuffer(); > + } > + buf[count++] = (byte)(b & 0xFF); > + } > + > + /** > + * Writes <code>len</code> bytes from the specified byte array > + * starting at offset <code>off</code> to this buffered output stream. > + * > + * <p> Ordinarily this method stores bytes from the given array into this > + * stream's buffer, flushing the buffer to the underlying output stream as > + * needed. If the requested length is at least as large as this stream's > + * buffer, however, then this method will flush the buffer and write the > + * bytes directly to the underlying output stream. Thus redundant > + * <code>BufferedOutputStream</code>s will not copy data unnecessarily. > + * > + * @param b the data. > + * @param off the start offset in the data. > + * @param len the number of bytes to write. > + * @exception IOException if an I/O error occurs. > + */ > + public void write(byte b[], int off, int len) throws IOException { > + if (len >= buf.length) { > + /* If the request length exceeds the size of the output buffer, > + flush the output buffer and then write the data directly. > + In this way buffered streams will cascade harmlessly. 
*/ > + flushBuffer(); > + sum.update(b, off, len); > + out.write(b, off, len); > + return; > + } > + if (len > buf.length - count) { > + flushBuffer(); > + } > + System.arraycopy(b, off, buf, count, len); > + count += len; > + } > + > + /** > + * Flushes this buffered output stream. This forces any buffered > + * output bytes to be written out to the underlying output stream. > + * > + * @exception IOException if an I/O error occurs. > + * @see java.io.FilterOutputStream#out > + */ > + public void flush() throws IOException { > + flushBuffer(); > + out.flush(); > + } > + > @Override > public void close() throws IOException { > if (closed) { > return; > } > closed = true; > + flushBuffer(); > sum.writeValue(barray, 0, false); > out.write (barray, 0, sum.getChecksumSize()); > out.flush(); > } > - > - /** > - * Write bytes to the stream. > - */ > - @Override > - public void write(byte[] b, int off, int len) throws IOException { > - sum.update(b, off,len); > - out.write(b,off,len); > - } > - > - @Override > - public void write(int b) throws IOException { > - barray[0] = (byte) (b & 0xFF); > - write(barray,0,1); > - } > > } > > ________________________________ > > This email (including any attachments) is confidential and may be legally > privileged. If you received this email in error, please delete it immediately > and do not copy it or use it for any purpose or disclose its contents to any > other person. Thank you. > > 本电邮(包括任何附件)可能含有机密资料并受法律保护。如您不是正确的收件人,请您立即删除本邮件。请不要将本电邮进行复制并用作任何其他用途、或透露本邮件之内容。谢谢。 -- Arun C. Murthy Hortonworks Inc. http://hortonworks.com/