[ https://issues.apache.org/jira/browse/HADOOP-3784?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12635863#action_12635863 ]
Pete Wyckoff commented on HADOOP-3784:
--------------------------------------

This is my cleaned-up version of dfs_read. I renamed the variables to be clearer (Craig may not like it :)).

{code}
static int dfs_read(const char *path, char *buf, size_t size, off_t offset,
                    struct fuse_file_info *fi)
{
  // retrieve dfs specific data
  dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;

  // check params and the context var (size is a size_t and cannot be < 0)
  assert(dfs);
  assert(path);
  assert(buf);
  assert(offset >= 0);

  dfs_fh *fh = (dfs_fh*)fi->fh;

  // if the request is bigger than the configured read buffer (and we are
  // buffering), grow the buffer so the whole request fits
  if (size > dfs->rdbuffer_size && !dfs->direct_io) {
    if (fh->buf != NULL) {
      free(fh->buf);
    }
    if ((fh->buf = (char*)malloc(size * sizeof(char))) == NULL) {
      syslog(LOG_ERR,
             "ERROR: could not allocate memory for file buffer for a read for file %s dfs %s:%d\n",
             path, __FILE__, __LINE__);
      return -EIO;
    }
    fh->bufferSize = 0;
  }

  // the buffer's capacity: rdbuffer_size, or the request size if it was
  // grown above
  const size_t bufferCapacity =
      (size > dfs->rdbuffer_size && !dfs->direct_io) ? size : dfs->rdbuffer_size;

  // refill the buffer if it is empty, or
  // the read starts before the buffer starts, or
  // the read ends after the buffer ends
  if (fh->bufferSize == 0 ||
      offset < fh->buffersStartOffset ||
      offset + size > fh->buffersStartOffset + fh->bufferSize) {

    // Read into the buffer from DFS
    assert(dfs->rdbuffer_size > 0);

    // hdfsPread returns a signed byte count (-1 on error), so num_read must
    // be signed for the error check below to work; each iteration advances
    // the file offset along with the destination pointer
    tSize num_read = 0;
    off_t tmp_offset = offset;
    size_t cur_left = bufferCapacity;
    char *cur_ptr = fh->buf;
    while (cur_left > 0 &&
           (num_read = hdfsPread(fh->fs, fh->hdfsFH, tmp_offset, cur_ptr, cur_left)) > 0) {
      tmp_offset += num_read;
      cur_ptr    += num_read;
      cur_left   -= num_read;
    }

    if (num_read < 0) {
      syslog(LOG_ERR, "Read error - pread failed for %s with return code %d %s:%d",
             path, (int)num_read, __FILE__, __LINE__);
      return -EIO;
    }
    fh->bufferSize = bufferCapacity - cur_left;
    fh->buffersStartOffset = offset;
  }

  // the buffer now starts at or before the requested offset; it may hold
  // fewer than size bytes past it if the file ends inside the request, so
  // the copy below is clamped with min() and a short count (0 at EOF) is
  // returned
  assert(offset >= fh->buffersStartOffset);

  const size_t bufferReadIndex = offset - fh->buffersStartOffset;
  assert(bufferReadIndex <= fh->bufferSize);

  const size_t amount = min(fh->buffersStartOffset + fh->bufferSize - offset, size);
  assert(amount <= fh->bufferSize);

  const char *offsetPtr = fh->buf + bufferReadIndex;
  assert(offsetPtr >= fh->buf);
  assert(offsetPtr + amount <= fh->buf + fh->bufferSize);

  memcpy(buf, offsetPtr, amount);

  return amount;
}
{code}

> Cleanup optimization of reads and change it to a flag and remove #ifdefs
> ------------------------------------------------------------------------
>
>                 Key: HADOOP-3784
>                 URL: https://issues.apache.org/jira/browse/HADOOP-3784
>             Project: Hadoop Core
>          Issue Type: Improvement
>          Components: contrib/fuse-dfs
>            Reporter: Pete Wyckoff
>
> Looks like optimized reads work, so let's make them part of the regular core of the code. But we should allow a flag and a custom-sized buffer.

--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
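
[editor's note] dfs_read above leans on two definitions that live elsewhere in fuse_dfs.c and are not shown in the comment: the dfs_fh handle fields and a min() helper. Below is a minimal sketch of what the snippet assumes. The field names are taken from the code above; the struct layout, the types, and the min() definition are assumptions, not the actual declarations.

{code}
#include <hdfs.h>       // hdfsFS, hdfsFile, hdfsPread, tSize
#include <stddef.h>     // size_t
#include <sys/types.h>  // off_t

// Hypothetical sketch of the per-open-file handle used by dfs_read.
typedef struct {
  hdfsFS fs;                 // handle to the DFS connection
  hdfsFile hdfsFH;           // the open HDFS file
  char *buf;                 // read-ahead buffer (rdbuffer_size bytes, or more if grown)
  size_t bufferSize;         // number of valid bytes currently in buf
  off_t buffersStartOffset;  // file offset that buf[0] corresponds to
} dfs_fh;

// min() helper assumed by the copy-out arithmetic in dfs_read
static inline size_t min(const size_t x, const size_t y) {
  return x < y ? x : y;
}
{code}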