Veena Basavaraj created SQOOP-2010:
--------------------------------------
Summary: Matching is invoked on every record (row) we write; isn't
this super expensive?
Key: SQOOP-2010
URL: https://issues.apache.org/jira/browse/SQOOP-2010
Project: Sqoop
Issue Type: Sub-task
Reporter: Veena Basavaraj
{code}
/**
 * Writes one record supplied as an object array.
 *
 * Loads the array into {@code fromIDF} via {@code setObjectData} and then
 * calls {@code writeContent()}, which matches the data and writes it to the context.
 *
 * @param array the values of the record to write
 */
@Override
public void writeArrayRecord(Object[] array) {
fromIDF.setObjectData(array);
writeContent();
}
/**
 * Writes one record supplied as text.
 *
 * Loads the text into {@code fromIDF} via {@code setCSVTextData} and then
 * calls {@code writeContent()}, which matches the data and writes it to the context.
 *
 * @param text the record in the form accepted by {@code setCSVTextData}
 */
@Override
public void writeStringRecord(String text) {
fromIDF.setCSVTextData(text);
writeContent();
}
/**
 * Writes one record supplied as an opaque object.
 *
 * Loads the object into {@code fromIDF} via {@code setData} and then
 * calls {@code writeContent()}, which matches the data and writes it to the context.
 *
 * @param obj the record in the form accepted by {@code setData}
 */
@Override
public void writeRecord(Object obj) {
fromIDF.setData(obj);
writeContent();
}
private void writeContent() {
try {
if (LOG.isDebugEnabled()) {
LOG.debug("Extracted data: " + fromIDF.getCSVTextData());
}
// NOTE: The fromIDF and the corresponding fromSchema is used only for
the matching process
// The output of the mappers is finally written to the toIDF object
after the matching process
// since the writable encapsulates the toIDF ==> new
SqoopWritable(toIDF)
toIDF.setObjectData(matcher.getMatchingData(fromIDF.getObjectData()));
// NOTE: We do not use the reducer to do the writing (a.k.a LOAD in
ETL). Hence the mapper sets up the writable
context.write(writable, NullWritable.get());
} catch (Exception e) {
throw new SqoopException(MRExecutionError.MAPRED_EXEC_0013, e);
}
}
{code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)