Veena Basavaraj created SQOOP-2010:
--------------------------------------

             Summary: Matching is invoked on every record (row) we write; 
isn't this super expensive?
                 Key: SQOOP-2010
                 URL: https://issues.apache.org/jira/browse/SQOOP-2010
             Project: Sqoop
          Issue Type: Sub-task
            Reporter: Veena Basavaraj


{code}
    /**
     * Writes one record supplied as an object array.
     *
     * @param array the record; it is handed to fromIDF via setObjectData
     */
    @Override
    public void writeArrayRecord(Object[] array) {
      // Stage the record in the "from" data format, then run the shared write path.
      fromIDF.setObjectData(array);
      writeContent();
    }

    /**
     * Writes one record supplied as text.
     *
     * @param text the record; it is handed to fromIDF via setCSVTextData
     */
    @Override
    public void writeStringRecord(String text) {
      // Stage the record in the "from" data format, then run the shared write path.
      fromIDF.setCSVTextData(text);
      writeContent();
    }

    /**
     * Writes one record supplied as an opaque object.
     *
     * @param obj the record; it is handed to fromIDF via setData
     */
    @Override
    public void writeRecord(Object obj) {
      // Stage the record in the "from" data format, then run the shared write path.
      fromIDF.setData(obj);
      writeContent();
    }

    private void writeContent() {
      try {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Extracted data: " + fromIDF.getCSVTextData());
        }
        // NOTE: The fromIDF and the corresponding fromSchema is used only for 
the matching process
        // The output of the mappers is finally written to the toIDF object 
after the matching process
        // since the writable encapsulates the toIDF ==> new 
SqoopWritable(toIDF)
        toIDF.setObjectData(matcher.getMatchingData(fromIDF.getObjectData()));
        // NOTE: We do not use the reducer to do the writing (a.k.a LOAD in 
ETL). Hence the mapper sets up the writable
        context.write(writable, NullWritable.get());
      } catch (Exception e) {
        throw new SqoopException(MRExecutionError.MAPRED_EXEC_0013, e);
      }
    }

{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to