[
https://issues.apache.org/jira/browse/IMPALA-6744?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Tim Armstrong resolved IMPALA-6744.
-----------------------------------
Resolution: Duplicate
> Inline codegend_compare_fn_ to avoid per row memory loads and function call
> ---------------------------------------------------------------------------
>
> Key: IMPALA-6744
> URL: https://issues.apache.org/jira/browse/IMPALA-6744
> Project: IMPALA
> Issue Type: Bug
> Components: Backend
> Affects Versions: Impala 2.13.0
> Reporter: Mostafa Mokhtar
> Assignee: Tianyi Wang
> Priority: Major
> Attachments: percentile query profile.txt
>
>
> While evaluating Sort performance, I noticed that the codegen'd compare
> function is not inlined, which results in a large overhead per row.
> The expected speedup from inlining it is 10-15%.
> {code}
> /// Returns a negative value if lhs is less than rhs, a positive value if
> lhs is
> /// greater than rhs, or 0 if they are equal. All exprs
> (ordering_exprs_lhs_ and
> /// ordering_exprs_rhs_) must have been prepared and opened before calling
> this,
> /// i.e. 'sort_key_exprs' in the constructor must have been opened.
> int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow* rhs) const {
> return codegend_compare_fn_ == NULL ?
> CompareInterpreted(lhs, rhs) :
> (*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),
> ordering_expr_evals_rhs_.data(), lhs, rhs);
> }
> {code}
> Output from perf:
> {code}
> │ bool Sorter::TupleSorter::Less(const TupleRow* lhs, const
> TupleRow* rhs) {
>
> ▒
> 7.43 │ push %rbp
>
>
> ▒
> 3.23 │ mov %rsp,%rbp
>
>
> ▒
> 9.44 │ push %r12
>
>
> ▒
> 2.69 │ push %rbx
>
>
> ▒
> 3.89 │ mov %rsi,%r12
>
>
> ▒
> 2.98 │ mov %rdi,%rbx
>
>
> ▒
> 6.06 │ sub $0x10,%rsp
>
>
> ◆
> │ --num_comparisons_till_free_;
>
>
> ▒
> │ DCHECK_GE(num_comparisons_till_free_, 0);
>
>
> ▒
> │ if (UNLIKELY(num_comparisons_till_free_ == 0)) {
>
>
> ▒
> 3.75 │ subl $0x1,0x18(%rdi)
>
>
> ▒
> 9.42 │ ↓ je 58
>
>
> ▒
> │ parent_->expr_results_pool_.Clear();
>
>
> ▒
> │ num_comparisons_till_free_ = state_->batch_size();
>
>
> ▒
> │ }
>
>
> ▒
> │ return comparator_.Less(lhs, rhs);
>
>
> ▒
> 1.09 │17: mov 0x10(%rbx),%rdi
>
>
> ▒
> │ /// Returns a negative value if lhs is less than rhs, a
> positive value if lhs is
>
> ▒
> │ /// greater than rhs, or 0 if they are equal. All exprs
> (ordering_exprs_lhs_ and
>
> ▒
> │ /// ordering_exprs_rhs_) must have been prepared and opened
> before calling this,
>
> ▒
> │ /// i.e. 'sort_key_exprs' in the constructor must have been
> opened.
>
> ▒
> │ int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow*
> rhs) const {
>
> ▒
> │ return codegend_compare_fn_ == NULL ?
>
>
> ▒
> 2.69 │ mov 0x58(%rdi),%rax
>
>
> ▒
> │ CompareInterpreted(lhs, rhs) :
>
>
> ▒
> │ (*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),
>
>
> ▒
> │ ordering_expr_evals_rhs_.data(), lhs, rhs);
>
>
> ▒
> 5.43 │ test %rax,%rax
>
>
> ▒
> │ ↓ je 40
>
>
> ▒
> 6.85 │ mov 0x20(%rdi),%rsi
>
>
> ▒
> 0.86 │ mov %rdx,%rcx
>
>
> ▒
> 2.55 │ mov 0x8(%rdi),%rdi
>
>
> ▒
> 3.38 │ mov %r12,%rdx
>
>
> ▒
> 6.10 │ → callq *(%rax)
>
>
> ▒
> │ }
>
>
> ▒
> 5.84 │ add $0x10,%rsp
>
>
> ▒
> │ /// All exprs (ordering_exprs_lhs_ and ordering_exprs_rhs_)
> must have been prepared
>
> ▒
> │ /// and opened before calling this.
>
>
> ▒
> │ /// Force inlining because it tends not to be always inlined at
> callsites, even in
>
> ▒
> │ /// hot loops.
>
>
> ▒
> │ bool ALWAYS_INLINE Less(const TupleRow* lhs, const TupleRow*
> rhs) const {
>
> ▒
> │ return Compare(lhs, rhs) < 0;
>
>
> ▒
> 1.77 │ shr $0x1f,%eax
>
>
> ▒
> 7.91 │ pop %rbx
>
>
> ▒
> 4.11 │ pop %r12
>
>
> ▒
> 0.49 │ pop %rbp
>
>
> ▒
> 1.75 │ ← retq
>
>
> ▒
> │ /// i.e. 'sort_key_exprs' in the constructor must have been
> opened.
>
> ▒
> │ int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow*
> rhs) const {
>
> ▒
> │ return codegend_compare_fn_ == NULL ?
>
>
> ▒
> │ CompareInterpreted(lhs, rhs) :
>
>
> ▒
> │ (*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),
>
>
> ▒
> │ ordering_expr_evals_rhs_.data(), lhs, rhs);
>
>
> ▒
> │40: mov %r12,%rsi
>
>
> ▒
> │ → callq
> impala::TupleRowComparator::CompareInterpreted(impala::TupleRow const*,
> impala::TupleRow const*) const
> ▒
> │ add $0x10,%rsp
>
>
> ▒
> │ /// All exprs (ordering_exprs_lhs_ and ordering_exprs_rhs_)
> must have been prepared
>
> ▒
> Press 'h' for help on key bindings
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
