Mostafa Mokhtar created IMPALA-6744: ---------------------------------------
Summary: Inline codegend_compare_fn_ to avoid per row memory loads
and function call
Key: IMPALA-6744
URL: https://issues.apache.org/jira/browse/IMPALA-6744
Project: IMPALA
Issue Type: Bug
Components: Backend
Affects Versions: Impala 2.13.0
Reporter: Mostafa Mokhtar
Assignee: Tianyi Wang
Attachments: percentile query profile.txt
While evaluating Sort performance, I noticed that the codegen'd compare function
is not inlined, which results in a large per-row overhead.
The expected speedup from inlining it is 10-15%.
{code}
/// Returns a negative value if lhs is less than rhs, a positive value if lhs is
/// greater than rhs, or 0 if they are equal. All exprs (ordering_exprs_lhs_ and
/// ordering_exprs_rhs_) must have been prepared and opened before calling this,
/// i.e. 'sort_key_exprs' in the constructor must have been opened.
int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow* rhs) const {
return codegend_compare_fn_ == NULL ?
CompareInterpreted(lhs, rhs) :
(*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),
ordering_expr_evals_rhs_.data(), lhs, rhs);
}
{code}
From perf:
{code}
│ bool Sorter::TupleSorter::Less(const TupleRow* lhs, const TupleRow*
rhs) {
▒
7.43 │ push %rbp
▒
3.23 │ mov %rsp,%rbp
▒
9.44 │ push %r12
▒
2.69 │ push %rbx
▒
3.89 │ mov %rsi,%r12
▒
2.98 │ mov %rdi,%rbx
▒
6.06 │ sub $0x10,%rsp
◆
│ --num_comparisons_till_free_;
▒
│ DCHECK_GE(num_comparisons_till_free_, 0);
▒
│ if (UNLIKELY(num_comparisons_till_free_ == 0)) {
▒
3.75 │ subl $0x1,0x18(%rdi)
▒
9.42 │ ↓ je 58
▒
│ parent_->expr_results_pool_.Clear();
▒
│ num_comparisons_till_free_ = state_->batch_size();
▒
│ }
▒
│ return comparator_.Less(lhs, rhs);
▒
1.09 │17: mov 0x10(%rbx),%rdi
▒
│ /// Returns a negative value if lhs is less than rhs, a positive
value if lhs is
▒
│ /// greater than rhs, or 0 if they are equal. All exprs
(ordering_exprs_lhs_ and
▒
│ /// ordering_exprs_rhs_) must have been prepared and opened
before calling this,
▒
│ /// i.e. 'sort_key_exprs' in the constructor must have been
opened.
▒
│ int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow*
rhs) const {
▒
│ return codegend_compare_fn_ == NULL ?
▒
2.69 │ mov 0x58(%rdi),%rax
▒
│ CompareInterpreted(lhs, rhs) :
▒
│ (*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),
▒
│ ordering_expr_evals_rhs_.data(), lhs, rhs);
▒
5.43 │ test %rax,%rax
▒
│ ↓ je 40
▒
6.85 │ mov 0x20(%rdi),%rsi
▒
0.86 │ mov %rdx,%rcx
▒
2.55 │ mov 0x8(%rdi),%rdi
▒
3.38 │ mov %r12,%rdx
▒
6.10 │ → callq *(%rax)
▒
│ }
▒
5.84 │ add $0x10,%rsp
▒
│ /// All exprs (ordering_exprs_lhs_ and ordering_exprs_rhs_) must
have been prepared
▒
│ /// and opened before calling this.
▒
│ /// Force inlining because it tends not to be always inlined at
callsites, even in
▒
│ /// hot loops.
▒
│ bool ALWAYS_INLINE Less(const TupleRow* lhs, const TupleRow* rhs)
const {
▒
│ return Compare(lhs, rhs) < 0;
▒
1.77 │ shr $0x1f,%eax
▒
7.91 │ pop %rbx
▒
4.11 │ pop %r12
▒
0.49 │ pop %rbp
▒
1.75 │ ← retq
▒
│ /// i.e. 'sort_key_exprs' in the constructor must have been
opened.
▒
│ int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow*
rhs) const {
▒
│ return codegend_compare_fn_ == NULL ?
▒
│ CompareInterpreted(lhs, rhs) :
▒
│ (*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),
▒
│ ordering_expr_evals_rhs_.data(), lhs, rhs);
▒
│40: mov %r12,%rsi
▒
│ → callq
impala::TupleRowComparator::CompareInterpreted(impala::TupleRow const*,
impala::TupleRow const*) const
▒
│ add $0x10,%rsp
▒
│ /// All exprs (ordering_exprs_lhs_ and ordering_exprs_rhs_) must
have been prepared
▒
Press 'h' for help on key bindings
{code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
