On Wednesday, June 27, 2018 5:38:31 PM CEST Pavel Stehule wrote:
> 2018-06-27 17:19 GMT+02:00 Tomas Vondra <tomas.von...@2ndquadrant.com>:
> > On 06/26/2018 09:25 PM, Pavel Stehule wrote:
> >> Hi
> >> 
> >> ...
> >> 
> >> So I am able to see the effect of jit_tuple_deforming, and very clearly, but
> >> only if optimization is active. When optimization is not active,
> >> jit_tuple_deforming causes a slowdown.
> >> 
> >> So maybe a usage of jit_tuple_deforming can be conditioned by
> >> jit_optimization?
> > 
> > Can you share the test case and some detail about the hardware and
> > PostgreSQL configuration?
> 
> I did very simple test
> 
> 
> 0.
> 
> master branch without asserts, shared buffer to 1GB
> tested on Lenovo T520 8GB RAM 8CPU, i7
> Fedora 28, gcc  CFLAGS="-ggdb -Og -g3 -fno-omit-frame-pointer"  --with-llvm
> 
> 1.
> 
> select 'create table wt(' || string_agg(format('%I int', 'c' || i),',') ||
> ')' from generate_series(1,100) g(i) \gexec
> 
> 
> 2.
> 
> begin;
> select 'insert into wt values(' || (select
> string_agg((random()*10000)::int::text,',') from generate_series(1,j - j +
> 100) g(i)) || ')' from generate_series(1,1000000) gg(j) \gexec
> insert into wt select * from wt;
> commit;
> 
> 3.
> 
> set max_paralel_workers to 0; -- the effect of JIT will be more visible
> 
> analyze wt;
> \timing
> 
> select sum(c99) from wt;
> 
> I tested some combination of:
> 
> jit: off on
> jit_inline_above_cost: 0, 10000000000000
> jit_optimize_above_cost: 0, 10000000000000
> jit_tuple_deforming: on, off
> 
> 
> My primitive tests show a nice positive effect of jit_tuple_deforming if
> jit optimization is active. When jit optimization is not active,
> jit_tuple_deforming caused a slowdown in my test.
> 
> So there is range of costs between 100000 and 500000 where
> jit_tuple_deforming didn't work well (without optimization)
> 
> I am limited by the small memory of my notebook - when I created a table larger
> than 3GB, I got IO waits on my encrypted disk, and any effect of JIT was
> eliminated.
> 
> Regards
> 
> Pavel

Hi

I have studied this case a bit, and I too think that there is something wrong
here.
Right now, jit_optimize means -O3. It's really expensive, and triggering it
here is not the right solution. In the attached patch, I force -O1 for tuple
deforming. With your test case, on my computer, the results are:
- no jit : 850ms
- jit with tuple deforming without optimizations : 1650 ms (1.5ms spent 
optimizing)
- jit without tuple deforming : 820ms (0.2ms)
- jit with tuple deforming with optimization (-O3) : 770ms (105ms)
- jit with tuple deforming with patch (-O1) : 725ms (54ms)

I will look at the generated code for tuple deforming, but I think we should 
pre-optimize the LLVM bytecode if we do not want to involve the LLVM 
optimization passes.

Regards

 Pierre
From c2e70c8fbb7715283d3d53bdf5a70e4db18c99a9 Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Mon, 2 Jul 2018 13:44:10 +0200
Subject: [PATCH] Introduce opt1 in LLVM/JIT, and force it with deforming

---
 src/backend/jit/llvm/llvmjit.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c
index 5d0cdab1fc..025319e9c1 100644
--- a/src/backend/jit/llvm/llvmjit.c
+++ b/src/backend/jit/llvm/llvmjit.c
@@ -91,10 +91,12 @@ static const char *llvm_layout = NULL;
 
 
 static LLVMTargetMachineRef llvm_opt0_targetmachine;
+static LLVMTargetMachineRef llvm_opt1_targetmachine;
 static LLVMTargetMachineRef llvm_opt3_targetmachine;
 
 static LLVMTargetRef llvm_targetref;
 static LLVMOrcJITStackRef llvm_opt0_orc;
+static LLVMOrcJITStackRef llvm_opt1_orc;
 static LLVMOrcJITStackRef llvm_opt3_orc;
 
 
@@ -277,6 +279,8 @@ llvm_get_function(LLVMJitContext *context, const char *funcname)
 #if LLVM_VERSION_MAJOR < 5
 	if ((addr = LLVMOrcGetSymbolAddress(llvm_opt0_orc, funcname)))
 		return (void *) (uintptr_t) addr;
+	if ((addr = LLVMOrcGetSymbolAddress(llvm_opt1_orc, funcname)))
+		return (void *) (uintptr_t) addr;
 	if ((addr = LLVMOrcGetSymbolAddress(llvm_opt3_orc, funcname)))
 		return (void *) (uintptr_t) addr;
 #else
@@ -284,6 +288,10 @@ llvm_get_function(LLVMJitContext *context, const char *funcname)
 		elog(ERROR, "failed to look up symbol \"%s\"", funcname);
 	if (addr)
 		return (void *) (uintptr_t) addr;
+	if (LLVMOrcGetSymbolAddress(llvm_opt1_orc, &addr, funcname))
+		elog(ERROR, "failed to look up symbol \"%s\"", funcname);
+	if (addr)
+		return (void *) (uintptr_t) addr;
 	if (LLVMOrcGetSymbolAddress(llvm_opt3_orc, &addr, funcname))
 		elog(ERROR, "failed to look up symbol \"%s\"", funcname);
 	if (addr)
@@ -420,6 +428,8 @@ llvm_optimize_module(LLVMJitContext *context, LLVMModuleRef module)
 
 	if (context->base.flags & PGJIT_OPT3)
 		compile_optlevel = 3;
+	else if (context->base.flags & PGJIT_DEFORM)
+		compile_optlevel = 1;
 	else
 		compile_optlevel = 0;
 
@@ -491,6 +501,8 @@ llvm_compile_module(LLVMJitContext *context)
 
 	if (context->base.flags & PGJIT_OPT3)
 		compile_orc = llvm_opt3_orc;
+	else if (context->base.flags & PGJIT_DEFORM)
+		compile_orc = llvm_opt1_orc;
 	else
 		compile_orc = llvm_opt0_orc;
 
@@ -646,6 +658,11 @@ llvm_session_initialize(void)
 								LLVMCodeGenLevelNone,
 								LLVMRelocDefault,
 								LLVMCodeModelJITDefault);
+	llvm_opt1_targetmachine =
+		LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features,
+								LLVMCodeGenLevelLess,
+								LLVMRelocDefault,
+								LLVMCodeModelJITDefault);
 	llvm_opt3_targetmachine =
 		LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features,
 								LLVMCodeGenLevelAggressive,
@@ -661,12 +678,14 @@ llvm_session_initialize(void)
 	LLVMLoadLibraryPermanently(NULL);
 
 	llvm_opt0_orc = LLVMOrcCreateInstance(llvm_opt0_targetmachine);
+	llvm_opt1_orc = LLVMOrcCreateInstance(llvm_opt1_targetmachine);
 	llvm_opt3_orc = LLVMOrcCreateInstance(llvm_opt3_targetmachine);
 
 #if defined(HAVE_DECL_LLVMORCREGISTERGDB) && HAVE_DECL_LLVMORCREGISTERGDB
 	if (jit_debugging_support)
 	{
 		LLVMOrcRegisterGDB(llvm_opt0_orc);
+		LLVMOrcRegisterGDB(llvm_opt1_orc);
 		LLVMOrcRegisterGDB(llvm_opt3_orc);
 	}
 #endif
@@ -674,6 +693,7 @@ llvm_session_initialize(void)
 	if (jit_profiling_support)
 	{
 		LLVMOrcRegisterPerf(llvm_opt0_orc);
+		LLVMOrcRegisterPerf(llvm_opt1_orc);
 		LLVMOrcRegisterPerf(llvm_opt3_orc);
 	}
 #endif
@@ -700,6 +720,16 @@ llvm_shutdown(int code, Datum arg)
 		llvm_opt3_orc = NULL;
 	}
 
+	if (llvm_opt1_orc)
+	{
+#if defined(HAVE_DECL_LLVMORCREGISTERPERF) && HAVE_DECL_LLVMORCREGISTERPERF
+		if (jit_profiling_support)
+			LLVMOrcUnregisterPerf(llvm_opt1_orc);
+#endif
+		LLVMOrcDisposeInstance(llvm_opt1_orc);
+		llvm_opt1_orc = NULL;
+	}
+
 	if (llvm_opt0_orc)
 	{
 #if defined(HAVE_DECL_LLVMORCREGISTERPERF) && HAVE_DECL_LLVMORCREGISTERPERF
-- 
2.18.0

Reply via email to