On Mon, Apr 21, 2014 at 3:15 PM, dave <[email protected]> wrote: > On 04/18/2014 12:10 PM, Steve Borho wrote: >> >> # HG changeset patch >> # User Steve Borho <[email protected]> >> # Date 1397762142 18000 >> # Thu Apr 17 14:15:42 2014 -0500 >> # Node ID df76c716a254ba1b3fdc563d9e7803c4f4df1829 >> # Parent 1fab04de065a3f7f5fedc128f572b860d6df0de2 >> manually align the stack for GCC x86_32 builds >> >> This version declares x265_stack_align as extern "C" since it is an >> assembly >> function, fixes the return of count from Encoder::encode >> >> This needs testing on GCC built x86_32 platforms, any volunteers? >> >> For all threads x265 creates I'm hoping we can align the stack immediately >> in >> the call to threadMain(). >> >> At first glance, it seems only the call to x265_encoder_encode() needs to >> be >> stack aligned. >> >> diff -r 1fab04de065a -r df76c716a254 >> source/cmake/CMakeASM_YASMInformation.cmake >> --- a/source/cmake/CMakeASM_YASMInformation.cmake Fri Apr 18 >> 18:00:58 2014 +0530 >> +++ b/source/cmake/CMakeASM_YASMInformation.cmake Thu Apr 17 >> 14:15:42 2014 -0500 >> @@ -21,8 +21,7 @@ >> endif() >> endif() >> -# we cannot assume 16-byte stack alignment on x86_32 even with GCC >> -if(GCC AND X64) >> +if(GCC) >> set(ASM_FLAGS "${ASM_FLAGS} -DHAVE_ALIGNED_STACK=1") >> else() >> set(ASM_FLAGS "${ASM_FLAGS} -DHAVE_ALIGNED_STACK=0") >> diff -r 1fab04de065a -r df76c716a254 source/common/common.h >> --- a/source/common/common.h Fri Apr 18 18:00:58 2014 +0530 >> +++ b/source/common/common.h Thu Apr 17 14:15:42 2014 -0500 >> @@ -47,10 +47,21 @@ >> #define ALIGN_VAR_8(T, var) T var __attribute__((aligned(8))) >> #define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16))) >> #define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32))) >> + >> +#if X265_ARCH_X86 && !defined(X86_64) >> +extern "C" intptr_t x265_stack_align( void (*func)(), ... ); >> +#define x265_stack_align(func,...) 
x265_stack_align((void (*)())func, >> __VA_ARGS__) >> +#else >> +#define x265_stack_align(func,...) func(__VA_ARGS__) >> +#endif >> + >> #elif defined(_MSC_VER) >> + >> #define ALIGN_VAR_8(T, var) __declspec(align(8)) T var >> #define ALIGN_VAR_16(T, var) __declspec(align(16)) T var >> #define ALIGN_VAR_32(T, var) __declspec(align(32)) T var >> +#define x265_stack_align(func,...) func(__VA_ARGS__) >> + >> #endif // if defined(__GNUC__) >> #if HIGH_BIT_DEPTH >> diff -r 1fab04de065a -r df76c716a254 source/common/threading.cpp >> --- a/source/common/threading.cpp Fri Apr 18 18:00:58 2014 +0530 >> +++ b/source/common/threading.cpp Thu Apr 17 14:15:42 2014 -0500 >> @@ -28,12 +28,18 @@ >> namespace x265 { >> // x265 private namespace >> +/* C shim for forced stack alignment */ >> +static void stackAlignMain(Thread *instance) >> +{ >> + instance->threadMain(); >> +} >> + >> #if _WIN32 >> static DWORD WINAPI ThreadShim(Thread *instance) >> { >> // defer processing to the virtual function implemented in the >> derived class >> - instance->threadMain(); >> + x265_stack_align(stackAlignMain, instance); >> return 0; >> } >> @@ -70,7 +76,7 @@ >> // defer processing to the virtual function implemented in the >> derived class >> Thread *instance = reinterpret_cast<Thread *>(opaque); >> - instance->threadMain(); >> + x265_stack_align(stackAlignMain, instance); >> return NULL; >> } >> diff -r 1fab04de065a -r df76c716a254 source/encoder/api.cpp >> --- a/source/encoder/api.cpp Fri Apr 18 18:00:58 2014 +0530 >> +++ b/source/encoder/api.cpp Thu Apr 17 14:15:42 2014 -0500 >> @@ -103,6 +103,15 @@ >> return ret; >> } >> +#if defined(__GNUC__) && X265_ARCH_X86 && !defined(X86_64) >> +/* C wrapper for Encoder::encode() so we can align the stack prior to >> entry >> + * since the caller may not have aligned the stack enough for us */ >> +static intptr_t encode_stack_frame(Encoder *enc, bool bEos, const >> x265_picture* pic, x265_picture *pic_out, NALUnitEBSP **nalunits) >> +{ >> + return 
(intptr_t)enc->encode(bEos, pic, pic_out, nalunits); >> +} >> +#endif >> + >> extern "C" >> int x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t >> *pi_nal, x265_picture *pic_in, x265_picture *pic_out) >> { >> @@ -111,7 +120,12 @@ >> Encoder *encoder = static_cast<Encoder*>(enc); >> NALUnitEBSP *nalunits[MAX_NAL_UNITS] = { 0, 0, 0, 0, 0 }; >> + >> +#if defined(__GNUC__) && X265_ARCH_X86 && !defined(X86_64) >> + int numEncoded = (int)x265_stack_align(encode_stack_frame, encoder, >> !pic_in, pic_in, pic_out, nalunits); >> +#else >> int numEncoded = encoder->encode(!pic_in, pic_in, pic_out, >> nalunits); >> +#endif >> if (pp_nal && numEncoded > 0) >> { >> _______________________________________________ >> x265-devel mailing list >> [email protected] >> https://mailman.videolan.org/listinfo/x265-devel > > I resolved build dependencies for 32 bit on my system but it seems like > cmake is primarily setup to just build for the host system. It's ignoring > "-f elf32" for CMAKE_ASM_YASM_FLAGS and generating "-f elf64...". GCC and > ld are accepting -m32 but then ld fails to link yasm's 64bit object code > with GCC's 32bit code. > > I tried tinkering with the cmake generated build.make file to get yasm to > create 32bit code but then yasm doesn't like 64bit register symbols. How do > you get yasm to build 32bit object code from 64bit source code? > > Am I wasting my time trying to build x265 for 32 bit linux from 64 bit linux > or is this something that is needed?
I'm really not familiar with cross-compiling to x86_32 on 64-bit Linux; I've never been compelled to try it. This would be much easier to verify on an x86_32 Linux machine or on Windows with a Win32 MinGW setup (it doesn't matter whether Windows is 64-bit or not, since Windows ships with Win32 compatibility libraries). I used to have a 32-bit Linux setup until very recently, when that computer died. Murugan should be able to test the latest patch today, so hopefully this can be pushed. -- Steve Borho _______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
