Hello community, here is the log from the commit of package babl for openSUSE:Factory checked in at 2018-05-25 21:35:56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/babl (Old) and /work/SRC/openSUSE:Factory/.babl.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "babl" Fri May 25 21:35:56 2018 rev:41 rq:610968 version:0.1.50 Changes: -------- --- /work/SRC/openSUSE:Factory/babl/babl.changes 2018-05-13 15:53:34.871473053 +0200 +++ /work/SRC/openSUSE:Factory/.babl.new/babl.changes 2018-05-25 21:35:57.290921426 +0200 @@ -1,0 +2,14 @@ +Mon May 21 00:47:05 UTC 2018 - [email protected] + +- Improvements to speed and precision of indexed code, + improvements to mesonbuild. + +------------------------------------------------------------------- +Wed May 16 18:21:48 UTC 2018 - [email protected] + +- Update to version 0.1.48: + + Fix u8 <-> double conversions for chroma, SSE2 version of RGBA + float to CIE L / Lab. + + Build with -Ofast by default. + +------------------------------------------------------------------- Old: ---- babl-0.1.46.tar.bz2 New: ---- babl-0.1.50.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ babl.spec ++++++ --- /var/tmp/diff_new_pack.Kl9eNQ/_old 2018-05-25 21:35:57.930898263 +0200 +++ /var/tmp/diff_new_pack.Kl9eNQ/_new 2018-05-25 21:35:57.930898263 +0200 @@ -18,7 +18,7 @@ %define debug_package_requires libbabl-0_1-0 = %{version}-%{release} Name: babl -Version: 0.1.46 +Version: 0.1.50 Release: 0 Summary: Dynamic Pixel Format Translation Library License: LGPL-3.0-or-later ++++++ babl-0.1.46.tar.bz2 -> babl-0.1.50.tar.bz2 ++++++ ++++ 4680 lines of diff (skipped) ++++ retrying with extended exclude list diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/INSTALL new/babl-0.1.50/INSTALL --- old/babl-0.1.46/INSTALL 2018-04-10 23:11:28.000000000 +0200 +++ new/babl-0.1.50/INSTALL 2018-05-20 10:48:54.000000000 +0200 @@ -1,5 +1,5 @@ -babl 0.1.46 +babl 0.1.50 Dynamic; any to 
any, pixel format conversion library. @@ -12,10 +12,10 @@ installation (or a variation on this theme): ------------------------------------------------------------ - foo$ wget ftp://ftp.gtk.org/pub/babl/0.1/babl-0.1.46.tar.bz2 - foo$ tar jxf babl-0.1.46.tar.gz - foo$ cd babl-0.1.46 - foo/babl-0.1.46$ ./configure && make && sudo make install + foo$ wget ftp://ftp.gtk.org/pub/babl/0.1/babl-0.1.50.tar.bz2 + foo$ tar jxf babl-0.1.50.tar.gz + foo$ cd babl-0.1.50 + foo/babl-0.1.50$ ./configure && make && sudo make install ------------------------------------------------------------ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/NEWS new/babl-0.1.50/NEWS --- old/babl-0.1.46/NEWS 2018-04-10 23:08:46.000000000 +0200 +++ new/babl-0.1.50/NEWS 2018-05-20 10:45:34.000000000 +0200 @@ -3,7 +3,14 @@ the news section both in the README and the webpage. --> - +2018-05-20 babl-0.1.50 </dt><dd> +Improvements to speed and precision of indexed code, improvements to meson +build. + </dd><dt> +2018-05-15 babl-0.1.48 </dt><dd> +fix u8 <-> double conversions for chroma, SSE2 version of RGBA float to +CIE L / Lab. Build with -Ofast by default. + </dd><dt> 2018-04-10 babl-0.1.46 </dt><dd> added extensions with more coverage for u32, half and other utilit fast paths improving fast path coverage. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/README new/babl-0.1.50/README --- old/babl-0.1.46/README 2018-04-10 23:12:04.000000000 +0200 +++ new/babl-0.1.50/README 2018-05-20 10:49:19.000000000 +0200 @@ -1,4 +1,4 @@ -Babl-0.1.46 +Babl-0.1.50 Contents @@ -63,6 +63,12 @@ release is done a babl release is most often put out just prior to the GEGL release. +2018-05-20 babl-0.1.50 + Improvements to speed and precision of indexed code, improvements + to meson build. +2018-05-15 babl-0.1.48 + fix u8 <-> double conversions for chroma, SSE2 version of RGBA + float to CIE L / Lab. Build with -Ofast by default. 2018-04-10 babl-0.1.46 added extensions with more coverage for u32, half and other utilit fast paths improving fast path coverage. 
@@ -1643,19 +1649,6 @@ u8 Y -cairo-ARGB32 - -bytes/pixel - 4 -model - R'aG'aB'aA -components - - u8 B'a - u8 G'a - u8 R'a - u8 A - cairo-RGB24 bytes/pixel @@ -1977,6 +1970,19 @@ float saturation float lightness +cairo-ARGB32 + +bytes/pixel + 4 +model + R'aG'aB'aA +components + + u8 B'a + u8 G'a + u8 R'a + u8 A + cairo-A8 bytes/pixel @@ -2281,4 +2287,4 @@ Félix Piédallu Initial meson build -/babl-0.1.46 +/babl-0.1.50 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/babl/babl-icc.c new/babl-0.1.50/babl/babl-icc.c --- old/babl-0.1.46/babl/babl-icc.c 2017-10-25 16:48:41.000000000 +0200 +++ new/babl-0.1.50/babl/babl-icc.c 2018-04-25 20:22:04.000000000 +0200 @@ -342,7 +342,7 @@ fprintf (stderr, "%f %f %f %f %f %f %f\n", g, a, b, c, d, e, f); { - fprintf (stdout, "unhandled parametric sRGB formula TRC type %i\n", function_type); + fprintf (stderr, "unhandled parametric sRGB formula TRC type %i\n", function_type); *error = "unhandled sRGB formula like TRC"; return babl_trc_gamma (2.2); } @@ -350,7 +350,7 @@ break; default: *error = "unhandled parametric TRC"; - fprintf (stdout, "unhandled parametric TRC type %i\n", function_type); + fprintf (stderr, "unhandled parametric TRC type %i\n", function_type); return babl_trc_gamma (2.2); break; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/babl/babl-image.c new/babl-0.1.50/babl/babl-image.c --- old/babl-0.1.46/babl/babl-image.c 2017-09-21 21:06:11.000000000 +0200 +++ new/babl-0.1.50/babl/babl-image.c 
2018-05-19 17:21:32.000000000 +0200 @@ -64,7 +64,7 @@ babl->class_type = BABL_IMAGE; babl->instance.id = 0; - babl->instance.name = "slaritbartfast"; + babl->instance.name = "slartibartfast"; babl->image.format = format; babl->image.model = model; babl->image.components = components; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/babl/babl-internal.h new/babl-0.1.50/babl/babl-internal.h --- old/babl-0.1.46/babl/babl-internal.h 2018-01-20 18:39:30.000000000 +0100 +++ new/babl-0.1.50/babl/babl-internal.h 2018-04-25 20:20:56.000000000 +0200 @@ -127,7 +127,7 @@ __android_log_print (ANDROID_LOG_DEBUG, "BABL", "When loading %s:\n\t", babl_extender()->instance.name); #else - fprintf (stdout, "When loading %s:\n\t", babl_extender()->instance.name); + fprintf (stderr, "When loading %s:\n\t", babl_extender()->instance.name); #endif } @@ -135,7 +135,7 @@ __android_log_print (ANDROID_LOG_DEBUG, "BABL", "%s:%i %s()", file, line, function); #else - fprintf (stdout, "%s:%i %s()\n\t", file, line, function); + fprintf (stderr, "%s:%i %s()\n\t", file, line, function); #endif } @@ -143,8 +143,8 @@ __android_log_vprint (ANDROID_LOG_DEBUG, "BABL", fmt, varg); #else - vfprintf (stdout, fmt, varg); - fprintf (stdout, "\n"); + vfprintf (stderr, fmt, varg); + fprintf (stderr, "\n"); fflush (NULL); #endif return; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/babl/babl-palette.c new/babl-0.1.50/babl/babl-palette.c --- old/babl-0.1.46/babl/babl-palette.c 2018-01-03 
17:01:19.000000000 +0100 +++ new/babl-0.1.50/babl/babl-palette.c 2018-05-18 15:32:14.000000000 +0200 @@ -19,6 +19,7 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include <math.h> #include <limits.h> #include <assert.h> #include "config.h" @@ -31,8 +32,8 @@ /* A default palette, containing standard ANSI / EGA colors * */ -static unsigned char defpal_data[4*16] = -{ +static unsigned char defpal_data[4*16] = +{ 0 ,0 ,0 ,255, 127,0 ,0 ,255, 0 ,127,0 ,255, @@ -52,6 +53,14 @@ }; static double defpal_double[4*8*16]; +static unsigned short ceil_sqrt_u8[3 * 255 * 255 + 1]; + + +typedef struct BablPaletteRadius +{ + unsigned char idx; + unsigned short diff; +} BablPaletteRadius; typedef struct BablPalette { @@ -62,10 +71,88 @@ */ double *data_double; unsigned char *data_u8; + BablPaletteRadius *radii; volatile unsigned int hash[HASH_TABLE_SIZE]; } BablPalette; static void +init_ceil_sqrt_u8 (void) +{ + int i; + + babl_mutex_lock (babl_format_mutex); + + if (! ceil_sqrt_u8[1]) + { + for (i = 0; i <= 3 * 255 * 255; i++) + ceil_sqrt_u8[i] = ceil (sqrt (i)); + } + + babl_mutex_unlock (babl_format_mutex); +} + +static inline int +diff2_u8 (const unsigned char *p1, + const unsigned char *p2) +{ + return ((int) p1[0] - (int) p2[0]) * ((int) p1[0] - (int) p2[0]) + + ((int) p1[1] - (int) p2[1]) * ((int) p1[1] - (int) p2[1]) + + ((int) p1[2] - (int) p2[2]) * ((int) p1[2] - (int) p2[2]); +} + +static int +babl_palette_radius_compare (const void *r1, + const void *r2) +{ + const BablPaletteRadius *radius1 = r1; + const BablPaletteRadius *radius2 = r2; + + return (int) radius1->diff - (int) radius2->diff; +} + +static void +babl_palette_init_radii (BablPalette *pal) +{ + int i, j; + + init_ceil_sqrt_u8 (); + + /* calculate the distance between each pair of colors in the palette, and, for + * each color, construct a list of all other colors and their distances from + * it, sorted by distance. 
we use these lists in babl_palette_lookup() to + * speed up the search, as described in the function. + */ + + pal->radii = babl_malloc (sizeof (BablPaletteRadius) * + (pal->count - 1) * + pal->count); + + for (i = 0; i < pal->count; i++) + { + BablPaletteRadius *radii1 = pal->radii + (pal->count - 1) * i; + const unsigned char *p1 = pal->data_u8 + 4 * i; + + for (j = i + 1; j < pal->count; j++) + { + BablPaletteRadius *radii2 = pal->radii + (pal->count - 1) * j; + const unsigned char *p2 = pal->data_u8 + 4 * j; + unsigned short diff; + + diff = floor (sqrt (diff2_u8 (p1, p2))); + + radii1[j - 1].idx = j; + radii1[j - 1].diff = diff; + + radii2[i].idx = i; + radii2[i].diff = diff; + } + + qsort (radii1, pal->count - 1, sizeof (BablPaletteRadius), + babl_palette_radius_compare); + } +} + +static void babl_palette_reset_hash (BablPalette *pal) { int i; @@ -75,10 +162,14 @@ } } +#define BABL_IDX_FACTOR 255.5 + static int -babl_palette_lookup (BablPalette *pal, int r, int g, int b, int a) +babl_palette_lookup (BablPalette *pal, + const unsigned char *p, + int best_idx) { - unsigned int pixel = (r << 16) | (g << 8) | b; + unsigned int pixel = p[0] | (p[1] << 8) | (p[2] << 16); int hash_index = pixel % HASH_TABLE_SIZE; unsigned int hash_value = pal->hash[hash_index]; unsigned int hash_pixel = hash_value & 0x00ffffffu; @@ -97,26 +188,60 @@ } else { - int best_idx = 0; - int best_diff = INT_MAX; + const BablPaletteRadius *radii = pal->radii + (pal->count - 1) * best_idx; + const unsigned char *q; + int best_diff2; + int best_diff; + int diff0; + int i; + + /* best_idx is the closest palette entry to the previous pixel (referred + * to as the source color). based on the assumption that nearby pixels + * have similar color, we start the search for the current closest entry + * at best_idx, and iterate over the entry's color list, as calculated in + * babl_palette_init_radii(), in search for a better match. 
+ */ + + q = pal->data_u8 + 4 * best_idx; + best_diff2 = diff2_u8 (p, q); + best_diff = ceil_sqrt_u8[best_diff2]; + diff0 = best_diff; - for (idx = 0; idx < pal->count; idx++) + for (i = 0; i < pal->count - 1; i++) { - unsigned char *palpx = pal->data_u8 + idx * 4; - int pr = palpx[0]; - int pg = palpx[1]; - int pb = palpx[2]; - - int diff = (r - pr) * (r - pr) + - (g - pg) * (g - pg) + - (b - pb) * (b - pb); - if (diff < best_diff) + const BablPaletteRadius *radius = &radii[i]; + int min_diff; + int diff2; + + /* radius->diff is the distance from the source color to the current + * color. diff0 is the distance from the source color to the input + * color. according to the triangle inequality, the distance from + * the current color to the input color is at least + * radius->diff - diff0. if the shortest distance found so far is + * less than that, then the best match found so far is necessarily + * better than the current color, and we can stop the search, since + * the color list is sorted in ascending radius->diff order. 
+ */ + + idx = radius->idx; + min_diff = radius->diff - diff0; + + if (best_diff < min_diff || (best_diff == min_diff && best_idx < idx)) + break; + + q = pal->data_u8 + 4 * idx; + diff2 = diff2_u8 (p, q); + + if (diff2 < best_diff2 || (diff2 == best_diff2 && idx < best_idx)) { - best_diff = diff; - best_idx = idx; + best_idx = idx; + best_diff2 = diff2; + best_diff = ceil_sqrt_u8[diff2]; } } + pal->hash[hash_index] = ((unsigned int) best_idx << 24) | pixel; + return best_idx; } } @@ -139,6 +264,8 @@ babl_process (babl_fish (format, babl_format ("R'G'B'A u8")), data, pal->data_u8, count); + babl_palette_init_radii (pal); + babl_palette_reset_hash (pal); return pal; @@ -149,6 +276,7 @@ babl_free (pal->data); babl_free (pal->data_double); babl_free (pal->data_u8); + babl_free (pal->radii); babl_free (pal); } @@ -172,93 +300,101 @@ babl_process (babl_fish (pal.format, babl_format ("RGBA double")), pal.data, pal.data_double, pal.count); + babl_palette_init_radii (&pal); babl_palette_reset_hash (&pal); return &pal; } static void rgba_to_pal (Babl *conversion, - char *src, + char *src_b, char *dst, long n, void *dst_model_data) { + const Babl *space = babl_conversion_get_source_space (conversion); BablPalette **palptr = dst_model_data; - BablPalette *pal = *palptr; + BablPalette *pal; + int best_idx = 0; + assert (palptr); + pal = *palptr; + assert(pal); + while (n--) { - int idx; - - int best_idx = 0; - double best_diff = 100000; - double *srcf; - - srcf = ((double *) src); - - for (idx = 0; idx<pal->count; idx++) - { - double diff; - double *palpx = ((double *)pal->data_double) + idx * 4; + double *src_d = (void*) src_b; + unsigned char src[4]; + int c; + for (c = 0; c < 3; c++) + { + if (src_d[c] >= 1.0f) + src[c] = 255; + else if (src_d[c] <= 0.0f) + src[c] = 0; + else + src[c] = babl_trc_from_linear (space->space.trc[0], + src_d[c]) * 255 + 0.5f; + } + if (src_d[3] >= 1.0f) + src[3] = 255; + else if (src_d[3] <= 0.0f) + src[3] = 0; + else + src[3] = src_d[3] * 255 + 
0.5f; - diff = (palpx[0] - srcf[0]) * (palpx[0] - srcf[0]) + - (palpx[1] - srcf[1]) * (palpx[1] - srcf[1]) + - (palpx[2] - srcf[2]) * (palpx[2] - srcf[2]); - if (diff <= best_diff) - { - best_diff = diff; - best_idx = idx; - } - } + best_idx = babl_palette_lookup (pal, src, best_idx); - ((double *) dst)[0] = best_idx / 255.5; + ((double *) dst)[0] = best_idx / BABL_IDX_FACTOR; - src += sizeof (double) * 4; + src_b += sizeof (double) * 4; dst += sizeof (double) * 1; } + } static void rgba_to_pala (Babl *conversion, - char *src, + char *src_i, char *dst, long n, void *dst_model_data) { + const Babl *space = babl_conversion_get_destination_space (conversion); BablPalette **palptr = dst_model_data; - BablPalette *pal = *palptr; - + BablPalette *pal; + int best_idx = 0; + assert (palptr); + pal = *palptr; assert(pal); + while (n--) { - int idx; - - int best_idx = 0; - double best_diff = 100000; - double *srcf; - double alpha; + double *src_d = (void*) src_i; + unsigned char src[4]; + int c; + for (c = 0; c < 3; c++) + { + if (src_d[c] >= 1.0f) + src[c] = 255; + else if (src_d[c] <= 0.0f) + src[c] = 0; + else + src[c] = babl_trc_from_linear (space->space.trc[0], + src_d[c]) * 255 + 0.5f; + } + if (src_d[3] >= 1.0f) + src[3] = 255; + else if (src_d[3] <= 0.0f) + src[3] = 0; + else + src[3] = src_d[3] * 255 + 0.5f; - srcf = ((double *) src); - alpha = srcf[3]; - - for (idx = 0; idx<pal->count; idx++) - { - double diff; - double *palpx = ((double *)pal->data_double) + idx * 4; - - diff = (palpx[0] - srcf[0]) * (palpx[0] - srcf[0]) + - (palpx[1] - srcf[1]) * (palpx[1] - srcf[1]) + - (palpx[2] - srcf[2]) * (palpx[2] - srcf[2]); - if (diff <= best_diff) - { - best_diff = diff; - best_idx = idx; - } - } + best_idx = babl_palette_lookup (pal, src, best_idx); - ((double *) dst)[0] = best_idx / 255.5; - ((double *) dst)[1] = alpha; + ((double *) dst)[0] = best_idx / BABL_IDX_FACTOR; + ((double *) dst)[1] = src_d[3]; - src += sizeof (double) * 4; + src_i += sizeof (double) * 4; dst 
+= sizeof (double) * 2; } } @@ -275,7 +411,7 @@ assert(pal); while (n--) { - int idx = (((double *) src)[0]) * 255.5; + int idx = (((double *) src)[0]) * BABL_IDX_FACTOR; double *palpx; if (idx < 0) idx = 0; @@ -302,7 +438,7 @@ assert(pal); while (n--) { - int idx = (((double *) src)[0]) * 255.5; + int idx = (((double *) src)[0]) * BABL_IDX_FACTOR; double alpha = (((double *) src)[1]); double *palpx; @@ -312,7 +448,7 @@ palpx = ((double *)pal->data_double) + idx * 4; memcpy (dst, palpx, sizeof(double)*4); - ((double *)dst)[3] *= alpha; + ((double *)dst)[3] *= alpha; src += sizeof (double) * 2; dst += sizeof (double) * 4; @@ -320,6 +456,97 @@ } static void +rgba_float_to_pal_a (Babl *conversion, + unsigned char *src_b, + unsigned char *dst, + long n, + void *src_model_data) +{ + const Babl *space = babl_conversion_get_destination_space (conversion); + BablPalette **palptr = src_model_data; + BablPalette *pal; + int best_idx = 0; + assert (palptr); + pal = *palptr; + assert(pal); + + while (n--) + { + float *src_f = (void*) src_b; + unsigned char src[4]; + int c; + for (c = 0; c < 3; c++) + { + if (src_f[c] >= 1.0f) + src[c] = 255; + else if (src_f[c] <= 0.0f) + src[c] = 0; + else + src[c] = babl_trc_from_linear (space->space.trc[0], + src_f[c]) * 255 + 0.5f; + } + if (src_f[3] >= 1.0f) + src[3] = 255; + else if (src_f[3] <= 0.0f) + src[3] = 0; + else + src[3] = src_f[3] * 255 + 0.5f; + + + dst[0] = best_idx = babl_palette_lookup (pal, src, best_idx); + dst[1] = src[3]; + + src_b += sizeof (float) * 4; + dst += sizeof (char) * 2; + } +} + + +static void +rgba_float_to_pal (Babl *conversion, + unsigned char *src_b, + unsigned char *dst, + long n, + void *src_model_data) +{ + const Babl *space = babl_conversion_get_destination_space (conversion); + BablPalette **palptr = src_model_data; + BablPalette *pal; + int best_idx = 0; + assert (palptr); + pal = *palptr; + assert(pal); + + while (n--) + { + float *src_f = (void*) src_b; + unsigned char src[4]; + int c; + for (c 
= 0; c < 3; c++) + { + if (src_f[c] >= 1.0f) + src[c] = 255; + else if (src_f[c] <= 0.0f) + src[c] = 0; + else + src[c] = babl_trc_from_linear (space->space.trc[0], + src_f[c]) * 255 + 0.5f; + } + if (src_f[3] >= 1.0f) + src[3] = 255; + else if (src_f[3] <= 0.0f) + src[3] = 0; + else + src[3] = src_f[3] * 255 + 0.5f; + + dst[0] = best_idx = babl_palette_lookup (pal, src, best_idx); + + src_b += sizeof (float) * 4; + dst += sizeof (char) * 1; + } +} + +static void rgba_u8_to_pal (Babl *conversion, unsigned char *src, unsigned char *dst, @@ -328,12 +555,14 @@ { BablPalette **palptr = src_model_data; BablPalette *pal; + int best_idx = 0; assert (palptr); pal = *palptr; assert(pal); + while (n--) { - dst[0] = babl_palette_lookup (pal, src[0], src[1], src[2], src[3]); + dst[0] = best_idx = babl_palette_lookup (pal, src, best_idx); src += sizeof (char) * 4; dst += sizeof (char) * 1; @@ -349,12 +578,13 @@ { BablPalette **palptr = src_model_data; BablPalette *pal; + int best_idx = 0; assert (palptr); pal = *palptr; assert(pal); while (n--) { - dst[0] = babl_palette_lookup (pal, src[0], src[1], src[2], src[3]); + dst[0] = best_idx = babl_palette_lookup (pal, src, best_idx); dst[1] = src[3]; src += sizeof (char) * 4; @@ -510,7 +740,6 @@ "chroma", NULL); alpha = babl_component ("A"); - model = babl_model_new ("name", name, component, alpha, NULL); palptr = malloc (sizeof (void*)); *palptr = default_palette ();; @@ -558,30 +787,24 @@ "data", palptr, NULL ); - babl_conversion_new ( f_pal_u8, f_pal_a_u8, "linear", conv_pal8_pala8, NULL ); - babl_conversion_new ( f_pal_a_u8, f_pal_u8, "linear", conv_pala8_pal8, NULL ); - - babl_conversion_new ( f_pal_u8, babl_format ("R'G'B'A u8"), "linear", pal_u8_to_rgba_u8, "data", palptr, NULL); - - babl_conversion_new ( f_pal_a_u8, babl_format ("R'G'B'A u8"), @@ -602,6 +825,19 @@ "data", palptr, NULL); + babl_conversion_new ( + babl_format ("RGBA float"), + f_pal_a_u8, + "linear", rgba_float_to_pal_a, + "data", palptr, + NULL); + 
babl_conversion_new ( + babl_format ("RGBA float"), + f_pal_u8, + "linear", rgba_float_to_pal, + "data", palptr, + NULL); + babl_set_user_data (model, palptr); babl_set_user_data (model_no_alpha, palptr); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/babl/babl-version.h new/babl-0.1.50/babl/babl-version.h --- old/babl-0.1.46/babl/babl-version.h 2018-04-10 23:11:27.000000000 +0200 +++ new/babl-0.1.50/babl/babl-version.h 2018-05-20 10:48:54.000000000 +0200 @@ -34,7 +34,7 @@ #define BABL_MAJOR_VERSION 0 #define BABL_MINOR_VERSION 1 -#define BABL_MICRO_VERSION 46 +#define BABL_MICRO_VERSION 50 /** Get the version information on the babl library */ void babl_get_version (int *major, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/babl/base/type-u8.c new/babl-0.1.50/babl/base/type-u8.c --- old/babl-0.1.46/babl/base/type-u8.c 2017-09-29 00:30:45.000000000 +0200 +++ new/babl-0.1.50/babl/base/type-u8.c 2018-04-21 15:18:07.000000000 +0200 @@ -110,7 +110,7 @@ MAKE_CONVERSIONS (u8, 0.0, 1.0, 0x00, UINT8_MAX) MAKE_CONVERSIONS (u8_luma, 0.0, 1.0, 16, 235) -MAKE_CONVERSIONS (u8_chroma, 0.0, 1.0, 16, 240) +MAKE_CONVERSIONS (u8_chroma, -0.5, 0.5, 16, 240) void babl_base_type_u8 (void) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh 
--exclude ltmain.sh old/babl-0.1.46/config.h.in new/babl-0.1.50/config.h.in --- old/babl-0.1.46/config.h.in 2018-04-10 23:11:28.000000000 +0200 +++ new/babl-0.1.50/config.h.in 2018-05-20 10:48:54.000000000 +0200 @@ -123,5 +123,8 @@ /* Define to 1 if SSE2 assembly is available. */ #undef USE_SSE2 +/* Define to 1 if SSE3 assembly is available. */ +#undef USE_SSE3 + /* Define to 1 if SSE4_1 assembly is available. */ #undef USE_SSE4_1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/configure.ac new/babl-0.1.50/configure.ac --- old/babl-0.1.46/configure.ac 2018-04-10 23:04:31.000000000 +0200 +++ new/babl-0.1.50/configure.ac 2018-05-20 10:48:45.000000000 +0200 @@ -14,7 +14,7 @@ m4_define([babl_major_version], [0]) m4_define([babl_minor_version], [1]) -m4_define([babl_micro_version], [46]) +m4_define([babl_micro_version], [50]) m4_define([babl_real_version], [babl_major_version.babl_minor_version.babl_micro_version]) m4_define([babl_version], [babl_real_version]) @@ -161,6 +161,9 @@ BABL_DETECT_CFLAGS(extra_warnings, '-Wold-style-definition') CFLAGS="$CFLAGS $extra_warnings" +BABL_DETECT_CFLAGS(extra_warnings, '-Ofast' ) +CFLAGS="$CFLAGS $extra_warnings" + fi AC_PATH_PROG(RSVG, rsvg-convert, no) @@ -324,6 +327,10 @@ [ --enable-sse2 enable SSE2 support (default=auto)],, enable_sse2=$enable_sse) +AC_ARG_ENABLE(sse3, + [ --enable-sse3 enable SSE3 support (default=auto)],, + enable_sse3=$enable_sse2) + AC_ARG_ENABLE(sse4_1, [ --enable-sse4_1 enable SSE4_1 support (default=auto)],, enable_sse4_1=$enable_sse) @@ -388,22 +395,40 @@ AC_MSG_WARN([The assembler does not support the SSE2 command set.]) ) - if test "x$enable_sse4_1" = xyes; then - BABL_DETECT_CFLAGS(sse4_1_flag, '-msse4.1') - 
SSE4_1_EXTRA_CFLAGS="$SSE_EXTRA_CFLAGS $sse4_1_flag" + if test "x$enable_sse3" = xyes; then + BABL_DETECT_CFLAGS(sse3_flag, '-msse3') + SSE3_EXTRA_CFLAGS="$SSE2_EXTRA_CFLAGS $sse3_flag" - AC_MSG_CHECKING(whether we can compile SSE4_1 code) + AC_MSG_CHECKING(whether we can compile SSE3 code) - CFLAGS="$CFLAGS $sse4_1_flag" + CFLAGS="$CFLAGS $sse3_flag" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("pmovzxbd %xmm0,%xmm1");])], - AC_DEFINE(USE_SSE4_1, 1, [Define to 1 if SSE4_1 assembly is available.]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("addsubpd %xmm0,%xmm1");])], + AC_DEFINE(USE_SSE3, 1, [Define to 1 if SSE3 assembly is available.]) AC_MSG_RESULT(yes) , - enable_sse4_1=no + enable_sse3=no AC_MSG_RESULT(no) - AC_MSG_WARN([The assembler does not support the SSE4_1 command set.]) + AC_MSG_WARN([The assembler does not support the SSE3 command set.]) ) + + if test "x$enable_sse4_1" = xyes; then + BABL_DETECT_CFLAGS(sse4_1_flag, '-msse4.1') + SSE4_1_EXTRA_CFLAGS="$SSE_EXTRA_CFLAGS $sse4_1_flag" + + AC_MSG_CHECKING(whether we can compile SSE4_1 code) + + CFLAGS="$CFLAGS $sse4_1_flag" + + AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("pmovzxbd %xmm0,%xmm1");])], + AC_DEFINE(USE_SSE4_1, 1, [Define to 1 if SSE4_1 assembly is available.]) + AC_MSG_RESULT(yes) + , + enable_sse4_1=no + AC_MSG_RESULT(no) + AC_MSG_WARN([The assembler does not support the SSE4_1 command set.]) + ) + fi fi fi @@ -439,6 +464,7 @@ AC_SUBST(MMX_EXTRA_CFLAGS) AC_SUBST(SSE_EXTRA_CFLAGS) AC_SUBST(SSE2_EXTRA_CFLAGS) + AC_SUBST(SSE3_EXTRA_CFLAGS) AC_SUBST(SSE4_1_EXTRA_CFLAGS) AC_SUBST(F16C_EXTRA_CFLAGS) fi diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/extensions/CIE.c new/babl-0.1.50/extensions/CIE.c --- old/babl-0.1.46/extensions/CIE.c 
2018-04-07 18:14:44.000000000 +0200 +++ new/babl-0.1.50/extensions/CIE.c 2018-05-15 19:07:48.000000000 +0200 @@ -2,7 +2,7 @@ * Copyright (C) 2005, 2014 Øyvind Kolås. * Copyright (C) 2009, Martin Nordholts * Copyright (C) 2014, Elle Stone - * Copyright (C) 2017, Red Hat, Inc. + * Copyright (C) 2017, 2018 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -21,8 +21,13 @@ #include "config.h" #include <math.h> +#include <stdint.h> #include <string.h> +#if defined(USE_SSE2) +#include <emmintrin.h> +#endif /* defined(USE_SSE2) */ + #include "babl-internal.h" #include "extensions/util.h" @@ -172,24 +177,17 @@ double *to_a, double *to_b) { - double f_x, f_y, f_z; - - double x_r = X / D50_WHITE_REF_X; - double y_r = Y / D50_WHITE_REF_Y; - double z_r = Z / D50_WHITE_REF_Z; - - if (x_r > LAB_EPSILON) f_x = cbrt(x_r); - else ( f_x = ((LAB_KAPPA * x_r) + 16) / 116.0 ); - - if (y_r > LAB_EPSILON) f_y = cbrt(y_r); - else ( f_y = ((LAB_KAPPA * y_r) + 16) / 116.0 ); - - if (z_r > LAB_EPSILON) f_z = cbrt(z_r); - else ( f_z = ((LAB_KAPPA * z_r) + 16) / 116.0 ); - - *to_L = (116.0 * f_y) - 16.0; - *to_a = 500.0 * (f_x - f_y); - *to_b = 200.0 * (f_y - f_z); + double xr = X / D50_WHITE_REF_X; + double yr = Y / D50_WHITE_REF_Y; + double zr = Z / D50_WHITE_REF_Z; + + double fx = xr > LAB_EPSILON ? cbrt (xr) : (LAB_KAPPA * xr + 16.0) / 116.0; + double fy = yr > LAB_EPSILON ? cbrt (yr) : (LAB_KAPPA * yr + 16.0) / 116.0; + double fz = zr > LAB_EPSILON ? 
cbrt (zr) : (LAB_KAPPA * zr + 16.0) / 116.0; + + *to_L = 116.0 * fy - 16.0; + *to_a = 500.0 * (fx - fy); + *to_b = 200.0 * (fy - fz); } static inline void @@ -200,26 +198,18 @@ double *to_Y, double *to_Z) { - double fy, fx, fz, fx_cubed, fy_cubed, fz_cubed; - double xr, yr, zr; - - fy = (L + 16.0) / 116.0; - fy_cubed = fy*fy*fy; - - fz = fy - (b / 200.0); - fz_cubed = fz*fz*fz; + double fy = (L + 16.0) / 116.0; + double fy_cubed = fy * fy * fy; - fx = (a / 500.0) + fy; - fx_cubed = fx*fx*fx; + double fx = fy + a / 500.0; + double fx_cubed = fx * fx * fx; - if (fx_cubed > LAB_EPSILON) xr = fx_cubed; - else xr = ((116.0 * fx) - 16) / LAB_KAPPA; + double fz = fy - b / 200.0; + double fz_cubed = fz * fz * fz; - if ( L > (LAB_KAPPA * LAB_EPSILON) ) yr = fy_cubed; - else yr = (L / LAB_KAPPA); - - if (fz_cubed > LAB_EPSILON) zr = fz_cubed; - else zr = ( (116.0 * fz) - 16 ) / LAB_KAPPA; + double yr = L > LAB_KAPPA * LAB_EPSILON ? fy_cubed : L / LAB_KAPPA; + double xr = fx_cubed > LAB_EPSILON ? fx_cubed : (fx * 116.0 - 16.0) / LAB_KAPPA; + double zr = fz_cubed > LAB_EPSILON ? fz_cubed : (fz * 116.0 - 16.0) / LAB_KAPPA; *to_X = xr * D50_WHITE_REF_X; *to_Y = yr * D50_WHITE_REF_Y; @@ -572,8 +562,6 @@ * Return cube root of x */ -#include <stdint.h> - static inline float _cbrtf (float x) { @@ -1049,6 +1037,267 @@ } } +#if defined(USE_SSE2) + +/* This is an SSE2 version of Halley's method for approximating the + * cube root of an IEEE float implementation. 
+ * + * The scalar version is as follows: + * + * static inline float + * _cbrt_5f (float x) + * { + * union { float f; uint32_t i; } u = { x }; + * + * u.i = u.i / 3 + 709921077; + * return u.f; + * } + * + * static inline float + * _cbrta_halleyf (float a, float R) + * { + * float a3 = a * a * a; + * float b = a * (a3 + R + R) / (a3 + a3 + R); + * return b; + * } + * + * static inline float + * _cbrtf (float x) + * { + * float a; + * + * a = _cbrt_5f (x); + * a = _cbrta_halleyf (a, x); + * a = _cbrta_halleyf (a, x); + * return a; + * } + * + * The above scalar version seems to have originated from + * http://metamerist.com/cbrt/cbrt.htm but that's not accessible + * anymore. At present there's a copy in CubeRoot.cpp in the Skia + * sources that's licensed under a BSD-style license. There's some + * discussion on the implementation at + * http://www.voidcn.com/article/p-gpwztojr-wt.html. + * + * Note that Darktable also has an SSE2 version of the same algorithm, + * but uses only a single iteration of Halley's method, which is too + * coarse. + */ +/* Return cube roots of the four single-precision floating point + * components of x. 
+ */ +static inline __m128 +_cbrtf_ps_sse2 (__m128 x) +{ + const __m128i magic = _mm_set1_epi32 (709921077); + + __m128i xi = _mm_castps_si128 (x); + __m128 xi_3 = _mm_div_ps (_mm_cvtepi32_ps (xi), _mm_set1_ps (3.0f)); + __m128i ai = _mm_add_epi32 (_mm_cvtps_epi32 (xi_3), magic); + __m128 a = _mm_castsi128_ps (ai); + + __m128 a3 = _mm_mul_ps (_mm_mul_ps (a, a), a); + __m128 divisor = _mm_add_ps (_mm_add_ps (a3, a3), x); + a = _mm_div_ps (_mm_mul_ps (a, _mm_add_ps (a3, _mm_add_ps (x, x))), divisor); + + a3 = _mm_mul_ps (_mm_mul_ps (a, a), a); + divisor = _mm_add_ps (_mm_add_ps (a3, a3), x); + a = _mm_div_ps (_mm_mul_ps (a, _mm_add_ps (a3, _mm_add_ps (x, x))), divisor); + + return a; +} + +static inline __m128 +lab_r_to_f_sse2 (__m128 r) +{ + const __m128 epsilon = _mm_set1_ps (LAB_EPSILON); + const __m128 kappa = _mm_set1_ps (LAB_KAPPA); + + const __m128 f_big = _cbrtf_ps_sse2 (r); + + const __m128 f_small = _mm_div_ps (_mm_add_ps (_mm_mul_ps (kappa, r), _mm_set1_ps (16.0f)), + _mm_set1_ps (116.0f)); + + const __m128 mask = _mm_cmpgt_ps (r, epsilon); + const __m128 f = _mm_or_ps (_mm_and_ps (mask, f_big), _mm_andnot_ps (mask, f_small)); + return f; +} + +static void +rgbaf_to_Lf_sse2 (const Babl *conversion, const float *src, float *dst, long samples) +{ + const Babl *space = babl_conversion_get_source_space (conversion); + const float m_1_0 = space->space.RGBtoXYZf[3] / D50_WHITE_REF_Y; + const float m_1_1 = space->space.RGBtoXYZf[4] / D50_WHITE_REF_Y; + const float m_1_2 = space->space.RGBtoXYZf[5] / D50_WHITE_REF_Y; + long i = 0; + long remainder; + + if (((uintptr_t) src % 16) + ((uintptr_t) dst % 16) == 0) + { + const long n = (samples / 4) * 4; + const __m128 m_1_0_v = _mm_set1_ps (m_1_0); + const __m128 m_1_1_v = _mm_set1_ps (m_1_1); + const __m128 m_1_2_v = _mm_set1_ps (m_1_2); + + for ( ; i < n; i += 4) + { + __m128 rgba0 = _mm_load_ps (src); + __m128 rgba1 = _mm_load_ps (src + 4); + __m128 rgba2 = _mm_load_ps (src + 8); + __m128 rgba3 = _mm_load_ps (src + 
12); + + __m128 r = rgba0; + __m128 g = rgba1; + __m128 b = rgba2; + __m128 a = rgba3; + _MM_TRANSPOSE4_PS (r, g, b, a); + + { + __m128 yr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_1_0_v, r), _mm_mul_ps (m_1_1_v, g)), + _mm_mul_ps (m_1_2_v, b)); + + __m128 fy = lab_r_to_f_sse2 (yr); + + __m128 L = _mm_sub_ps (_mm_mul_ps (_mm_set1_ps (116.0f), fy), _mm_set1_ps (16.0f)); + + _mm_store_ps (dst, L); + } + + src += 16; + dst += 4; + } + } + + remainder = samples - i; + while (remainder--) + { + float r = src[0]; + float g = src[1]; + float b = src[2]; + + float yr = m_1_0 * r + m_1_1 * g + m_1_2 * b; + float L = yr > LAB_EPSILON ? 116.0f * _cbrtf (yr) - 16 : LAB_KAPPA * yr; + + dst[0] = L; + + src += 4; + dst += 1; + } +} + +static void +rgbaf_to_Labaf_sse2 (const Babl *conversion, const float *src, float *dst, long samples) +{ + const Babl *space = babl_conversion_get_source_space (conversion); + const float m_0_0 = space->space.RGBtoXYZf[0] / D50_WHITE_REF_X; + const float m_0_1 = space->space.RGBtoXYZf[1] / D50_WHITE_REF_X; + const float m_0_2 = space->space.RGBtoXYZf[2] / D50_WHITE_REF_X; + const float m_1_0 = space->space.RGBtoXYZf[3] / D50_WHITE_REF_Y; + const float m_1_1 = space->space.RGBtoXYZf[4] / D50_WHITE_REF_Y; + const float m_1_2 = space->space.RGBtoXYZf[5] / D50_WHITE_REF_Y; + const float m_2_0 = space->space.RGBtoXYZf[6] / D50_WHITE_REF_Z; + const float m_2_1 = space->space.RGBtoXYZf[7] / D50_WHITE_REF_Z; + const float m_2_2 = space->space.RGBtoXYZf[8] / D50_WHITE_REF_Z; + long i = 0; + long remainder; + + if (((uintptr_t) src % 16) + ((uintptr_t) dst % 16) == 0) + { + const long n = (samples / 4) * 4; + const __m128 m_0_0_v = _mm_set1_ps (m_0_0); + const __m128 m_0_1_v = _mm_set1_ps (m_0_1); + const __m128 m_0_2_v = _mm_set1_ps (m_0_2); + const __m128 m_1_0_v = _mm_set1_ps (m_1_0); + const __m128 m_1_1_v = _mm_set1_ps (m_1_1); + const __m128 m_1_2_v = _mm_set1_ps (m_1_2); + const __m128 m_2_0_v = _mm_set1_ps (m_2_0); + const __m128 m_2_1_v = _mm_set1_ps 
(m_2_1); + const __m128 m_2_2_v = _mm_set1_ps (m_2_2); + + for ( ; i < n; i += 4) + { + __m128 Laba0; + __m128 Laba1; + __m128 Laba2; + __m128 Laba3; + + __m128 rgba0 = _mm_load_ps (src); + __m128 rgba1 = _mm_load_ps (src + 4); + __m128 rgba2 = _mm_load_ps (src + 8); + __m128 rgba3 = _mm_load_ps (src + 12); + + __m128 r = rgba0; + __m128 g = rgba1; + __m128 b = rgba2; + __m128 a = rgba3; + _MM_TRANSPOSE4_PS (r, g, b, a); + + { + __m128 xr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_0_0_v, r), _mm_mul_ps (m_0_1_v, g)), + _mm_mul_ps (m_0_2_v, b)); + __m128 yr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_1_0_v, r), _mm_mul_ps (m_1_1_v, g)), + _mm_mul_ps (m_1_2_v, b)); + __m128 zr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_2_0_v, r), _mm_mul_ps (m_2_1_v, g)), + _mm_mul_ps (m_2_2_v, b)); + + __m128 fx = lab_r_to_f_sse2 (xr); + __m128 fy = lab_r_to_f_sse2 (yr); + __m128 fz = lab_r_to_f_sse2 (zr); + + __m128 L = _mm_sub_ps (_mm_mul_ps (_mm_set1_ps (116.0f), fy), _mm_set1_ps (16.0f)); + __m128 A = _mm_mul_ps (_mm_set1_ps (500.0f), _mm_sub_ps (fx, fy)); + __m128 B = _mm_mul_ps (_mm_set1_ps (200.0f), _mm_sub_ps (fy, fz)); + + Laba0 = L; + Laba1 = A; + Laba2 = B; + Laba3 = a; + _MM_TRANSPOSE4_PS (Laba0, Laba1, Laba2, Laba3); + } + + _mm_store_ps (dst, Laba0); + _mm_store_ps (dst + 4, Laba1); + _mm_store_ps (dst + 8, Laba2); + _mm_store_ps (dst + 12, Laba3); + + src += 16; + dst += 16; + } + } + + remainder = samples - i; + while (remainder--) + { + float r = src[0]; + float g = src[1]; + float b = src[2]; + float a = src[3]; + + float xr = m_0_0 * r + m_0_1 * g + m_0_2 * b; + float yr = m_1_0 * r + m_1_1 * g + m_1_2 * b; + float zr = m_2_0 * r + m_2_1 * g + m_2_2 * b; + + float fx = xr > LAB_EPSILON ? _cbrtf (xr) : (LAB_KAPPA * xr + 16.0f) / 116.0f; + float fy = yr > LAB_EPSILON ? _cbrtf (yr) : (LAB_KAPPA * yr + 16.0f) / 116.0f; + float fz = zr > LAB_EPSILON ? 
_cbrtf (zr) : (LAB_KAPPA * zr + 16.0f) / 116.0f; + + float L = 116.0f * fy - 16.0f; + float A = 500.0f * (fx - fy); + float B = 200.0f * (fy - fz); + + dst[0] = L; + dst[1] = A; + dst[2] = B; + dst[3] = a; + + src += 4; + dst += 4; + } +} + +#endif /* defined(USE_SSE2) */ + static void conversions (void) { @@ -1215,6 +1464,27 @@ NULL ); +#if defined(USE_SSE2) + + if (babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE2) + { + babl_conversion_new ( + babl_format ("RGBA float"), + babl_format ("CIE Lab alpha float"), + "linear", rgbaf_to_Labaf_sse2, + NULL + ); + + babl_conversion_new ( + babl_format ("RGBA float"), + babl_format ("CIE L float"), + "linear", rgbaf_to_Lf_sse2, + NULL + ); + } + +#endif /* defined(USE_SSE2) */ + rgbcie_init (); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/extensions/Makefile.am new/babl-0.1.50/extensions/Makefile.am --- old/babl-0.1.46/extensions/Makefile.am 2018-04-07 17:08:01.000000000 +0200 +++ new/babl-0.1.50/extensions/Makefile.am 2018-05-10 14:41:10.000000000 +0200 @@ -71,6 +71,7 @@ LIBS = $(top_builddir)/babl/libbabl-@BABL_API_VERSION@.la \ $(MATH_LIB) $(THREAD_LIB) +CIE_la_CFLAGS = $(SSE2_EXTRA_CFLAGS) sse2_float_la_CFLAGS = $(SSE2_EXTRA_CFLAGS) sse2_int8_la_CFLAGS = $(SSE2_EXTRA_CFLAGS) sse2_int16_la_CFLAGS = $(SSE2_EXTRA_CFLAGS) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/extensions/two-table.c new/babl-0.1.50/extensions/two-table.c --- old/babl-0.1.46/extensions/two-table.c 2018-04-07
16:38:23.000000000 +0200 +++ new/babl-0.1.50/extensions/two-table.c 2018-04-24 20:14:52.000000000 +0200 @@ -205,17 +205,6 @@ if (littleendian) { - const Babl *f32 = babl_format_new ( - "name", "cairo-ARGB32", - babl_model ("R'aG'aB'aA"), - babl_type ("u8"), - babl_component ("B'a"), - babl_component ("G'a"), - babl_component ("R'a"), - babl_component ("A"), - NULL - ); - const Babl *f24 = babl_format_new ( "name", "cairo-RGB24", babl_model ("R'G'B'"), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/tests/palette-concurrency-stress-test.c new/babl-0.1.50/tests/palette-concurrency-stress-test.c --- old/babl-0.1.46/tests/palette-concurrency-stress-test.c 2017-09-21 21:06:11.000000000 +0200 +++ new/babl-0.1.50/tests/palette-concurrency-stress-test.c 2018-05-19 17:21:32.000000000 +0200 @@ -77,13 +77,13 @@ v = i * BABL_PALETTE_HASH_TABLE_SIZE; - p[0] = (v >> 16) & 0xff; + p[0] = (v >> 0) & 0xff; p[1] = (v >> 8) & 0xff; - p[2] = (v >> 0) & 0xff; + p[2] = (v >> 16) & 0xff; p[3] = 0xff; } - babl_palette_set_palette (pal, babl_format ("RGBA u8"), colors, N_THREADS); + babl_palette_set_palette (pal, babl_format ("R'G'B'A u8"), colors, N_THREADS); /* initialize the thread contexts such that each thread processes a buffer * containing a single, distinct color @@ -92,7 +92,7 @@ { ctx[i] = malloc (sizeof (ThreadContext)); - ctx[i]->fish = babl_fish (babl_format ("RGBA u8"), pal_format); + ctx[i]->fish = babl_fish (babl_format ("R'G'B'A u8"), pal_format); for (j = 0; j < 4 * N_PIXELS; j++) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude 
config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/babl-0.1.46/tools/babl_fish_path_fitness.c new/babl-0.1.50/tools/babl_fish_path_fitness.c --- old/babl-0.1.46/tools/babl_fish_path_fitness.c 2018-04-08 15:28:44.000000000 +0200 +++ new/babl-0.1.50/tools/babl_fish_path_fitness.c 2018-05-19 13:34:30.000000000 +0200 @@ -91,7 +91,7 @@ static int source_each (Babl *babl, void *userdata) { - printf (SL); + printf ("%s", SL); babl_format_class_for_each (destination_each, babl); #ifdef UTF8 printf ("──%2i %s%s", source_no++, babl->instance.name, NL);
