commit 2741eab8fcbe01af3972557e2df6e765a9450093
Author:     Mattias Andrée <[email protected]>
AuthorDate: Mon May 8 22:08:36 2017 +0200
Commit:     Mattias Andrée <[email protected]>
CommitDate: Mon May 8 22:08:36 2017 +0200

    Improve performance and memory usage of blind-transpose
    
    Signed-off-by: Mattias Andrée <[email protected]>

diff --git a/man/blind-transpose.1 b/man/blind-transpose.1
index 61ecd86..73dbfd5 100644
--- a/man/blind-transpose.1
+++ b/man/blind-transpose.1
@@ -12,8 +12,9 @@ To transpose a videos means to swap the
 X and Y coordinates.
 .SH REQUIREMENTS
 .B blind-transpose
-requires enough free memory to load two full frames into
-memory. A frame requires 32 bytes per pixel it contains.
+requires enough free memory to load one full frame and
+one input column into memory. A frame requires 32 bytes
+per pixel it contains.
 .B blind-transpose
 has not been optimised for memory usage, but instead
 for code simplicity.
diff --git a/src/blind-transpose.c b/src/blind-transpose.c
index c2d2d11..5c53a0d 100644
--- a/src/blind-transpose.c
+++ b/src/blind-transpose.c
@@ -9,12 +9,31 @@
 
 USAGE("")
 
+static size_t srcw, srch, srcwps, srchps, ps; /* shared with the process_* helpers; main sets ps to the pixel size in bytes, srcwps = srcw * ps and srchps = srch * ps */
+
+#define PROCESS(TYPE) /* gather strided pixels from col into the contiguous buffer row; row and col are taken from the expanding function */\
+       do {\
+               size_t x, i, n = ps / sizeof(TYPE); /* n: number of TYPE-sized elements per pixel */\
+               char *src, *img;\
+               for (x = 0; x < srchps; x += ps) { /* x: byte offset of the current pixel within row */\
+                       img = row + x;\
+                       src = col + x * srcw; /* the source offset grows by srcw * ps bytes per pixel */\
+                       for (i = 0; i < n; i++)\
+                               ((TYPE *)img)[i] = ((TYPE *)src)[i]; /* copy one pixel in TYPE-sized units */\
+               }\
+       } while (0)
+
+static void process_double(char *row, char *col) {PROCESS(double);} /* row is written, col is read; chosen by main when ps is a multiple of sizeof(double) */
+static void process_float (char *row, char *col) {PROCESS(float);} /* chosen by main when ps is a multiple of sizeof(float) but not of sizeof(double) */
+static void process_char  (char *row, char *col) {PROCESS(char);} /* byte-wise fallback for every other pixel size */
+
 int
 main(int argc, char *argv[])
 {
        struct stream stream;
-       char *buf, *row, *pix, *image, *imag, *img;
-       size_t n, srcw, srch, srcwps, srchps, ps, x, y, i;
+       char *buf, *image;
+       size_t n, y;
+       void (*process)(char *col, char *row);
 
        UNOFLAGS(argc);
 
@@ -27,23 +46,19 @@ main(int argc, char *argv[])
 
        echeck_frame_size(stream.width, stream.height, stream.pixel_size, 0, stream.file);
        n = stream.height * stream.width * (ps = stream.pixel_size);
-       buf   = emalloc(n);
-       image = emalloc(n);
-
        srchps = srch * ps;
        srcwps = srcw * ps;
+       buf   = emalloc(n);
+       image = emalloc(srchps);
+
+       process = !(ps % sizeof(double)) ? process_double :
+                 !(ps % sizeof(float))  ? process_float  : process_char;
+
        while (eread_frame(&stream, buf, n)) {
-               for (y = 0; y < srchps; y += ps) {
-                       imag = image + y;
-                       row  = buf + y * srcw;
-                       for (x = 0; x < srcwps; x += ps) {
-                               img = imag + x * srch;
-                               pix = row + x;
-                               for (i = 0; i < ps; i++)
-                                       img[i] = pix[i];
-                       }
+               for (y = 0; y < srcwps; y += ps) {
+                       process(image, buf + y);
+                       ewriteall(STDOUT_FILENO, image, srchps, "<stdout>");
                }
-               ewriteall(STDOUT_FILENO, image, n, "<stdout>");
        }
 
        free(buf);

Reply via email to