LinuxQuestions.org
Register a domain and help support LQ
Go Back   LinuxQuestions.org > Blogs > rainbowsally
User Name
Password

Notices

Rate this Entry

Simple C - mix pixel data, integer math (fixed point), data clamping...

Posted 06-06-2013 at 06:15 PM by rainbowsally

Simple C to mix pixel data, integer math (fixed point), data clamping

Today's Features
  • Simple program to add two pixels clamping each byte to 0xFF, but adding all four bytes in parallel for speed. There are no branches in the algorithm.
  • Another simple program to mix two colors defining a gradient using a fraction from 0 to 1. We compare floating point to fixed point math versions.


Surprisingly, the long version compiles to about the same code as the short one with the -O2 switch.

file: src/addcolors.c
purpose: source file
Code:
// addcolors.c

#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include <stdlib.h>

/**

  c = a + b.

  Carry if any two hi bits are set => clamp byte to FF

          ahi:bhi

        00 01 11 10
       ------------
chi 0 | 0  0  1  0 |
    1 | 0  1  1  1 | <--- chi & (ahi | bhi)
       ------------
              ^
              `---------- ahi & bhi
 */

void dbg(){} // for a non-moving breakpoint

uint longversion(uint xnum1, uint xnum2);
uint shortversion(uint xnum1, uint xnum2);

int main(int argc, char** argv)
{
  dbg();
  // add routines here
  if(argc != 3)
  {
    printf("\n"
            "Experment with clamped color adding.\n\n"
            "Usage: addcolors AARRGGBB AARRGGBB\n"
            "  Where AARRGGBB are two ARGB format pixels in HEX.\n\n"
          );
    return 0;
  }
  uint xnum1 = 0, xnum2 = 0;
  sscanf(argv[1], "%x", &xnum1);
  sscanf(argv[2], "%x", &xnum2);
  uint d = shortversion(xnum1, xnum2);
  printf("%x\n", d);
  return 0;
}

uint longversion(uint xnum1, uint xnum2)
{
  // save 8bit register hi bits
  uint ahi = xnum1 & 0x80808080;
  uint bhi = xnum2 & 0x80808080;

  // carry if both hi bits are set
  uint cy = ahi & bhi;

  // add low bits
  uint c = (xnum1 & ~ahi) + (xnum2 & ~bhi);

  // save hi bit
  uint chi = c & 0x80808080;

  // carry if any two or more hi bits are set
  cy = cy | (chi & ahi) | (chi & bhi);

  // build the ormask shifting 80808080 down to 01010101 for
  // and then multiply by 255 to set the byte to 255.
  // 01:00:01:00 * 255 = ff00ff00.
  uint ormask = cy >> 7;
  ormask = ormask * 255;

  // restore hi bits in case of no carries
  ormask = ormask | ahi | bhi;

  // restore hi bits or overwrite with carry bits and return
  // the result
  uint d = c | ormask;
  return d;
}

uint shortversion(uint xnum1, uint xnum2)
{
  // save 8bit register hi bits
  uint ahi = xnum1 & 0x80808080;
  uint bhi = xnum2 & 0x80808080;

  // add low bits
  uint c = (xnum1 & ~ahi) + (xnum2 & ~bhi);

  // save hi bits from result
  uint chi = c & 0x80808080;

  // If any two hi bits are set or FF into the result or
  // if only one hi bit was set OR that back in and return
  // the result.
  return ((((ahi & bhi) | (chi & (ahi | bhi))) >> 7) * 255) | ahi | bhi | c;
}

Here we compare 10-bit fixed-point integer math against double-precision floating point. If you want to do a timed run you'll have to write that yourself.

In my own tests, including the call and return, the float version took about 40 nanoseconds to run 'mixcolors f9f9f9f9 85858585 .45' on this computer and the integer (fixed point) version took less than 100 nanoseconds.

file: src/mixcolors.c
purpose: source file
Code:
// mixcolors.c

#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include <stdlib.h>

/**

  Color mixing per pixel

  c = a * (1 - frac) + (b * frac)

  Carry if c > 255 => clamp to 255

 */

void dbg(){} // for a non-moving breakpoint

uint mixColorsF(uint xnum1, uint xnum2, double frac);
uint mixColorsI(uint xnum1, uint xnum2, double frac);

int main(int argc, char** argv)
{
  dbg();

  if(argc != 4)
  {
    printf( "\n"
            "Experment with clamped color mixing (float and fixed point).\n\n"
            "Usage: mixcolors AARRGGBB AARRGGBB Frac\n"
            "  Where AARRGGBB are two ARGB format pixels and Frac.\n"
            "  is the distance along the gradient from the first to\n"
            "  the second.\n\n"
          );
    return 0;
  }

  uint xnum1 = 0, xnum2 = 0;
  double frac = 0;
  sscanf(argv[1], "%x", &xnum1);
  sscanf(argv[2], "%x", &xnum2);
  sscanf(argv[3], "%lg", &frac);

  printf("%0x\n", mixColorsI(xnum1, xnum2, frac));

  return 0;
}

uint mixColorsF(uint xnum1, uint xnum2, double frac)
{
  uint r1, g1, b1, a1;
  uint r2, g2, b2, a2;
  uint res;
  double frac1, frac2;

  if(frac == 0)
    return xnum1;

  if (frac == 1.0)
    return xnum2;

  frac1 = (1 - frac); //  * 1 << 10;
  frac2 = frac; // * 1 << 10;

  b1 = xnum1 & 0xff;
  g1 = (xnum1 >> 8) & 0xff;
  r1 = (xnum1 >> 16) & 0xff;
  a1 = (xnum1 >> 24) & 0xff;

  b2 = xnum2 & 0xff;
  g2 = (xnum2 >> 8) & 0xff;
  r2 = (xnum2 >> 16) & 0xff;
  a2 = (xnum2 >> 24) & 0xff;

  b1 = (b1 * (frac1)) + (b2 * frac2);
  g1 = (g1 * (frac1)) + (g2 * frac2);
  r1 = (r1 * (frac1)) + (r2 * frac2);
  a1 = (a1 * (frac1)) + (a2 * frac2);

  b1 |= 255 * (b1 > 255);
  g1 |= 255 * (g1 > 255);
  r1 |= 255 * (r1 > 255);
  a1 |= 255 * (a1 > 255);

  res = (b1 & 0xff) | (g1 & 0xff) << 8 | (r1 & 0xff) << 16 | (a1 & 0xff) << 24;
  return res;
}

#define FIXPT 10

uint mixColorsI(uint xnum1, uint xnum2, double frac)
{
  uint r1, g1, b1, a1;
  uint r2, g2, b2, a2;
  uint res;
  uint frac1, frac2;


  if(frac == 0)
    return xnum1;

  if (frac == 1.0)
    return xnum2;

  frac1 = (1 - frac) * (1 << FIXPT);
  frac2 = frac * (1 << FIXPT);

  b1 = xnum1 & 0xff;
  g1 = (xnum1 >> 8) & 0xff;
  r1 = (xnum1 >> 16) & 0xff;
  a1 = (xnum1 >> 24) & 0xff;

  b2 = xnum2 & 0xff;
  g2 = (xnum2 >> 8) & 0xff;
  r2 = (xnum2 >> 16) & 0xff;
  a2 = (xnum2 >> 24) & 0xff;

  b1 = ((b1 * (frac1)) + (b2 * frac2)) >> FIXPT;
  g1 = ((g1 * (frac1)) + (g2 * frac2)) >> FIXPT;
  r1 = ((r1 * (frac1)) + (r2 * frac2)) >> FIXPT;
  a1 = ((a1 * (frac1)) + (a2 * frac2)) >> FIXPT;

  b1 |= 255 * (b1 > 255);
  g1 |= 255 * (g1 > 255);
  r1 |= 255 * (r1 > 255);
  a1 |= 255 * (a1 > 255);

  res = (b1 & 0xff) | (g1 & 0xff) << 8 | (r1 & 0xff) << 16 | (a1 & 0xff) << 24;
  return res;
}
The clamping code at the bottom of the routines can be stripped out with very little chance of ever being needed. It's mostly to catch floating point errors that will never occur, even in the double float code.

BTW, the clamping code here is VERY fast, when optimized and similar code can be used to clamp audio data, also with no branching.

Here's some code just for clamping between 0 and 255. The macro version is VERY fast. See the notes in the code. This could be used to clamp 8 bit audio data biased to 128 and it could be modified by changing the masks to FFFF for 16 bit audio data which, I believe, is already biased but you'd have to look into that.

file: src/clamp-255.c
purpose: source file
Code:
// main.c -- skeleton created by new.main

#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include <stdlib.h>

void dbg(){} // for a non-moving breakpoint


int clamp(int n, int lower, int upper);
int clamp255long_version(int n);
int clamp255short_version(int n);


/** Here's the disassembly for i586 minus the stack frame code

There is no multiplication or division, no branching, one bus memory access.
The rest are all register ops.

clamp255short_version:
^8048525: 8b 4d 08              mov    0x4(%esp),%ecx
v8048521: 31 d2                 xor    %edx,%edx
 8048529: 89 c8                 mov    %ecx,%eax
 804852b: f7 d0                 not    %eax
 804852d: c1 f8 1f              sar    $0x1f,%eax
 8048530: 25 ff 00 00 00        and    $0xff,%eax
 8048535: 81 f9 ff 00 00 00     cmp    $0xff,%ecx
 804853b: 0f 97 c2              seta   %dl
 804853e: f7 da                 neg    %edx
 8048540: 09 ca                 or     %ecx,%edx
 8048542: 21 d0                 and    %edx,%eax

  So here it is as a macro.

*** This macro runs in a loop in about 5 nanoseconds on my dual core 3GHz machine.
*/

#define clamp255macro_version(n) \
  ((n | ((-((n > 255) | (n < 0))))) & ((-(!(n < 0)))) & 255)


int main(int argc, char** argv)
{
  dbg();
  // int n, lower, upper;
  int n, res=0, i, j;
  sscanf(argv[1], "%d", &n);
#if 0 // for timed test
  for(i = 0; i < 1000000; i++)
  {
    for(j = 0; j < 1000; j++)
      res += clamp255macro_version(n);
  }
#endif
  res = clamp255short_version(n);
  printf("%d\n", res);
  return 0;
}

/**

IF N > 255 RES = 255    // CLAMP = X OR_FF AND_FF     // set bits to FF
IF N < 0   RES = 0      // CLAMP = X AND 0            // set bits to 00
ELSE       RES = N      // CLAMP = N                  // no change

Proceeding from OR to AND, with '*' meaning it must be done, '-' meaning it must NOT be done and 'X' meaning 'has no effect' in the range involved.

         OR0      ORFF     ANDFF      AND0
N > 255   X        *         *          -             // = OR FF if true
N < 0     X        X         X          *             // = AND 0 if true
N <= 255  X        -         X          -             // = no change if true
N >= 0    X        -         *          -             // = (AND FF always)

The special cases are ORFF which is only done if N>255 and AND0 which is
done for N<0.

As we can see ANDFF can always be done harmlessly.  In the short version and the macro we'll AND FF at the end of the logic regardless of the conditionals.

*/


int clamp255long_version(int n)
{
  int orff, and0, res;

  orff = (n > 255);           // set bits = ff if true
  orff = (-orff) & 0xFF;      // else 0

  and0 = !(n < 0);            // set bits 0 if true
  and0 = (-and0) & 0xFF;      // else ff

  res = (n | orff);           // ORFF first
  res = (res & and0);         // AND0 last
  return res;
}

int clamp255short_version(int n)
{
  // refactored to do the final ANDFF at the end
  return (n | ((-((n > 255) | (n < 0))))) & ((-(!(n < 0)))) & 255;
}
And here's an mc2.def file for generating the makefiles if you have mc2, or you can write your own if you know how.

file: mc2.def
purpose: source file
Code:
# mc2.def template created with Makefile Creator 'mc2'

CC = gcc

## sandbox path and other new variables

# output name override
OUTNAME = MULTI

SRCDIR = src
OBJDIR = o
BINDIR = .

# compile function overrides
COMPILE = $(CC) -m32 -c -o # COMPILE <output_file> ...
 CFLAGS = -Wall -g3 # debug
#CFLAGS = -Wall -O2 # optimized
INCLUDE = -I $(SRCDIR) -I$(PREFIX)/include -I /usr/include

# link function overrides
LINK = $(CC) -m32 -o # LINK <output_file> ...
LDFLAGS =
LIB = -L/usr/lib -L$(PREFIX)/lib

# additional targets
#mc2-update:
#  @mc2 -update

semiclean: $(EXT_SEMICLEAN)
  @rm -f $(OBJ)
  @rm -f *~ */*~ */*/*~ */*/*/*~

strip:
  @strip $(MAIN)
  @make semiclean

clean: $(EXT_CLEAN)
  @rm -f $(MAIN)
  @rm -f $(OBJ)
  @rm -f *.kdevelop.pcs *.kdevses
  @rm -f *~ */*~ */*/*~ */*/*/*~ tmp.mak

# Note: If you install into PREFIX, make sure to include and link
# against your working copy so you don't accidentally get the
# installed copy's libs and headers instead of the ones you are
# working on.

force: # used to force execution
:-)
Posted in Uncategorized
Views 485 Comments 0
« Prev     Main     Next »
Total Comments 0

Comments

 

  



All times are GMT -5. The time now is 03:51 AM.

Main Menu
Advertisement

My LQ
Write for LQ
LinuxQuestions.org is looking for people interested in writing Editorials, Articles, Reviews, and more. If you'd like to contribute content, let us know.
Main Menu
Syndicate
RSS1  Latest Threads
RSS1  LQ News
Twitter: @linuxquestions
identi.ca: @linuxquestions
Facebook: linuxquestions Google+: linuxquestions
Open Source Consulting | Domain Registration