diff options
author | Mattias Andrée <maandree@kth.se> | 2023-01-31 18:07:32 +0100 |
---|---|---|
committer | Mattias Andrée <maandree@kth.se> | 2023-01-31 18:07:32 +0100 |
commit | 704369867625fc72d1b714bdf839bc06ce0f5d49 (patch) | |
tree | 165ada8df8a6de95777fb9791bdf31775bc4b575 /libglitter_redistribute_energy_double.c | |
parent | Implement libglitter_redistribute_energy_double (diff) | |
download | libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.gz libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.bz2 libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.xz |
Add optimisations for common kernels in libglitter_redistribute_energy_double
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'libglitter_redistribute_energy_double.c')
-rw-r--r-- | libglitter_redistribute_energy_double.c | 55 |
1 files changed, 45 insertions, 10 deletions
diff --git a/libglitter_redistribute_energy_double.c b/libglitter_redistribute_energy_double.c index 9f97ed3..2a12f9b 100644 --- a/libglitter_redistribute_energy_double.c +++ b/libglitter_redistribute_energy_double.c @@ -7,13 +7,33 @@ static void vconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel) { size_t y, x, i; - for (y = 0; y < height; y++) { + + if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) { + raster = &raster[-2 * rowsize]; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + raster[x] += raster[1 * rowsize + x]; + for (x = 0; x < width; x++) { + raster[x] += raster[2 * rowsize + x]; + raster[x] *= kernel[0]; + } + raster = &raster[rowsize]; + } for (x = 0; x < width; x++) raster[x] *= kernel[0]; - for (i = 1; i < kernelsize; i++) - for (x = 0; x < width; x++) - raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]); raster = &raster[rowsize]; + for (x = 0; x < width; x++) + raster[x] *= kernel[0]; + + } else { + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + raster[x] *= kernel[0]; + for (i = 1; i < kernelsize; i++) + for (x = 0; x < width; x++) + raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]); + raster = &raster[rowsize]; + } } } @@ -22,13 +42,28 @@ static void hconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel) { size_t y, x, i; - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - raster[x] *= kernel[0]; - for (i = 1; i < kernelsize; i++) - raster[x] = fma(raster[x + i], kernel[i], raster[x]); + + if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) { + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + raster[x - 1] += raster[x]; + raster[x - 2] += raster[x]; + raster[x - 2] *= kernel[0]; + } + raster[width - 2] *= kernel[0]; + raster[width - 1] *= kernel[0]; + raster = &raster[rowsize]; + } + + } else { + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + raster[x] *= kernel[0]; + for (i = 1; i < kernelsize; i++) + raster[x] = fma(raster[x + i], kernel[i], raster[x]); + } + raster = &raster[rowsize]; } - raster = &raster[rowsize]; } } |