about summary refs log tree commit diff stats
path: root/libglitter_redistribute_energy_double.c
diff options
context:
space:
mode:
author Mattias Andrée <maandree@kth.se> 2023-01-31 18:07:32 +0100
committer Mattias Andrée <maandree@kth.se> 2023-01-31 18:07:32 +0100
commit 704369867625fc72d1b714bdf839bc06ce0f5d49 (patch)
tree 165ada8df8a6de95777fb9791bdf31775bc4b575 /libglitter_redistribute_energy_double.c
parent Implement libglitter_redistribute_energy_double (diff)
download libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.gz
libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.bz2
libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.xz
Add optimisations for common kernels in libglitter_redistribute_energy_double
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'libglitter_redistribute_energy_double.c')
-rw-r--r-- libglitter_redistribute_energy_double.c 55
1 files changed, 45 insertions, 10 deletions
diff --git a/libglitter_redistribute_energy_double.c b/libglitter_redistribute_energy_double.c
index 9f97ed3..2a12f9b 100644
--- a/libglitter_redistribute_energy_double.c
+++ b/libglitter_redistribute_energy_double.c
@@ -7,13 +7,33 @@ static void
vconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel)
{
size_t y, x, i;
- for (y = 0; y < height; y++) {
+
+ if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) {
+ raster = &raster[-2 * rowsize];
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ raster[x] += raster[1 * rowsize + x];
+ for (x = 0; x < width; x++) {
+ raster[x] += raster[2 * rowsize + x];
+ raster[x] *= kernel[0];
+ }
+ raster = &raster[rowsize];
+ }
for (x = 0; x < width; x++)
raster[x] *= kernel[0];
- for (i = 1; i < kernelsize; i++)
- for (x = 0; x < width; x++)
- raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]);
raster = &raster[rowsize];
+ for (x = 0; x < width; x++)
+ raster[x] *= kernel[0];
+
+ } else {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ raster[x] *= kernel[0];
+ for (i = 1; i < kernelsize; i++)
+ for (x = 0; x < width; x++)
+ raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]);
+ raster = &raster[rowsize];
+ }
}
}
@@ -22,13 +42,28 @@ static void
hconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel)
{
size_t y, x, i;
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- raster[x] *= kernel[0];
- for (i = 1; i < kernelsize; i++)
- raster[x] = fma(raster[x + i], kernel[i], raster[x]);
+
+ if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ raster[x - 1] += raster[x];
+ raster[x - 2] += raster[x];
+ raster[x - 2] *= kernel[0];
+ }
+ raster[width - 2] *= kernel[0];
+ raster[width - 1] *= kernel[0];
+ raster = &raster[rowsize];
+ }
+
+ } else {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ raster[x] *= kernel[0];
+ for (i = 1; i < kernelsize; i++)
+ raster[x] = fma(raster[x + i], kernel[i], raster[x]);
+ }
+ raster = &raster[rowsize];
}
- raster = &raster[rowsize];
}
}