aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mattias Andrée <maandree@kth.se> 2023-01-31 18:07:32 +0100
committer Mattias Andrée <maandree@kth.se> 2023-01-31 18:07:32 +0100
commit 704369867625fc72d1b714bdf839bc06ce0f5d49 (patch)
tree 165ada8df8a6de95777fb9791bdf31775bc4b575
parent Implement libglitter_redistribute_energy_double (diff)
download libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.gz
libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.bz2
libglitter-704369867625fc72d1b714bdf839bc06ce0f5d49.tar.xz
Add optimisations for common kernels in libglitter_redistribute_energy_double
Signed-off-by: Mattias Andrée <maandree@kth.se>
-rw-r--r-- libglitter.h 16
-rw-r--r-- libglitter_redistribute_energy_double.c 55
2 files changed, 53 insertions, 18 deletions
diff --git a/libglitter.h b/libglitter.h
index b7d409a..fe83073 100644
--- a/libglitter.h
+++ b/libglitter.h
@@ -241,14 +241,14 @@ void libglitter_update_render_context(LIBGLITTER_RENDER_CONTEXT *, size_t);
* `(hkernelsize - 1) / 2` cells to the left and to
* the right, and `(vkernelsize - 1) / 2` cells to
* the up as well as down. The caller is responsible
- * for furthering extending the image by `widthmul`
- * - (hkernelsize - 1) / 2 % widthmul` cells both to
- * on the left and on the right, and by `heightmul`
- * - (vkernelsize - 1) / 2 % heightmul` cells both
- * up and down (where `widthmul` and `heightmul` are
- * arguments to the `libglitter_create_render_context`
- * function); so that the raster can be input to
- * `libglitter_compose_double`.
+ * for further extending the image by `(widthmul
+ * - (hkernelsize - 1) / 2 % widthmul) % widthmul`
+ * cells both on the left and on the right, and by
+ * `(heightmul - (vkernelsize - 1) / 2 % heightmul)
+ * % heightmul` cells both up and down (where
+ * `widthmul` and `heightmul` are arguments to the
+ * `libglitter_create_render_context` function); so that
+ * the raster can be input to `libglitter_compose_double`.
*
* @param raster The subpixel raster. This must be padded with
* zero-initialised cells on the left side and
diff --git a/libglitter_redistribute_energy_double.c b/libglitter_redistribute_energy_double.c
index 9f97ed3..2a12f9b 100644
--- a/libglitter_redistribute_energy_double.c
+++ b/libglitter_redistribute_energy_double.c
@@ -7,13 +7,33 @@ static void
vconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel)
{
size_t y, x, i;
- for (y = 0; y < height; y++) {
+
+ if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) {
+ raster = &raster[-2 * rowsize];
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ raster[x] += raster[1 * rowsize + x];
+ for (x = 0; x < width; x++) {
+ raster[x] += raster[2 * rowsize + x];
+ raster[x] *= kernel[0];
+ }
+ raster = &raster[rowsize];
+ }
for (x = 0; x < width; x++)
raster[x] *= kernel[0];
- for (i = 1; i < kernelsize; i++)
- for (x = 0; x < width; x++)
- raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]);
raster = &raster[rowsize];
+ for (x = 0; x < width; x++)
+ raster[x] *= kernel[0];
+
+ } else {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ raster[x] *= kernel[0];
+ for (i = 1; i < kernelsize; i++)
+ for (x = 0; x < width; x++)
+ raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]);
+ raster = &raster[rowsize];
+ }
}
}
@@ -22,13 +42,28 @@ static void
hconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel)
{
size_t y, x, i;
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- raster[x] *= kernel[0];
- for (i = 1; i < kernelsize; i++)
- raster[x] = fma(raster[x + i], kernel[i], raster[x]);
+
+ if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ raster[x - 1] += raster[x];
+ raster[x - 2] += raster[x];
+ raster[x - 2] *= kernel[0];
+ }
+ raster[width - 2] *= kernel[0];
+ raster[width - 1] *= kernel[0];
+ raster = &raster[rowsize];
+ }
+
+ } else {
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ raster[x] *= kernel[0];
+ for (i = 1; i < kernelsize; i++)
+ raster[x] = fma(raster[x + i], kernel[i], raster[x]);
+ }
+ raster = &raster[rowsize];
}
- raster = &raster[rowsize];
}
}