From 704369867625fc72d1b714bdf839bc06ce0f5d49 Mon Sep 17 00:00:00 2001
From: Mattias Andrée <maandree@kth.se>
Date: Tue, 31 Jan 2023 18:07:32 +0100
Subject: Add optimisations for common kernels in
 libglitter_redistribute_energy_double
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Mattias Andrée <maandree@kth.se>
---
 libglitter.h                            | 16 +++++-----
 libglitter_redistribute_energy_double.c | 55 +++++++++++++++++++++++++++------
 2 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/libglitter.h b/libglitter.h
index b7d409a..fe83073 100644
--- a/libglitter.h
+++ b/libglitter.h
@@ -241,14 +241,14 @@ void libglitter_update_render_context(LIBGLITTER_RENDER_CONTEXT *, size_t);
  * `(hkernelsize - 1) / 2` cells to the left and to
  * the right, and `(vkernelsize - 1) / 2` cells to
  * the up as well as down. The caller is responsible
- * for furthering extending the image by `widthmul`
- * - (hkernelsize - 1) / 2 % widthmul` cells both to
- * on the left and on the right, and by `heightmul`
- * - (vkernelsize - 1) / 2 % heightmul` cells both
- * up and down (where `widthmul` and `heightmul` are
- * arguments to the `libglitter_create_render_context`
- * function); so that the raster can be input to
- * `libglitter_compose_double`.
+ * for further extending the image by
+ * `(widthmul - (hkernelsize - 1) / 2 % widthmul) % widthmul`
+ * cells both on the left and on the right, and by
+ * `(heightmul - (vkernelsize - 1) / 2 % heightmul) % heightmul`
+ * cells both up and down (where
+ * `widthmul` and `heightmul` are arguments to the
+ * `libglitter_create_render_context` function); so that
+ * the raster can be input to `libglitter_compose_double`.
  * 
  * @param  raster          The subpixel raster. The must be padded with
  *                         zero-initialised cells on the left side and
diff --git a/libglitter_redistribute_energy_double.c b/libglitter_redistribute_energy_double.c
index 9f97ed3..2a12f9b 100644
--- a/libglitter_redistribute_energy_double.c
+++ b/libglitter_redistribute_energy_double.c
@@ -7,13 +7,33 @@ static void
 vconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel)
 {
 	size_t y, x, i;
-	for (y = 0; y < height; y++) {
+
+	if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) {
+		raster = &raster[-2 * rowsize];
+		for (y = 0; y < height; y++) {
+			for (x = 0; x < width; x++)
+				raster[x] += raster[1 * rowsize + x];
+			for (x = 0; x < width; x++) {
+				raster[x] += raster[2 * rowsize + x];
+				raster[x] *= kernel[0];
+			}
+			raster = &raster[rowsize];
+		}
 		for (x = 0; x < width; x++)
 			raster[x] *= kernel[0];
-		for (i = 1; i < kernelsize; i++)
-			for (x = 0; x < width; x++)
-				raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]);
 		raster = &raster[rowsize];
+		for (x = 0; x < width; x++)
+			raster[x] *= kernel[0];
+
+	} else {
+		for (y = 0; y < height; y++) {
+			for (x = 0; x < width; x++)
+				raster[x] *= kernel[0];
+			for (i = 1; i < kernelsize; i++)
+				for (x = 0; x < width; x++)
+					raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]);
+			raster = &raster[rowsize];
+		}
 	}
 }
 
@@ -22,13 +42,28 @@ static void
 hconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel)
 {
 	size_t y, x, i;
-	for (y = 0; y < height; y++) {
-		for (x = 0; x < width; x++) {
-			raster[x] *= kernel[0];
-			for (i = 1; i < kernelsize; i++)
-				raster[x] = fma(raster[x + i], kernel[i], raster[x]);
+
+	if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) {
+		for (y = 0; y < height; y++) {
+			for (x = 0; x < width; x++) {
+				raster[x - 1] += raster[x];
+				raster[x - 2] += raster[x];
+				raster[x - 2] *= kernel[0];
+			}
+			raster[width - 2] *= kernel[0];
+			raster[width - 1] *= kernel[0];
+			raster = &raster[rowsize];
+		}
+
+	} else {
+		for (y = 0; y < height; y++) {
+			for (x = 0; x < width; x++) {
+				raster[x] *= kernel[0];
+				for (i = 1; i < kernelsize; i++)
+					raster[x] = fma(raster[x + i], kernel[i], raster[x]);
+			}
+			raster = &raster[rowsize];
 		}
-		raster = &raster[rowsize];
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2