From 704369867625fc72d1b714bdf839bc06ce0f5d49 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Tue, 31 Jan 2023 18:07:32 +0100 Subject: Add optimisations for common kernels in libglitter_redistribute_energy_double MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- libglitter.h | 16 +++++----- libglitter_redistribute_energy_double.c | 55 +++++++++++++++++++++++++++------ 2 files changed, 53 insertions(+), 18 deletions(-) diff --git a/libglitter.h b/libglitter.h index b7d409a..fe83073 100644 --- a/libglitter.h +++ b/libglitter.h @@ -241,14 +241,14 @@ void libglitter_update_render_context(LIBGLITTER_RENDER_CONTEXT *, size_t); * `(hkernelsize - 1) / 2` cells to the left and to * the right, and `(vkernelsize - 1) / 2` cells to * the up as well as down. The caller is responsible - * for furthering extending the image by `widthmul` - * - (hkernelsize - 1) / 2 % widthmul` cells both to - * on the left and on the right, and by `heightmul` - * - (vkernelsize - 1) / 2 % heightmul` cells both - * up and down (where `widthmul` and `heightmul` are - * arguments to the `libglitter_create_render_context` - * function); so that the raster can be input to - * `libglitter_compose_double`. + * for further extending the image by `(widthmul + * - (hkernelsize - 1) / 2 % widthmul) % widthmul` + * cells both on the left and on the right, and + * by `(heightmul - (vkernelsize - 1) / 2 % heightmul) + * % heightmul` cells both up and down (where + * `widthmul` and `heightmul` are arguments to the + * `libglitter_create_render_context` function); so that + * the raster can be input to `libglitter_compose_double`. * * @param raster The subpixel raster. 
The must be padded with * zero-initialised cells on the left side and diff --git a/libglitter_redistribute_energy_double.c b/libglitter_redistribute_energy_double.c index 9f97ed3..2a12f9b 100644 --- a/libglitter_redistribute_energy_double.c +++ b/libglitter_redistribute_energy_double.c @@ -7,13 +7,33 @@ static void vconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel) { size_t y, x, i; - for (y = 0; y < height; y++) { + + if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) { /* fast path: 3-element kernel whose weights are all equal, so rows are summed first and scaled once by kernel[0] */ + raster = &raster[-2 * rowsize]; /* step back two rows from the pointer passed in; NOTE(review): -2 * rowsize is evaluated in size_t and relies on unsigned wraparound, and the rows addressed here are presumably caller-provided padding -- confirm */ + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + raster[x] += raster[1 * rowsize + x]; + for (x = 0; x < width; x++) { + raster[x] += raster[2 * rowsize + x]; + raster[x] *= kernel[0]; + } + raster = &raster[rowsize]; + } /* NOTE(review): the two rows handled after this loop are only scaled by kernel[0]; the first of them never has the second added to it, whereas hconvolute accumulates raster[x - 1] += raster[x] before its trailing scaling -- confirm this asymmetry is intended */ for (x = 0; x < width; x++) raster[x] *= kernel[0]; - for (i = 1; i < kernelsize; i++) - for (x = 0; x < width; x++) - raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]); raster = &raster[rowsize]; + for (x = 0; x < width; x++) + raster[x] *= kernel[0]; + + } else { /* general path: scale the current row by kernel[0], then accumulate row i below it weighted by kernel[i] using fma */ + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + raster[x] *= kernel[0]; + for (i = 1; i < kernelsize; i++) + for (x = 0; x < width; x++) + raster[x] = fma(raster[i * rowsize + x], kernel[i], raster[x]); + raster = &raster[rowsize]; + } } } @@ -22,13 +42,28 @@ static void hconvolute(double *restrict raster, size_t rowsize, size_t width, size_t height, size_t kernelsize, const double *kernel) { size_t y, x, i; - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - raster[x] *= kernel[0]; - for (i = 1; i < kernelsize; i++) - raster[x] = fma(raster[x + i], kernel[i], raster[x]); + + if (kernelsize == 3 && kernel[0] == kernel[1] && kernel[1] == kernel[2]) { /* fast path: 3-element kernel whose weights are all equal */ + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { /* each cell is added into the two cells before it, then the cell two back is scaled by kernel[0]; x is size_t, so x - 1 and x - 2 wrap for x < 2 -- NOTE(review): presumably this reaches padding cells before raster[0]; confirm with the caller */ + raster[x - 1] += raster[x]; + raster[x - 2] += raster[x]; + raster[x - 2] *= kernel[0]; + } + raster[width - 2] *= 
kernel[0]; + raster[width - 1] *= kernel[0]; + raster = &raster[rowsize]; + } + + } else { /* general path: the same per-cell multiply-accumulate over kernel[0..kernelsize-1] that the removed lines performed */ + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + raster[x] *= kernel[0]; + for (i = 1; i < kernelsize; i++) + raster[x] = fma(raster[x + i], kernel[i], raster[x]); + } + raster = &raster[rowsize]; } - raster = &raster[rowsize]; } } -- cgit v1.2.3-70-g09d2