From 438208bafad929f34d837e8f23f121d0bbe02e9a Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Thu, 11 Jun 2020 18:29:01 +0200 Subject: First commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- .gitignore | 4 ++ LICENSE | 15 +++++ Makefile | 24 ++++++++ README | 24 ++++++++ binary-multisearch.h | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++ config.mk | 5 ++ test.c | 118 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 342 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README create mode 100644 binary-multisearch.h create mode 100644 config.mk create mode 100644 test.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dfc3571 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*\#* +*~ +*.o +/test diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..38e536c --- /dev/null +++ b/LICENSE @@ -0,0 +1,15 @@ +ISC License + +© 2020 Mattias Andrée + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..df46d2b --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +.POSIX: + +CONFIGFILE = config.mk +include $(CONFIGFILE) + +all: test + +test: test.c binary-multisearch.h + $(CC) -o $@ test.c $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) + +check: test + ./test + +install: + mkdir -p -- "$(DESTDIR)$(PREFIX)/include" + cp -- binary-multisearch.h "$(DESTDIR)$(PREFIX)/include" + +uninstall: + -rm -f -- "$(DESTDIR)$(PREFIX)/include/binary-multisearch.h" + +clean: + -rm -f -- *.o test + +.PHONY: all check install uninstall clean diff --git a/README b/README new file mode 100644 index 0000000..aa692ca --- /dev/null +++ b/README @@ -0,0 +1,24 @@ +Binary multisearch + +Description: + Given a sorted list of unique items, find their in another + list sorted list their position or if missing where they + ought to be inserted. Both list must allow random access. + +Complexity: + n = number of elements in target list + m = number of element to locate + + Time complexity: O(S(n) + m) + S = time complexity function of underlaying search algorithm, + e.g. S(n) = O(log n) in average case for binary search given O(1) comparison. + + Auxiliary space complexity: + List return version: O(m) + Online version: O(log m) + +Notes: + In online version, positions for elements are returned in + arbitrary (deterministic) order, rather than sequential. + +I wrote this algorithm back in Feburary 2013. diff --git a/binary-multisearch.h b/binary-multisearch.h new file mode 100644 index 0000000..7d8ee16 --- /dev/null +++ b/binary-multisearch.h @@ -0,0 +1,152 @@ +/* + * ISC License + * + * © 2020 Mattias Andrée + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef BINARY_MUTLISEARCH_H +#define BINARY_MUTLISEARCH_H + +#define __STDC_WANT_LIB_EXT1__ +#include +#include +#include +#ifndef RSIZE_MAX +# define rsize_t size_t +#endif + + +struct binary_multisearch_stack_entry { + rsize_t min; + rsize_t max; + ssize_t lmin; + ssize_t lmax; +}; + + +static ssize_t +binary_search(const void *sought, const void *list, size_t list_width, ssize_t min, + ssize_t max, int (*cmp)(const void *sought, const void *list_elem, void *data), void *data) +{ + const char *clist = list; + ssize_t mid = min; + int r; + + while (min <= max) { + mid = (ssize_t)((size_t)min + (size_t)max) / 2; + r = cmp(sought, &clist[(size_t)mid * list_width], data); + if (r < 0) + max = mid - 1; + else if (r > 0) + min = mid += 1; + else { + return mid; + } + } + + return -mid - 1; +} + + +static void +binary_multisearch(const void *sought, rsize_t sought_count, size_t sought_width, const void *list, rsize_t list_count, + size_t list_width, int (*cmp)(const void *sought_elem, const void *list_elem, void *data), void *data, + ssize_t (*search_function)(const void *sought, const void *list, size_t list_width, ssize_t min, + ssize_t max, int (*cmp)(const void *, const void *, void *), void *data), + struct binary_multisearch_stack_entry stack[restrict], ssize_t r[restrict sought_count]) +{ + struct binary_multisearch_stack_entry e; + size_t stack_i; + ssize_t pos; + const char *csought = sought; + + if (!sought_count) + return; + + stack[0].min = 0; + stack[0].max = sought_count - 1; + stack[0].lmin = 0; + stack[0].lmax = (ssize_t)list_count - 1; + stack_i = 1; + + while (stack_i) { + e = stack[--stack_i]; + while (e.max != e.min) { + stack[stack_i].max = e.max; + stack[stack_i].lmax = e.lmax; + + e.max = (e.min + e.max) / 2; + pos = search_function(&csought[e.max * sought_width], list, list_width, e.lmin, e.lmax, cmp, data); + r[e.max] = pos; + + stack[stack_i].min = e.max + 1; + stack[stack_i].lmin = pos < 0 ? -pos - 1 : pos + 1; + stack_i++; + } + } + + e.max = sought_count - 1; + r[e.max] = search_function(&csought[e.max * sought_width], list, list_width, e.lmin, e.lmax, cmp, data); +} + + +static void +binary_multisearch_online(const void *sought, rsize_t sought_count, size_t sought_width, const void *list, rsize_t list_count, + size_t list_width, int (*cmp)(const void *sought_elem, const void *list_elem, void *data), void *data, + ssize_t (*search_function)(const void *sought, const void *list, size_t list_width, ssize_t min, + ssize_t max, int (*cmp)(const void *, const void *, void *), void *data), + struct binary_multisearch_stack_entry stack[restrict], + void (*result_function)(size_t index, ssize_t position, void *data)) +{ + struct binary_multisearch_stack_entry e; + size_t stack_i; + ssize_t pos; + const char *csought = sought; + + if (!sought_count) + return; + + stack[0].min = 0; + stack[0].max = sought_count - 1; + stack[0].lmin = 0; + stack[0].lmax = (ssize_t)list_count - 1; + stack_i = 1; + + while (stack_i) { + e = stack[--stack_i]; + while (e.max != e.min) { + stack[stack_i].max = e.max; + stack[stack_i].lmax = e.lmax; + + e.max = (e.min + e.max) / 2; + pos = search_function(&csought[e.max * sought_width], list, list_width, e.lmin, e.lmax, cmp, data); + result_function(e.max, pos, data); + + stack[stack_i].min = e.max + 1; + stack[stack_i].lmin = pos < 0 ? -pos - 1 : pos + 1; + stack_i++; + } + } + + e.max = sought_count - 1; + pos = search_function(&csought[e.max * sought_width], list, list_width, e.lmin, e.lmax, cmp, data); + result_function(e.max, pos, data); +} + + +#ifndef RSIZE_MAX +# undef rsize_t +#endif + +#endif diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..868c124 --- /dev/null +++ b/config.mk @@ -0,0 +1,5 @@ +PREFIX = /usr + +CC = cc + +CFLAGS = -std=c99 -Wall diff --git a/test.c b/test.c new file mode 100644 index 0000000..f38bb99 --- /dev/null +++ b/test.c @@ -0,0 +1,118 @@ +#define _GNU_SOURCE +#include +#include +#include + +#include "binary-multisearch.h" + + +static struct binary_multisearch_stack_entry stack[16]; +static ssize_t result[1 << 16]; +static short int sought[1 << 16]; +static short int list[1 << 16]; +static size_t sought_count; +static size_t list_count; + + +static int +cmp(const void *a, const void *b, void *data) +{ + const short int *x = a, *y = b; + (void) data; + return *x < *y ? -1 : *x > *y; +} + +static short int +random16(short int mask) +{ + return (short int)((((rand() & 255) << 8) | (rand() & 255)) & mask); +} + +static void +fill_random16(short int arr[], size_t n, short int mask) +{ + while (n--) + arr[n] = random16(mask); +} + +static size_t +uniq(short int arr[], size_t n) +{ + size_t r, w = (n > 0); + for (r = 1; r < n; r++) + if (arr[r] != arr[r - 1]) + arr[w++] = arr[r]; + return w; +} + +static void +test(size_t sn, size_t ln, short int mask) +{ + size_t i, p; + + sought_count = sn; + list_count = ln; + + fill_random16(sought, sought_count, mask); + fill_random16(list, list_count, mask); + qsort_r(sought, sought_count, sizeof(*sought), cmp, NULL); + qsort_r(list, list_count, sizeof(*list), cmp, NULL); + sought_count = uniq(sought, sought_count); + + for (i = 0; i < sought_count; i++) + result[i] = SIZE_MAX / 2; + + binary_multisearch(sought, sought_count, sizeof(*sought), list, list_count, + sizeof(*list), cmp, NULL, binary_search, stack, result); + + for (i = 0; i < sought_count; i++) { + if (list_count == 0) { + if (result[i] != -1) + exit(1); + continue; + } + if (result[i] >= (ssize_t)list_count) { + exit(2); + } + if (result[i] >= 0) { + if (list[result[i]] != sought[i]) + exit(3); + continue; + } + p = (size_t)(-result[i] - 1); + if (p > list_count) { + exit(4); + } + if (p == list_count) { + if (list[p - 1] >= sought[i]) + exit(5); + continue; + } + if (list[p] <= sought[i]) + exit(6); + if (p && list[p - 1] >= sought[i]) + exit(7); + } +} + +int +main(void) +{ + size_t i, j; + short int mask; + + srand((unsigned)time(NULL)); + + for (i = 1; i <= 16; i++) { + mask = (short int)(((size_t)1 << i) - 1); + test(0, 0, mask); + test(0, 10, mask); + test(10, 0, mask); + for (j = 0; j < 100; j++) { + test((size_t)(unsigned short int)random16(mask) + 1, + (size_t)(unsigned short int)random16(mask) + 1, mask); + } + } + + return 0; +} -- cgit v1.2.3-70-g09d2