aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <maandree@kth.se>2020-06-11 18:29:01 +0200
committerMattias Andrée <maandree@kth.se>2020-06-11 18:41:35 +0200
commit438208bafad929f34d837e8f23f121d0bbe02e9a (patch)
tree083e9441b6b01c1f29333ce5c4104c5098790808
downloadbinary-multisearch.h-438208bafad929f34d837e8f23f121d0bbe02e9a.tar.gz
binary-multisearch.h-438208bafad929f34d837e8f23f121d0bbe02e9a.tar.bz2
binary-multisearch.h-438208bafad929f34d837e8f23f121d0bbe02e9a.tar.xz
First commit
Signed-off-by: Mattias Andrée <maandree@kth.se>
-rw-r--r--.gitignore4
-rw-r--r--LICENSE15
-rw-r--r--Makefile24
-rw-r--r--README24
-rw-r--r--binary-multisearch.h152
-rw-r--r--config.mk5
-rw-r--r--test.c118
7 files changed, 342 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..dfc3571
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*\#*
+*~
+*.o
+/test
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..38e536c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+© 2020 Mattias Andrée <maandree@kth.se>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..df46d2b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,24 @@
+.POSIX:
+
+CONFIGFILE = config.mk
+include $(CONFIGFILE)
+
+all: test
+
+test: test.c binary-multisearch.h
+ $(CC) -o $@ test.c $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+
+check: test
+ ./test
+
+install:
+ mkdir -p -- "$(DESTDIR)$(PREFIX)/include"
+ cp -- binary-multisearch.h "$(DESTDIR)$(PREFIX)/include"
+
+uninstall:
+ -rm -f -- "$(DESTDIR)$(PREFIX)/include/binary-multisearch.h"
+
+clean:
+ -rm -f -- *.o test
+
+.PHONY: all check install uninstall clean
diff --git a/README b/README
new file mode 100644
index 0000000..aa692ca
--- /dev/null
+++ b/README
@@ -0,0 +1,24 @@
+Binary multisearch
+
+Description:
+ Given a sorted list of unique items, find their in another
+ list sorted list their position or if missing where they
+ ought to be inserted. Both list must allow random access.
+
+Complexity:
+ n = number of elements in target list
+ m = number of element to locate
+
+ Time complexity: O(S(n) + m)
+ S = time complexity function of underlaying search algorithm,
+ e.g. S(n) = O(log n) in average case for binary search given O(1) comparison.
+
+ Auxiliary space complexity:
+ List return version: O(m)
+ Online version: O(log m)
+
+Notes:
+ In online version, positions for elements are returned in
+ arbitrary (deterministic) order, rather than sequential.
+
+I wrote this algorithm back in Feburary 2013.
diff --git a/binary-multisearch.h b/binary-multisearch.h
new file mode 100644
index 0000000..7d8ee16
--- /dev/null
+++ b/binary-multisearch.h
@@ -0,0 +1,152 @@
+/*
+ * ISC License
+ *
+ * © 2020 Mattias Andrée <maandree@kth.se>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef BINARY_MUTLISEARCH_H
+#define BINARY_MUTLISEARCH_H
+
+#define __STDC_WANT_LIB_EXT1__
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+#ifndef RSIZE_MAX
+# define rsize_t size_t
+#endif
+
+
+struct binary_multisearch_stack_entry {
+ rsize_t min;
+ rsize_t max;
+ ssize_t lmin;
+ ssize_t lmax;
+};
+
+
+static ssize_t
+binary_search(const void *sought, const void *list, size_t list_width, ssize_t min,
+ ssize_t max, int (*cmp)(const void *sought, const void *list_elem, void *data), void *data)
+{
+ const char *clist = list;
+ ssize_t mid = min;
+ int r;
+
+ while (min <= max) {
+ mid = (ssize_t)((size_t)min + (size_t)max) / 2;
+ r = cmp(sought, &clist[(size_t)mid * list_width], data);
+ if (r < 0)
+ max = mid - 1;
+ else if (r > 0)
+ min = mid += 1;
+ else {
+ return mid;
+ }
+ }
+
+ return -mid - 1;
+}
+
+
+static void
+binary_multisearch(const void *sought, rsize_t sought_count, size_t sought_width, const void *list, rsize_t list_count,
+ size_t list_width, int (*cmp)(const void *sought_elem, const void *list_elem, void *data), void *data,
+ ssize_t (*search_function)(const void *sought, const void *list, size_t list_width, ssize_t min,
+ ssize_t max, int (*cmp)(const void *, const void *, void *), void *data),
+ struct binary_multisearch_stack_entry stack[restrict], ssize_t r[restrict sought_count])
+{
+ struct binary_multisearch_stack_entry e;
+ size_t stack_i;
+ ssize_t pos;
+ const char *csought = sought;
+
+ if (!sought_count)
+ return;
+
+ stack[0].min = 0;
+ stack[0].max = sought_count - 1;
+ stack[0].lmin = 0;
+ stack[0].lmax = (ssize_t)list_count - 1;
+ stack_i = 1;
+
+ while (stack_i) {
+ e = stack[--stack_i];
+ while (e.max != e.min) {
+ stack[stack_i].max = e.max;
+ stack[stack_i].lmax = e.lmax;
+
+ e.max = (e.min + e.max) / 2;
+ pos = search_function(&csought[e.max * sought_width], list, list_width, e.lmin, e.lmax, cmp, data);
+ r[e.max] = pos;
+
+ stack[stack_i].min = e.max + 1;
+ stack[stack_i].lmin = pos < 0 ? -pos - 1 : pos + 1;
+ stack_i++;
+ }
+ }
+
+ e.max = sought_count - 1;
+ r[e.max] = search_function(&csought[e.max * sought_width], list, list_width, e.lmin, e.lmax, cmp, data);
+}
+
+
+static void
+binary_multisearch_online(const void *sought, rsize_t sought_count, size_t sought_width, const void *list, rsize_t list_count,
+ size_t list_width, int (*cmp)(const void *sought_elem, const void *list_elem, void *data), void *data,
+ ssize_t (*search_function)(const void *sought, const void *list, size_t list_width, ssize_t min,
+ ssize_t max, int (*cmp)(const void *, const void *, void *), void *data),
+ struct binary_multisearch_stack_entry stack[restrict],
+ void (*result_function)(size_t index, ssize_t position, void *data))
+{
+ struct binary_multisearch_stack_entry e;
+ size_t stack_i;
+ ssize_t pos;
+ const char *csought = sought;
+
+ if (!sought_count)
+ return;
+
+ stack[0].min = 0;
+ stack[0].max = sought_count - 1;
+ stack[0].lmin = 0;
+ stack[0].lmax = (ssize_t)list_count - 1;
+ stack_i = 1;
+
+ while (stack_i) {
+ e = stack[--stack_i];
+ while (e.max != e.min) {
+ stack[stack_i].max = e.max;
+ stack[stack_i].lmax = e.lmax;
+
+ e.max = (e.min + e.max) / 2;
+ pos = search_function(&csought[e.max * sought_width], list, list_width, e.lmin, e.lmax, cmp, data);
+ result_function(e.max, pos, data);
+
+ stack[stack_i].min = e.max + 1;
+ stack[stack_i].lmin = pos < 0 ? -pos - 1 : pos + 1;
+ stack_i++;
+ }
+ }
+
+ e.max = sought_count - 1;
+ pos = search_function(&csought[e.max * sought_width], list, list_width, e.lmin, e.lmax, cmp, data);
+ result_function(e.max, pos, data);
+}
+
+
+#ifndef RSIZE_MAX
+# undef rsize_t
+#endif
+
+#endif
diff --git a/config.mk b/config.mk
new file mode 100644
index 0000000..868c124
--- /dev/null
+++ b/config.mk
@@ -0,0 +1,5 @@
+PREFIX = /usr
+
+CC = cc
+
+CFLAGS = -std=c99 -Wall
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..f38bb99
--- /dev/null
+++ b/test.c
@@ -0,0 +1,118 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "binary-multisearch.h"
+
+
+static struct binary_multisearch_stack_entry stack[16];
+static ssize_t result[1 << 16];
+static short int sought[1 << 16];
+static short int list[1 << 16];
+static size_t sought_count;
+static size_t list_count;
+
+
+static int
+cmp(const void *a, const void *b, void *data)
+{
+ const short int *x = a, *y = b;
+ (void) data;
+ return *x < *y ? -1 : *x > *y;
+}
+
+static short int
+random16(short int mask)
+{
+ return (short int)((((rand() & 255) << 8) | (rand() & 255)) & mask);
+}
+
+static void
+fill_random16(short int arr[], size_t n, short int mask)
+{
+ while (n--)
+ arr[n] = random16(mask);
+}
+
+static size_t
+uniq(short int arr[], size_t n)
+{
+ size_t r, w = (n > 0);
+ for (r = 1; r < n; r++)
+ if (arr[r] != arr[r - 1])
+ arr[w++] = arr[r];
+ return w;
+}
+
+static void
+test(size_t sn, size_t ln, short int mask)
+{
+ size_t i, p;
+
+ sought_count = sn;
+ list_count = ln;
+
+ fill_random16(sought, sought_count, mask);
+ fill_random16(list, list_count, mask);
+ qsort_r(sought, sought_count, sizeof(*sought), cmp, NULL);
+ qsort_r(list, list_count, sizeof(*list), cmp, NULL);
+ sought_count = uniq(sought, sought_count);
+
+ for (i = 0; i < sought_count; i++)
+ result[i] = SIZE_MAX / 2;
+
+ binary_multisearch(sought, sought_count, sizeof(*sought), list, list_count,
+ sizeof(*list), cmp, NULL, binary_search, stack, result);
+
+ for (i = 0; i < sought_count; i++) {
+ if (list_count == 0) {
+ if (result[i] != -1)
+ exit(1);
+ continue;
+ }
+ if (result[i] >= (ssize_t)list_count) {
+ exit(2);
+ }
+ if (result[i] >= 0) {
+ if (list[result[i]] != sought[i])
+ exit(3);
+ continue;
+ }
+ p = (size_t)(-result[i] - 1);
+ if (p > list_count) {
+ exit(4);
+ }
+ if (p == list_count) {
+ if (list[p - 1] >= sought[i])
+ exit(5);
+ continue;
+ }
+ if (list[p] <= sought[i])
+ exit(6);
+ if (p && list[p - 1] >= sought[i])
+ exit(7);
+ }
+}
+
+int
+main(void)
+{
+ size_t i, j;
+ short int mask;
+
+ srand((unsigned)time(NULL));
+
+ for (i = 1; i <= 16; i++) {
+ mask = (short int)(((size_t)1 << i) - 1);
+ test(0, 0, mask);
+ test(0, 10, mask);
+ test(10, 0, mask);
+ for (j = 0; j < 100; j++) {
+ test((size_t)(unsigned short int)random16(mask) + 1,
+ (size_t)(unsigned short int)random16(mask) + 1, mask);
+ }
+ }
+
+ return 0;
+}