diff options
| author | Mattias Andrée <m@maandree.se> | 2026-06-04 19:05:04 +0200 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-06-04 19:05:04 +0200 |
| commit | 0a4371efcadf68f49a763964226eb8e8bd4d44ec (patch) | |
| tree | 3e2e2fd5b404df7bdc53877d0bee5bae61a2b0c2 | |
| download | blkseekr-0a4371efcadf68f49a763964226eb8e8bd4d44ec.tar.gz blkseekr-0a4371efcadf68f49a763964226eb8e8bd4d44ec.tar.bz2 blkseekr-0a4371efcadf68f49a763964226eb8e8bd4d44ec.tar.xz | |
First commit
Signed-off-by: Mattias Andrée <m@maandree.se>
| -rw-r--r-- | .gitignore | 15 | ||||
| -rw-r--r-- | LICENSE | 15 | ||||
| -rw-r--r-- | Makefile | 37 | ||||
| -rw-r--r-- | blkseekr.c | 297 | ||||
| -rw-r--r-- | config.mk | 8 |
5 files changed, 372 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..031be57
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+*\#*
+*~
+*.o
+*.a
+*.lo
+*.su
+*.so
+*.so.*
+*.dll
+*.dylib
+*.gch
+*.gcov
+*.gcno
+*.gcda
+/blkseekr
diff --git a/LICENSE b/LICENSE
new file mode 100644
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+© 2025, 2026 Mattias Andrée <m@maandree.se>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..99af225
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,37 @@
+.POSIX:
+
+CONFIGFILE = config.mk
+include $(CONFIGFILE)
+
+OBJ =\
+	blkseekr.o
+
+HDR =
+
+all: blkseekr
+$(OBJ): $(HDR)
+
+.c.o:
+	$(CC) -c -o $@ $< $(CFLAGS) $(CPPFLAGS)
+
+blkseekr: $(OBJ)
+	$(CC) -o $@ $(OBJ) $(LDFLAGS)
+
+install: blkseekr
+	mkdir -p -- "$(DESTDIR)$(PREFIX)/bin"
+	mkdir -p -- "$(DESTDIR)$(MANPREFIX)/man1/"
+	cp -- blkseekr "$(DESTDIR)$(PREFIX)/bin/"
+# TODO cp -- blkseekr.1 "$(DESTDIR)$(MANPREFIX)/man1/"
+
+uninstall:
+	-rm -f -- "$(DESTDIR)$(PREFIX)/bin/blkseekr"
+	-rm -f -- "$(DESTDIR)$(MANPREFIX)/man1/blkseekr.1"
+
+clean:
+	-rm -f -- *.o *.a *.lo *.su *.so *.so.* *.gch *.gcov *.gcno *.gcda
+	-rm -f -- blkseekr
+
+.SUFFIXES:
+.SUFFIXES: .o .c
+
+.PHONY: all install uninstall clean
diff --git a/blkseekr.c b/blkseekr.c
new file mode 100644
index 0000000..0f1c199
--- /dev/null
+++ b/blkseekr.c
@@ -0,0 +1,363 @@
+/* See LICENSE file for copyright and license details. */
+#include <libsimple-arg.h>
+#include <libsimple.h>
+#include <regex.h>
+
+USAGE("[-b block-size] [-h] ([-i | +i | -n | +n] ... (-r basic-regex | -e extended-regex | -x text)) ... "
+      "file [first-block [last-block]]");
+
+
+/*
+ * TODO on SIGINT stop and print last completed block
+ * TODO use madvise, posix_fadvise or readahead
+ * TODO allocate additional blocks to reduce memory copying
+ * TODO show progress
+ * TODO make program that extracts (binary) or displays (hex + text) the blocks
+ * TODO add support for NUL bytes in regex
+ */
+
+
+/**
+ * Literal search text, added with -x
+ */
+struct memstring {
+	const char *text; /* the text as given on the command line */
+	size_t len;       /* length of `text`, in bytes */
+	int icase;        /* whether REG_ICASE was set when the text was added */
+};
+
+
+static struct memstring *texts = NULL; /* texts added with -x */
+static size_t ntexts = 0;
+static regex_t *automata = NULL; /* regular expressions added with -r and -e */
+static size_t nautomata = 0;
+static int analyse_holes = 0; /* set by -h */
+static off_t position; /* file offset of the first byte in the window */
+
+
+/**
+ * Placeholder for the pattern matching, currently does nothing
+ *
+ * @param  block      The window, two blocks in size
+ * @param  blksize    The size of one block
+ * @param  available  The number of buffered bytes in `block`
+ */
+static void
+analyse(char *block, size_t blksize, size_t available) /* TODO */
+{
+}
+
+
+/**
+ * Flush the first block out of the window if the window is full
+ *
+ * If fewer than two blocks are buffered, nothing is done. Otherwise the
+ * window is passed to analyse(), the second block is moved to the front
+ * of the window, and `position` is advanced by one block.
+ *
+ * @param   block      The window, two blocks in size
+ * @param   blksize    The size of one block
+ * @param   available  The number of buffered bytes in `block`
+ * @return             The number of bytes left buffered in `block`
+ */
+static size_t
+encountered_data(char *block, size_t blksize, size_t available)
+{
+	if (available < blksize * 2u)
+		return available;
+
+	analyse(block, blksize, available);
+
+	/* The two halves cannot overlap, so memcpy is sufficient */
+	memcpy(&block[0], &block[blksize], blksize);
+	position += (off_t)blksize;
+	return blksize;
+}
+
+
+/**
+ * Feed a hole through the window as zeroes
+ *
+ * Unless -h was specified, whole blocks of the hole are skipped, without
+ * being analysed, as soon as two blocks have been flushed: at that point
+ * the window contains nothing but one block of zeroes from the hole.
+ *
+ * @param   block      The window, two blocks in size
+ * @param   blksize    The size of one block
+ * @param   available  The number of buffered bytes in `block`
+ * @param   size       The size of the hole, in bytes
+ * @return             The number of bytes left buffered in `block`
+ */
+static size_t
+encountered_hole(char *block, size_t blksize, size_t available, off_t size)
+{
+	size_t room, n;
+	off_t skipped;
+	int flushed = 0;
+
+	while (size > 0) {
+		/* Never write more zeroes than there is room for in the window */
+		room = blksize * 2u - available;
+		n = (uintmax_t)size < (uintmax_t)room ? (size_t)size : room;
+		memset(&block[available], 0, n);
+		available += n;
+		size -= (off_t)n;
+
+		/* The hole ended before the window was filled */
+		if (available < blksize * 2u)
+			break;
+
+		available = encountered_data(block, blksize, available);
+		if (analyse_holes)
+			continue;
+		if (flushed < 2)
+			flushed++;
+		if (flushed < 2 || size < (off_t)blksize)
+			continue;
+
+		/* Everything buffered before the hole has been analysed:
+		 * skip the remaining whole blocks of the hole */
+		skipped = size - size % (off_t)blksize;
+		position += skipped;
+		size -= skipped;
+	}
+
+	return available;
+}
+
+
+/**
+ * Analyse the data left in the window when there is no more input
+ *
+ * @param  block      The window, two blocks in size
+ * @param  blksize    The size of one block
+ * @param  available  The number of buffered bytes in `block`
+ */
+static void
+encountered_eof(char *block, size_t blksize, size_t available)
+{
+	analyse(block, blksize, available);
+}
+
+
+/**
+ * Compile a regular expression, and report the error
+ * with eprintf if it cannot be compiled
+ *
+ * @param  preg    Output parameter for the compiled expression
+ * @param  regex   The expression to compile
+ * @param  cflags  Flags for regcomp(3)
+ */
+static void
+eregcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
+{
+	int e = regcomp(preg, regex, cflags);
+	if (e) {
+		char buf_static[512];
+		char *buf_dynamic = NULL;
+		char *buf = buf_static;
+		size_t buf_size = sizeof(buf_static);
+		size_t r;
+	regerror_again:
+		/* regerror(3) returns the buffer size needed for the full message */
+		r = regerror(e, preg, buf, buf_size);
+		if (r > buf_size) {
+			buf = buf_dynamic = erealloc(buf_dynamic, buf_size *= 2);
+			goto regerror_again;
+		}
+		eprintf("regcomp %s: %s", regex, buf);
+		free(buf_dynamic);
+	}
+}
+
+
+/**
+ * Parse the command line, then feed the file through the window,
+ * using SEEK_DATA and SEEK_HOLE to locate holes when possible
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *path;
+	char *block;
+	size_t available = 0u;
+	int regex_flags = REG_NOSUB;
+	int have_unused_flags = 0;
+	size_t blksize = 4ul << 10;
+	off_t first_block = 0;
+	off_t last_block = -1;
+	off_t data, hole;
+	int skip_holes = 1;
+	int fd;
+	struct stat st;
+
+	ARGBEGIN {
+	case 'r':
+	case 'e':
+		have_unused_flags = 0;
+		automata = ereallocarray(automata, nautomata + 1u, sizeof(*automata));
+		eregcomp(&automata[nautomata++], ARG(), regex_flags | (FLAG() == 'e' ? REG_EXTENDED : 0));
+		break;
+
+	case 'x':
+		have_unused_flags = 0;
+		texts = ereallocarray(texts, ntexts + 1u, sizeof(*texts));
+		texts[ntexts].text = ARG();
+		texts[ntexts].len = strlen(texts[ntexts].text);
+		texts[ntexts].icase = !!(regex_flags & REG_ICASE);
+		ntexts++;
+		break;
+
+	case 'h':
+		analyse_holes = 1;
+		break;
+
+	case 'b':
+		/* TODO set `blksize` */
+		break;
+
+	case 'i': regex_flags |= REG_ICASE;   have_unused_flags = 1; break;
+	case 'n': regex_flags |= REG_NEWLINE; have_unused_flags = 1; break;
+	default:
+		usage();
+	} ARGALT('+') {
+	case 'i': regex_flags &= ~REG_ICASE;   have_unused_flags = 1; break;
+	case 'n': regex_flags &= ~REG_NEWLINE; have_unused_flags = 1; break;
+	default:
+		usage();
+	} ARGEND;
+
+	/* A pattern (-r, -e or -x) is required, and -i/+i/-n/+n must be followed by one */
+	if (argc < 1 || argc > 3 || have_unused_flags || (!nautomata && !ntexts))
+		usage();
+
+	path = argv[0];
+
+	if (argc >= 2)
+		; /* TODO set first_block */
+
+	if (argc >= 3) {
+		; /* TODO set last_block */
+		if (last_block < first_block)
+			usage();
+	}
+
+	block = emalloc(blksize * 2u);
+	/* Convert block indices to byte offsets; last_block becomes exclusive */
+	first_block *= (off_t)blksize;
+	if (last_block >= 0) {
+		last_block += 1;
+		last_block *= (off_t)blksize;
+	}
+
+	/* Open file to analyse */
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		eprintf("open %s O_RDONLY:", path);
+
+	/* Can skip holes? */
+	if (fstat(fd, &st))
+		eprintf("fstat %s:", path);
+	if (!S_ISREG(st.st_mode))
+		skip_holes = 0;
+
+	/* Skip to first block to analyse, reading instead if seeking is not possible */
+	if (first_block && lseek(fd, first_block, SEEK_SET) == (off_t)-1) {
+		off_t remaining = first_block;
+		while (remaining) {
+			uintmax_t n = MIN((uintmax_t)remaining, (uintmax_t)(blksize * 2u));
+			ssize_t r = read(fd, block, (size_t)n);
+			if (r <= 0) {
+				if (!r)
+					goto out;
+				if (errno == EINTR)
+					continue;
+				eprintf("read %s:", path);
+			}
+			remaining -= (off_t)r;
+		}
+	}
+	position = first_block;
+
+	/* Analyse to last block (to end of file if last_block < 0) */
+	for (; last_block < 0 || first_block < last_block; first_block = data) {
+		/* Seek to data to find end of hole, and analyse the hole */
+		if (skip_holes) {
+			data = lseek(fd, first_block, SEEK_DATA);
+			if (data == (off_t)-1) {
+				data = first_block;
+				if (errno != EBADF)
+					skip_holes = 0;
+				else
+					eprintf("lseek %s %ji SEEK_DATA:", path, (intmax_t)first_block);
+			} else {
+				/* Do not analyse past the last requested block */
+				if (last_block >= 0 && data > last_block)
+					data = last_block;
+				available = encountered_hole(block, blksize, available, data - first_block);
+			}
+		} else {
+			data = first_block;
+		}
+
+		/* Seek to hole to find end of data */
+		if (skip_holes) {
+			hole = lseek(fd, data, SEEK_HOLE);
+			if (hole == (off_t)-1) {
+				if (errno != EBADF)
+					skip_holes = 0;
+				else
+					eprintf("lseek %s %ji SEEK_HOLE:", path, (intmax_t)data);
+			} else {
+				if (lseek(fd, data, SEEK_SET) == (off_t)-1)
+					eprintf("lseek %s %ji SEEK_SET:", path, (intmax_t)data);
+			}
+		} else {
+			hole = (off_t)-1;
+		}
+
+		/* hole < 0 means that the end of the data is unknown, in
+		 * which case data is read until last_block or end of file */
+		if (last_block >= 0 && (hole < 0 || hole > last_block))
+			hole = last_block;
+
+		/* Analyse the data */
+		while (hole < 0 || data < hole) {
+			size_t n = blksize * 2u - available;
+			ssize_t r;
+
+			if (hole >= 0 && (uintmax_t)(hole - data) < (uintmax_t)n)
+				n = (size_t)(hole - data);
+			r = read(fd, &block[available], n);
+			if (r <= 0) {
+				if (!r) {
+					first_block = data;
+					goto eof;
+				}
+				if (errno == EINTR)
+					continue;
+				eprintf("read %s:", path);
+			}
+			available += (size_t)r;
+			data += (off_t)r;
+			if (available == blksize * 2u)
+				available = encountered_data(block, blksize, available);
+		}
+	}
+eof:
+	/* Analyse end of file */
+	available = encountered_data(block, blksize, available);
+	if (skip_holes && last_block < 0 && first_block < st.st_size)
+		available = encountered_hole(block, blksize, available, st.st_size - first_block);
+	encountered_eof(block, blksize, available);
+
+out:
+	close(fd);
+
+	while (nautomata)
+		regfree(&automata[--nautomata]);
+	free(automata);
+	free(texts);
+	free(block);
+	return 0;
+}
diff --git a/config.mk b/config.mk
new file mode 100644
index 0000000..aeb8cec
--- /dev/null
+++ b/config.mk
@@ -0,0 +1,8 @@
+PREFIX = /usr
+MANPREFIX = $(PREFIX)/share/man
+
+CC = c99
+
+CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 -D_GNU_SOURCE
+CFLAGS =
+LDFLAGS = -lsimple
