/* See LICENSE file for copyright and license details. */ #include #include #include USAGE("[-b block-size] [-h] ([-i | +i | -n | +n] ... (-r basic-regex | -e extended-regex | -x text)) ... " "file [first-block [last-block]]"); /* * TODO on SIGINT stop and print last completed block * TODO use madvise, posix_advise or readahead * TODO allocate addition blocks to reduce memory copying * TODO show progress * TODO make program that extracts (binary) or displays (hex + text) the blocks * TODO add support for NUL support in regex */ struct memstring { const char *text; size_t len; int icase; }; static struct memstring *texts = NULL; static size_t ntexts = 0; static regex_t *automata = NULL; static size_t nautomata = 0; static int analyse_holes = 0; static off_t position; static void analyse(char *block, size_t blksize, size_t available) /* TODO */ { } static size_t encountered_data(char *block, size_t blksize, size_t available) { if (available < blksize * 2u) return available; analyse(block, blksize, available); memcpy(&block[0], &block[blksize], blksize); position += (off_t)blksize; return blksize; } static size_t encountered_hole(char *block, size_t blksize, size_t available, off_t size) { off_t q = size / (off_t)blksize; off_t r = size % (off_t)blksize; size_t n; if (q) { q--; r += (off_t)blksize; } while (r || q) { n = blksize * 2u - available; n = MIN(n, (size_t)r); memset(&block[available], 0, n); available += n; r -= (off_t)n; available = encountered_data(block, blksize, available); if (q) { q--; memset(&block[available], 0, blksize); available = encountered_data(block, blksize, available); } if (q) { q--; memset(&block[available], 0, blksize); available = encountered_data(block, blksize, available); } if (analyse_holes) { for (; q; q--) available = encountered_data(block, blksize, available); } } return available; } static void encountered_eof(char *block, size_t blksize, size_t available) { analyse(block, blksize, available); } static void eregcomp(regex_t *restrict preg, const char *restrict regex, int cflags) { int e = regcomp(preg, regex, cflags); if (e) { char buf_static[512]; char *buf_dynamic = NULL; char *buf = buf_static; size_t buf_size = sizeof(buf_static); size_t r; regerror_again: r = regerror(e, preg, buf, buf_size); if (r > buf_size) { buf = buf_dynamic = erealloc(buf_dynamic, buf_size *= 2); goto regerror_again; } eprintf("regcomp %s: %s", regex, buf); free(buf_dynamic); } } int main(int argc, char *argv[]) { const char *path; char *block; size_t available = 0u; int regex_flags = REG_NOSUB; int have_unused_flags = 0; size_t blksize = 4ul << 10; off_t first_block = 0; off_t last_block = -1; off_t data, hole; int skip_holes = 1; int fd; struct stat st; ARGBEGIN { case 'r': case 'e': have_unused_flags = 0; automata = ereallocarray(automata, nautomata + 1u, sizeof(*automata)); eregcomp(&automata[nautomata++], ARG(), regex_flags | (FLAG() == 'e' ? REG_EXTENDED : 0)); break; case 'x': have_unused_flags = 0; texts = ereallocarray(texts, ntexts + 1u, sizeof(*texts)); texts[ntexts].text = ARG(); texts[ntexts].len = strlen(texts[ntexts].text); texts[ntexts].icase = !!(regex_flags & REG_ICASE); ntexts++; break; case 'h': analyse_holes = 1; break; case 'b': /* TODO set `blksize` */ break; case 'i': regex_flags |= REG_ICASE; have_unused_flags = 1; break; case 'n': regex_flags |= REG_NEWLINE; have_unused_flags = 1; break; default: usage(); } ARGALT('+') { case 'i': regex_flags &= ~REG_ICASE; have_unused_flags = 1; break; case 'n': regex_flags &= ~REG_NEWLINE; have_unused_flags = 1; break; default: usage(); } ARGEND; if (argc < 1 || argc > 3 || have_unused_flags || !nautomata) usage(); path = argv[0]; if (argc >= 2) ; /* TODO set first_block */ if (argc >= 3) { ; /* TODO set last_block */ if (last_block < first_block) usage(); } block = emalloc(blksize * 2u); first_block *= (off_t)blksize; if (last_block >= 0) { last_block += 1; last_block *= (off_t)blksize; } /* Open file to analyse */ fd = open(path, O_RDONLY); if (fd < 0) eprintf("open %s O_RDONLY", path); /* Can skip holes? */ if (fstat(fd, &st)) eprintf("fstat %s:", path); if (!S_ISREG(st.st_mode)) skip_holes = 0; /* Skip to first block to analyse */ if (first_block && lseek(fd, first_block, SEEK_SET) == (off_t)-1) { off_t remaining = first_block; while (remaining) { uintmax_t n = MIN((uintmax_t)remaining, (uintmax_t)(blksize * 2u)); ssize_t r = read(fd, block, (size_t)n); if (r <= 0) { if (!r) goto out; if (errno == EINTR) continue; eprintf("read %s:", path); } position += (off_t)r; } } position = first_block; /* Analyse to last block (to end of file if last_block < 0) */ for (; last_block < 0 || first_block < last_block; first_block = data) { /* Seek to data to find end of hole, and analyse the hole */ if (skip_holes) { data = lseek(fd, first_block, SEEK_DATA); if (data == (off_t)-1) { data = first_block; if (errno != EBADF) skip_holes = 0; else eprintf("lseek %s %ji SEEK_DATA", path, (intmax_t)first_block); } else { available = encountered_hole(block, blksize, available, data - first_block); } } else { data = first_block; } /* Seek to hole to find end of data */ if (skip_holes) { hole = lseek(fd, data, SEEK_HOLE); if (hole == (off_t)-1) { if (errno != EBADF) skip_holes = 0; else eprintf("lseek %s %ji SEEK_HOLE", path, (intmax_t)data); } else { if (lseek(fd, data, SEEK_SET) == (off_t)-1) eprintf("lseek %s %ji SEEK_SET", path, (intmax_t)data); } } else { hole = (off_t)-1; } if (hole > last_block) hole = last_block; /* Analyse the data */ while (data < hole) { off_t n_off = hole - data; size_t n_size = blksize * 2u - available; size_t n = (uintmax_t)n_off > (uintmax_t)SSIZE_MAX ? n_size : MIN((size_t)n_off, n_size); ssize_t r = read(fd, &block[available], n); if (r <= 0) { if (!r) goto eof; if (errno == EINTR) continue; eprintf("read %s:", path); } available += (size_t)r; if (available == blksize * 2u) available = encountered_data(block, blksize, available); } } eof: /* Analyse end of file */ available = encountered_data(block, blksize, available); if (skip_holes && first_block < st.st_size) available = encountered_hole(block, blksize, available, st.st_size - first_block); encountered_eof(block, blksize, available); out: close(fd); while (nautomata) regfree(&automata[--nautomata]); free(automata); free(texts); free(block); return 0; }