/**
* mds — A micro-display server
* Copyright © 2014, 2015, 2016, 2017 Mattias Andrée (maandree@kth.se)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "raw-data.h"
#include "globals.h"
#include "string.h"
#include <libmdsserver/macros.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
/**
 * Put a `mds_kbdc_source_code_t` into its empty state
 *
 * All four buffer pointers are set to `NULL`, and both the
 * line count and the duplicate counter are set to zero.
 *
 * @param  this  The `mds_kbdc_source_code_t*` to set up
 */
void
mds_kbdc_source_code_initialise(mds_kbdc_source_code_t *restrict this)
{
	/* Counters first, buffers second; the stores are independent of each other. */
	this->duplicates = 0;
	this->line_count = 0;

	this->real_content = NULL;
	this->content = NULL;
	this->real_lines = NULL;
	this->lines = NULL;
}
/**
 * Drop one reference to the data held by a `mds_kbdc_source_code_t`
 *
 * Nothing happens if `this` is `NULL`. Otherwise the duplicate
 * counter is decremented on every call; the buffers are released
 * (and their pointers cleared) only when the counter was zero
 * before the decrement. The structure itself is not freed.
 *
 * @param  this  The `mds_kbdc_source_code_t*`, may be `NULL`
 */
void
mds_kbdc_source_code_destroy(mds_kbdc_source_code_t *restrict this)
{
	if (this == NULL)
		return;

	/* Outstanding duplicates keep the buffers alive; note that the
	   counter is decremented even when it was already zero. */
	if (this->duplicates-- != 0)
		return;

	free(this->lines);
	free(this->real_lines);
	free(this->content);
	free(this->real_content);

	/* Clear the pointers so that nothing dangling is left behind. */
	this->lines = NULL;
	this->real_lines = NULL;
	this->content = NULL;
	this->real_content = NULL;
}
/**
 * Drop one reference to a `mds_kbdc_source_code_t`, and free the
 * structure together with its buffers when it was the last one
 *
 * Nothing happens if `this` is `NULL`. Otherwise the duplicate
 * counter is decremented; only when it was zero before the
 * decrement are the four buffers and `this` itself freed.
 *
 * @param  this  The `mds_kbdc_source_code_t*`, may be `NULL`
 */
void
mds_kbdc_source_code_free(mds_kbdc_source_code_t *restrict this)
{
	if (this == NULL)
		return;

	/* Someone else still holds a duplicate: just count down. */
	if (this->duplicates-- != 0)
		return;

	free(this->lines);
	free(this->real_lines);
	free(this->content);
	free(this->real_content);
	free(this);
}
/**
 * Register one more holder of a `mds_kbdc_source_code_t`
 *
 * No data is copied: the duplicate counter is incremented
 * and the very same pointer is handed back.
 *
 * @param   this  The `mds_kbdc_source_code_t*`, must not be `NULL`
 * @return        `this` is returned
 */
mds_kbdc_source_code_t *
mds_kbdc_source_code_dup(mds_kbdc_source_code_t *restrict this)
{
	this->duplicates += 1;
	return this;
}
/**
 * Load a whole file into memory, retrying reads that are interrupted
 *
 * The buffer starts at 8096 chars and is doubled whenever fewer
 * than 2048 chars of it are free. Once the end of the file is reached
 * the buffer is shrunk to the number of chars read, unless
 * the file was empty, in which case the initial allocation is kept.
 *
 * @param   pathname  The file to read
 * @param   size      Output parameter for the size of the read content, in char:s,
 *                    only written on success
 * @return            The read content (to be free:d by the caller), `NULL` on error
 */
static char *
read_file(const char *restrict pathname, size_t *restrict size)
{
	size_t capacity = 8096;
	size_t used = 0;
	char *restrict content = NULL;
	char *restrict old = NULL;
	ssize_t got;
	int fd = -1;

	/* The buffer is allocated before the file is opened. */
	fail_if (xmalloc(content, capacity, char));

	fd = open(pathname, O_RDONLY);
	fail_if (fd < 0);

	for (;;) {
		/* Keep at least 2048 free chars ahead of the write position. */
		if (capacity - used < 2048) {
			fail_if (xxrealloc(old, content, capacity <<= 1, char));
		}

		got = read(fd, content + used, (capacity - used) * sizeof(char));
		if (got > 0) {
			used += (size_t)got;
		} else if (got == 0) {
			/* End of file. */
			break;
		} else {
			/* An interrupted read is simply retried, anything else is fatal. */
			fail_if (errno != EINTR);
		}
	}

	/* Trim the allocation to what was actually read; an empty
	   file simply keeps the initial allocation. */
	if (used) {
		fail_if (xxrealloc(old, content, used, char));
	}

	xclose(fd);
	*size = used;
	return content;

fail:
	xperror(*argv);
	free(old);
	free(content);
	if (fd >= 0)
		xclose(fd);
	return NULL;
}
/**
 * Find the end of a function call
 *
 * A call is a non-empty name made up of ASCII letters, digits and
 * underscores, immediately followed by `(`. Inside the call, a
 * backslash escapes the next character and may itself start a nested
 * call (which is skipped as a whole), and brackets inside
 * double quotes do not close the call.
 *
 * @param   content  The code
 * @param   offset   The index of the first character of the candidate name;
 *                   the callers in this file pass the index just after a backslash
 * @param   size     The length of `content`
 * @return           The index of the character after the bracket that closes
 *                   the function call (may be outside the code by one character),
 *                   or `size` if the call does not end (that is, the code ends
 *                   prematurely), or zero if there is no function call at `offset`
 */
size_t
get_end_of_call(const char *restrict content, size_t offset, size_t size)
{
	size_t pos = offset, nested_end = 0;
	int in_quote = 0, escaped = 0;
	char ch;

	/* Walk over the function name. */
	for (; pos < size; pos++) {
		ch = content[pos];
		if (!(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
		      ('0' <= ch && ch <= '9') || ch == '_'))
			break;
	}

	/* An empty name, running out of input, or anything
	   but an opening bracket means this is not a call. */
	if (pos == offset || pos == size || content[pos] != '(')
		return 0;

	/* Scan for the closing bracket. */
	while (pos < size) {
		ch = content[pos++];

		if (escaped) {
			/* Escapes may be longer than one character,
			   but only the first can affect the parsing. */
			escaped = 0;
			continue;
		}
		if (pos <= nested_end) {
			/* Still inside a nested call that has already been measured. */
			continue;
		}
		if (ch == '\\') {
			/* A backslash starts either an escape or a nested call.
			   Treating it as an escape in both cases is harmless:
			   only one character is skipped that way, and a call
			   is never that short. */
			escaped = 1;
			nested_end = get_end_of_call(content, pos, size);
			continue;
		}
		if (in_quote) {
			/* Quotes end with the symbol they started with,
			   and they hide brackets from this scan. */
			if (ch == '"')
				in_quote = 0;
			continue;
		}
		if (ch == ')')
			break;
		if (ch == '"')
			in_quote = 1;
	}

	return pos;
}
/**
 * Remove comments from the content
 *
 * A comment starts at a `#` that is not escaped, not inside
 * double quotes and not inside a function call, and runs up to, but not
 * including, the next LF. The content is compacted in place:
 * kept characters are moved towards the start of the buffer.
 *
 * @param   content  The code to shrink
 * @param   size     The size of `content`, in char:s
 * @return           The new size of `content`, in char:s; this function cannot fail
 */
static size_t
remove_comments(char *restrict content, size_t size)
{
	size_t in = 0, out = 0, call_end = 0;
	int comment = 0, quote = 0, escape = 0;
	char c;

	while (in < size) {
		c = content[in++];

		if (comment) {
			/* Drop everything in the comment except the LF that ends it. */
			if (c != '\n')
				continue;
			comment = 0;
		} else if (escape) {
			/* Escapes may be longer than one character,
			   but only the first can affect the parsing. */
			escape = 0;
		} else if (in <= call_end) {
			/* Inside a function call: kept verbatim,
			   nested quotes can appear here. */
		} else if (c == '\\') {
			/* \ can either start a function call or an escape.
			   It may not be an escape, but registering it as
			   one cannot harm us since we only skip the first
			   character, and a function call cannot be that short. */
			escape = 1;
			/* Only characters at `in` and later are inspected here,
			   and those have not been overwritten yet. */
			call_end = get_end_of_call(content, in, size);
		} else if (quote) {
			/* Quotes end with the same symbol as they start with,
			   or at the end of the line, and they hide `#`.
			   The comparison is spelled out rather than done with
			   strchr(), which would also match a NUL byte. */
			if (c == '"' || c == '\n')
				quote = 0;
		} else if (c == '#') {
			/* # is the comment symbol, it is dropped itself. */
			comment = 1;
			continue;
		} else if (c == '"') {
			/* " is the quote symbol. */
			quote = 1;
		}

		/* Everything that reaches this point is kept. */
		content[out++] = c;
	}

	return out;
}
/**
 * Build an array with a pointer to the start of each line in a text
 *
 * @param   content  The text to split, it must end with an LF.
 *                   LF:s are treated as line endings rather than
 *                   line separators, so the final LF does not
 *                   produce an extra, empty, line. Every LF is
 *                   overwritten with a NUL-character.
 * @param   length   The length of `content`
 * @return           A `NULL`-terminated array with one element per
 *                   line. The elements point into `content`, so
 *                   they must not be free:d individually; free the
 *                   returned array and `content` instead. On error
 *                   `NULL` is returned, and `content` is left
 *                   unmodified.
 */
static char **
line_split(char *content, size_t length)
{
	char **restrict lines = NULL;
	size_t n_lines = 0;
	size_t pos, idx = 0;
	int at_line_start = 1;

	/* One line per LF. */
	for (pos = 0; pos < length; pos++)
		n_lines += (content[pos] == '\n');

	/* One extra slot for the terminating `NULL`. */
	fail_if (xmalloc(lines, n_lines + 1, char*));
	lines[n_lines] = NULL;

	for (pos = 0; pos < length; pos++) {
		if (at_line_start) {
			lines[idx++] = content + pos;
			at_line_start = 0;
		}
		if (content[pos] == '\n') {
			/* Terminate this line, the next character begins a new one. */
			content[pos] = '\0';
			at_line_start = 1;
		}
	}

	return lines;

fail:
	xperror(*argv);
	return NULL;
}
/**
 * Translate all tab spaces into blank spaces
 *
 * Tab stops are placed at every eighth column, and the column
 * restarts at zero after each LF. The buffer is grown by exactly
 * the number of chars the expansion adds.
 *
 * @param   content       Input and output parameter for the file's content;
 *                        the allocation may be moved by `realloc`
 * @param   content_size  Input and output parameter for the size of the file's content
 * @return                Zero on success, -1 on error. On error neither
 *                        `*content` nor `*content_size` has been modified,
 *                        and `*content` is still valid.
 */
static int
expand(char **restrict content, size_t *restrict content_size)
{
	size_t n = *content_size;
	size_t extra = 0, tabs = 0, col = 0;
	size_t src, dst, pad;
	char *data = *content;
	char *grown;
	char c;

	/* Calculate how much the content grows. The column must be
	   tracked here exactly as in the expansion pass below, as the
	   width of a tab space depends on where in the line it is. */
	for (src = 0; src < n; src++) {
		if (data[src] == '\n') {
			col = 0;
		} else if (data[src] == '\t') {
			pad = 8 - (col % 8);
			extra += pad - 1;
			col += pad;
			tabs++;
		} else {
			col++;
		}
	}

	/* Nothing to do without tab spaces. Do not test `extra` here:
	   a tab space in the last column before a tab stop becomes
	   a single blank space and thus adds nothing. */
	if (!tabs)
		return 0;

	/* Extend the allocation, and move the original text to the
	   end of it so it can be expanded from the front without
	   the output overtaking the input. */
	if (extra) {
		grown = realloc(data, (n + extra) * sizeof(char));
		if (!grown)
			return -1;
		data = grown;
		*content = data;
		*content_size = n + extra;
		memmove(data + extra, data, n);
	}

	/* Expand tab spaces. `dst` never passes `src` since at most
	   `extra` chars are added in total. */
	col = 0;
	dst = 0;
	for (src = extra; src < n + extra; src++) {
		c = data[src];
		if (c == '\n') {
			data[dst++] = c;
			col = 0;
		} else if (c != '\t') {
			data[dst++] = c;
			col++;
		} else {
			do
				data[dst++] = ' ';
			while (++col % 8);
		}
	}

	return 0;
}
/**
 * Read a source file and split it into lines, both with
 * and without its comments
 *
 * The file is read, its tab spaces are expanded and a trailing LF
 * is added if missing. A verbatim copy is kept as the "real" content,
 * while comments are stripped from the working copy. Both copies
 * are then split into lines.
 *
 * @param   pathname     The pathname of the source file
 * @param   source_code  Output parameter for read data; `lines`, `real_lines`,
 *                       `content`, `real_content` and `line_count` are set
 *                       on success, and nothing is written to it on error
 * @return               Zero on success, -1 on error
 */
int
read_source_lines(const char *restrict pathname, mds_kbdc_source_code_t *restrict source_code)
{
	char *content = NULL, *real_content = NULL, *old = NULL;
	char **lines = NULL, **real_lines = NULL;
	size_t content_size, real_content_size, line_count;

	/* Load the file and get rid of its tab spaces. */
	content = read_file(pathname, &content_size);
	fail_if (content == NULL);
	fail_if (expand(&content, &content_size));

	/* The line splitter requires the text to end with an LF. */
	if (content_size == 0 || content[content_size - 1] != '\n') {
		fail_if (xxrealloc(old, content, content_size + 1, char));
		content[content_size] = '\n';
		content_size += 1;
	}

	/* Keep an untouched copy before the comments are removed. */
	fail_if (xmemdup(real_content, content, content_size, char));
	real_content_size = content_size;

	/* Strip the comments and give back the memory they occupied. */
	content_size = remove_comments(content, content_size);
	fail_if (xxrealloc(old, content, content_size, char));

	/* Split both versions by line. */
	lines = line_split(content, content_size);
	fail_if (lines == NULL);
	real_lines = line_split(real_content, real_content_size);
	fail_if (real_lines == NULL);

	/* The line array is `NULL`-terminated, so count up to the terminator. */
	for (line_count = 0; lines[line_count] != NULL; line_count++)
		;

	source_code->line_count = line_count;
	source_code->content = content;
	source_code->real_content = real_content;
	source_code->lines = lines;
	source_code->real_lines = real_lines;
	return 0;

fail:
	xperror(*argv);
	free(old);
	free(content);
	free(real_content);
	free(lines);
	free(real_lines);
	return -1;
}
/**
 * Encode a character in UTF-8
 *
 * The character is handed to `string_encode` as a one-element
 * string followed by -1 (presumably the terminator that function
 * expects), and the bytes it returns, up to but not including the
 * first NUL byte, are copied into `buffer`. No NUL byte is appended.
 *
 * @param   buffer     The buffer where the character should be stored
 * @param   character  The character
 * @return             The position in `buffer` just after the stored
 *                     character, `NULL` on error
 */
static char *
encode_utf8(char *buffer, char32_t character)
{
	char32_t text[2] = { character, (char32_t)-1 };
	char *encoded;
	char *src;

	encoded = string_encode(text);
	fail_if (!encoded);

	for (src = encoded; *src; src++)
		*buffer++ = *src;

	free(encoded);
	return buffer;

fail:
	return NULL;
}
/**
 * Parse a quoted and escaped string that may not include function calls or variable dereferences
 *
 * Unescaped double quotes are dropped. A backslash followed by `0`
 * starts an octal escape and a backslash followed by `u` starts a
 * hexadecimal escape; the digits that follow form a code point that
 * is stored UTF-8 encoded. Such an escape ends at the first character
 * that is not a digit of its base, or at the end of the string. If that
 * character is `.` it is consumed as an explicit terminator, any
 * other character is parsed as ordinary input. A backslash followed
 * by any other character yields that character.
 *
 * @param   string  The string
 * @return          The string in machine-readable format (to be free:d
 *                  by the caller), `NULL` on error
 */
char *
parse_raw_string(const char *restrict string)
{
#define r(cond, lower, upper) ((cond) && ((lower) <= c) && (c <= (upper)))
	char *rc = NULL, *p;
	int escape = 0; /* 0: none, 1: just after a backslash, 8 or 16: base of a numeric escape */
	char32_t buf = 0;
	char c;

	/* The output is never longer than the input: quotes and
	 * backslashes are dropped, and an escape is never shorter
	 * than what it expands to. For example \uA0 is four
	 * characters but generates 2 bytes in UTF-8, and there is
	 * no code point whose UTF-8 encoding is longer than its
	 * hexadecimal representation. One extra char is allocated
	 * for the terminating NUL so that input without
	 * surrounding quotes cannot overflow the buffer. */
	fail_if (xmalloc(p = rc, strlen(string) + 1, char));

	while ((c = *string++)) {
		if (escape > 1) {
			/* Inside a numeric escape: gather digits of its base. */
			if (r(escape == 8, '0', '7')) {
				buf = (buf << 3) | (c & 15);
				continue;
			}
			if (r(escape == 16, '0', '9')) {
				buf = (buf << 4) | (c & 15);
				continue;
			}
			if (r(escape == 16, 'a', 'f') || r(escape == 16, 'A', 'F')) {
				buf = (buf << 4) | ((c & 15) + 9);
				continue;
			}

			/* The escape has ended: store the code point. */
			escape = 0;
			fail_if (!(p = encode_utf8(p, buf)));
			if (c == '.')
				continue;
			/* Any other character is ordinary input, and is parsed
			   as such below. It must not be copied blindly: it can
			   be the closing quote or the start of another escape. */
		}

		if (escape == 1) {
			escape = 0;
			buf = 0;
			if (c == '0')
				escape = 8;
			else if (c == 'u')
				escape = 16;
			else
				*p++ = c;
		} else if (c == '\\') {
			escape = 1;
		} else if (c != '\"') {
			*p++ = c;
		}
	}

	/* A numeric escape can also be ended by the end of the string. */
	if (escape > 1) {
		fail_if (!(p = encode_utf8(p, buf)));
	}

	*p = '\0';
	return rc;

fail:
	free(rc);
	return NULL;
#undef r
}