commit 55f357eb8818f02a40e4908f159f2086daf3dc88
Author: cat
Date:   Sun May 25 17:39:17 2025 +1000

    finally put this under version control

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2694c55
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+mvwt
+mvwtc
+mvwtimg
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7d139d7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,33 @@
+PREFIX ?= /usr/local
+
+# DYNAMIC LINKING (the default; comment these two lines out when
+# switching to static linking below)
+CFLAGS := $(CFLAGS)
+SKA :=
+
+# UNCOMMENT FOR STATIC LINKING
+# CFLAGS := $(CFLAGS) -static -fpic -Wl,-static -fdata-sections -ffunction-sections -Wl,--gc-sections -s
+# SKA := /usr/lib/libskarnet.a # you might have to change this location
+
+# libraries belong in LDLIBS, not LDFLAGS: make's built-in link rule puts
+# LDFLAGS before the source files and LDLIBS after them, and a linker that
+# runs with --as-needed drops a library that is named before the objects
+# using it.
+LDLIBS := $(LDLIBS) -lskarnet
+
+# the programs themselves are built by make's built-in rules
+.PHONY: all
+all: mvwt mvwtc mvwtimg
+
+mvwt: mvwt.c $(SKA)
+
+mvwtc: mvwtc.c pdjson.c $(SKA)
+
+mvwtimg: mvwtimg.c $(SKA)
+
+.PHONY: install
+install: all
+	mkdir -p $(DESTDIR)$(PREFIX)/bin
+	cp -f mvwt $(DESTDIR)$(PREFIX)/bin
+	chmod 755 $(DESTDIR)$(PREFIX)/bin/mvwt
+	cp -f mvwtc $(DESTDIR)$(PREFIX)/bin
+	chmod 755 $(DESTDIR)$(PREFIX)/bin/mvwtc
+	cp -f mvwtimg $(DESTDIR)$(PREFIX)/bin
+	chmod 755 $(DESTDIR)$(PREFIX)/bin/mvwtimg
+
+.PHONY: clean
+clean:
+	-rm -f mvwt mvwtc mvwtimg
diff --git a/README b/README
new file mode 100644
index 0000000..999c64c
--- /dev/null
+++ b/README
@@ -0,0 +1,35 @@
+minimum viable walltaker client based on skalibs.
+(mr skarnet if u see this i am sorry for using your nice systems programming library to make goonware)
+
+simple usage ($ID is your walltaker link id):
+```
+while true; do
+	mvwt $ID
+	sleep 10
+done
+```
+the cachedir can be set with the -c flag;
+the default cachedir is /tmp/mvwt (changeable in config.h)
+the cachedir must already exist, so mkdir it if it doesn't exist already.
+
+build time dependency:
+    a build of skalibs (if you want static compilation, modify the makefile as per the comments)
+runtime dependencies:
+    s6-networking (this program uses ucspi for network communication)
+    hsetroot (used to actually set the wallpaper)
+
+pdjson.{c,h} is a bundled public domain C JSON parser available from here: https://github.com/skeeto/pdjson
+
+you might want to remove the `current` file in the cachedir each
+time before you start using mvwt;
+if the file already exists and your wallpaper is still the same as it
+was before, it won't be set again.
+(this is because mvwt doesn't keep any persistent internal state:
+it's meant to be run in a cron job or a shell loop and reads `current`
+and compares it to walltaker's API response
+to check if it needs to download and change the wallpaper)
+
+mvwtc and mvwtimg shouldn't be used directly. they expect to be on
+a ucspi connection and are spawned by mvwt.
+
+public domain i guess. do whatever you want with this
diff --git a/config.h b/config.h
new file mode 100644
index 0000000..fdd9049
--- /dev/null
+++ b/config.h
@@ -0,0 +1,5 @@
+/* s6-networking's TLS programs need either CADIR or CAFILE to be set
+ * to function; mvwt.c sets the envvar $CADIR to CADIR_DIR.
+ * you might have to change this location depending on your system.
*/
+#define CADIR_DIR "/etc/ssl/certs"
+#define CACHE_DIR "/tmp/mvwt"
diff --git a/mvwt.c b/mvwt.c
new file mode 100644
index 0000000..b5a14a7
--- /dev/null
+++ b/mvwt.c
@@ -0,0 +1,58 @@
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#define USAGE "mvwt [-c cachedir] id"
+#define WALLTAKER_URL "walltaker.joi.how"
+
+/* entry point: `mvwt [-c cachedir] id`.
+ * changes into the cache directory (CACHE_DIR unless -c is given), then
+ * execs s6-tlsclient towards WALLTAKER_URL port 443 with `mvwtc id` as
+ * the program to run on the connection and CADIR set to CADIR_DIR in
+ * its environment.
+ * dies with 100 on a usage error and with 111 if the chdir or the
+ * environment setup fails. */
+int
+main(int argc, char *const *argv)
+{
+	int opt;
+	subgetopt l = SUBGETOPT_ZERO;
+	stralloc envmod = STRALLOC_ZERO;
+
+	PROG = "mvwt";
+	const char *cachedir = CACHE_DIR;
+
+	while ((opt = subgetopt_r(argc, (const char * const*) argv, "c:", &l)) != -1) {
+		switch (opt) {
+		case 'c': /* wallpaper cache directory */
+			cachedir = l.arg;
+			break;
+		default:
+			/* does not return */
+			strerr_dieusage(100, USAGE);
+		}
+	}
+	/* drop the options; exactly one argument, the link id, must remain */
+	argc -= l.ind;
+	argv += l.ind;
+	if (argc != 1)
+		strerr_dieusage(100, USAGE);
+
+	/* mvwtc and mvwtimg use relative file names, so they have to be
+	 * started from inside the cache directory */
+	if (chdir(cachedir) < 0)
+		strerr_diefu(111, "chdir to cache dir "
+			"(mkdir it if it doesn't exist)");
+
+	char const *wtc_argv[] = { "s6-tlsclient", "-N", "-Z", "--",
+		WALLTAKER_URL, "443", "mvwtc", argv[0], 0 };
+
+	if (!env_addmodif(&envmod, "CADIR", CADIR_DIR))
+		strerr_diefu(111, "set CADIR");
+
+	xmexec_m(wtc_argv, envmod.s, envmod.len);
+
+	return 111; /* uh oh! */
+}
diff --git a/mvwtc.c b/mvwtc.c
new file mode 100644
index 0000000..158115a
--- /dev/null
+++ b/mvwtc.c
@@ -0,0 +1,221 @@
+/* full of jank */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "pdjson.h"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ?
(x) : (y))
+
+#define USAGE "mvwtc id"
+#define HTTPOK "HTTP/1.1 200 OK"
+
+/* split url ("scheme://host/path/to/file") into its parts:
+ *   host → "host", path → "/path/to/file", name → "file"
+ * each part is appended to its stralloc and NUL-terminated, so that .s
+ * can be used as a C string. url may or may not end with a NUL.
+ * dies with 111 if the url has no "//", host, path or file name, or if
+ * memory runs out. */
+void
+url_hpn(stralloc *url, stralloc *host, stralloc *path, stralloc *name)
+{
+	size_t len, hoststart, hostlen, lastslash;
+
+	if (!url->s)
+		strerr_dief(111, "image url seems mangled");
+
+	/* only look at the text in front of the terminating NUL (if any) */
+	len = byte_chr(url->s, url->len, '\0');
+
+	/* the host starts right after the "//" that ends the scheme.
+	 * byte_chr returns len when it finds nothing, which fails the check */
+	hoststart = byte_chr(url->s, len, '/');
+	if (hoststart + 2 > len || url->s[hoststart + 1] != '/')
+		strerr_dief(111, "image url seems mangled");
+	hoststart += 2;
+
+	/* the host ends at the next '/'; the path is everything from there */
+	hostlen = byte_chr(url->s + hoststart, len - hoststart, '/');
+	if (hostlen == 0 || hostlen == len - hoststart)
+		strerr_dief(111, "image url seems mangled");
+
+	/* the file name is whatever follows the last '/'
+	 * (there is one: the path starts with it) */
+	lastslash = byte_rchr(url->s, len, '/');
+	if (lastslash + 1 >= len)
+		strerr_dief(111, "image url seems mangled");
+
+	if (!stralloc_catb(host, url->s + hoststart, hostlen)
+	    || !stralloc_0(host)
+	    || !stralloc_catb(path, url->s + hoststart + hostlen,
+	                      len - hoststart - hostlen)
+	    || !stralloc_0(path)
+	    || !stralloc_catb(name, url->s + lastslash + 1, len - lastslash - 1)
+	    || !stralloc_0(name))
+		strerr_diefu(111, "allocate memory");
+}
+
+/* '\n' → '\0'
+ * cut sa at its first newline: the newline becomes a NUL and sa->len
+ * shrinks to the number of bytes in front of it. sa is left alone if
+ * it has no newline. */
+void
+sa_nltoz(stralloc *sa)
+{
+	size_t idx;
+
+	idx = byte_chr(sa->s, sa->len, '\n');
+	if (idx < sa->len) {
+		sa->s[idx] = '\0';
+		sa->len = idx;
+	}
+}
+
+/* '\0' → '\n'
+ * turn the first NUL in sa into a newline and end sa right after it.
+ * sa is left alone if it has no NUL. */
+void
+sa_ztonl(stralloc *sa)
+{
+	size_t idx;
+
+	idx = byte_chr(sa->s, sa->len, '\0');
+	if (idx < sa->len) {
+		sa->s[idx] = '\n';
+		sa->len = idx + 1;
+	}
+}
+
+/* img url in sa → url
+ * sa holds the JSON document of the API response; the string value of
+ * its top-level "post_url" member replaces the contents of url,
+ * NUL-terminated.
+ * returns 0 on success and -1 if the JSON is malformed, is not an
+ * object, has no string-valued "post_url", or if memory runs out. */
+int
+imgpath(stralloc *url, stralloc *sa)
+{
+	json_stream j;
+	enum json_type t;
+	size_t count;
+	int found = -1;
+
+	if (!sa->s)
+		return -1;
+
+	/* don't feed the terminating NUL (if any) to the parser */
+	json_open_buffer(&j, sa->s, byte_chr(sa->s, sa->len, '\0'));
+	json_set_streaming(&j, 0);
+
+	if (json_next(&j) != JSON_OBJECT)
+		goto done;
+
+	/* walk the tokens until the top-level object is closed.
+	 * JSON_ERROR has to end the loop as well: once the parser has
+	 * failed, json_next keeps returning JSON_ERROR, never JSON_DONE */
+	for (;;) {
+		t = json_next(&j);
+		if (t == JSON_ERROR || t == JSON_DONE || !json_get_depth(&j))
+			break;
+		if (t != JSON_STRING || json_get_depth(&j) != 1)
+			continue;
+		/* inside an object an odd count means that this string is
+		 * a member name, not a value */
+		if (json_get_context(&j, &count) != JSON_OBJECT || !(count & 1))
+			continue;
+		/* exact match: a prefix comparison would also accept "" */
+		if (strcmp(json_get_string(&j, NULL), "post_url"))
+			continue;
+
+		/* the very next token is the member's value; anything but
+		 * a string is of no use as a url */
+		if (json_next(&j) != JSON_STRING)
+			break;
+		url->len = 0;
+		if (stralloc_cats(url, json_get_string(&j, NULL))
+		    && stralloc_0(url))
+			found = 0;
+		break;
+	}
+done:
+	json_close(&j);
+	return found;
+}
+
+/* 1 if line (len bytes) is the status line of a 200 response, else 0.
+ * the request goes out as HTTP/1.0, so "HTTP/1.0 200" is accepted next
+ * to "HTTP/1.1 200". */
+static int
+http_status_ok(char const *line, size_t len)
+{
+	static char const version[] = "HTTP/1.";
+	static char const status[] = " 200";
+	size_t vlen = sizeof(version) - 1;
+	size_t slen = sizeof(status) - 1;
+
+	/* version prefix, one digit for the minor version, status code */
+	if (len < vlen + 1 + slen)
+		return 0;
+	if (strncmp(line, version, vlen) != 0)
+		return 0;
+	if (line[vlen] != '0' && line[vlen] != '1')
+		return 0;
+	return strncmp(line + vlen + 1, status, slen) == 0;
+}
+
+/* read the http response from fd: check the status line, skip the
+ * header and leave the body, NUL-terminated, in sa.
+ * returns 0 on success and -1 on a status other than 200, a response
+ * that ends early or has no body, a read error, or if memory runs
+ * out. */
+int
+http_recv(int fd, stralloc *sa)
+{
+	char ibuf[BUFFER_INSIZE];
+	buffer inb = BUFFER_INIT(&buffer_read, fd, ibuf, BUFFER_INSIZE);
+
+	/* status line. per the skalibs documentation skagetln returns 1
+	 * for a complete line, 0 on EOF and -1 on error */
+	sa->len = 0;
+	if (skagetln(&inb, sa, '\n') <= 0)
+		return -1;
+	if (!http_status_ok(sa->s, sa->len))
+		return -1;
+
+	/* skip to the end of the header, which is an empty line.
+	 * EOF in here means the response was cut short */
+	for (;;) {
+		sa->len = 0;
+		if (skagetln(&inb, sa, '\n') <= 0)
+			return -1;
+		if (sa->s[0] == '\n' || !strncmp(sa->s, "\r\n", 2))
+			break;
+	}
+
+	/* everything else is the body. skagetln appends to sa, so keep
+	 * reading lines until it stops returning 1. a last line without
+	 * '\n' is reported as an error, but its bytes are appended all
+	 * the same, so the length is what tells us whether a body came in */
+	sa->len = 0;
+	while (skagetln(&inb, sa, '\n') > 0)
+		;
+	if (!sa->len)
+		return -1;
+	if (!stralloc_0(sa))
+		return -1;
+
+	return 0;
+}
+
+/* send the request for link id's JSON document (/api/links/<id>.json)
+ * to fd. returns 0 on success, -1 if the request could not be
+ * written. */
+int
+http_send(int fd, char *id)
+{
+	char buf[BUFFER_OUTSIZE];
+
+	buffer b = BUFFER_INIT(&buffer_write, fd, buf, BUFFER_OUTSIZE);
+
+	buffer_putsnoflush(&b, "GET /api/links/");
+	buffer_putsnoflush(&b, id);
+	buffer_putsnoflush(&b, ".json HTTP/1.0\r\n");
+	buffer_putsnoflush(&b, "Host: walltaker.joi.how\r\n");
+	buffer_putsnoflush(&b, "Connection: close\r\n");
+	buffer_putsnoflush(&b, "User-Agent: mvwt\r\n\r\n");
+
+	/* nothing has been written so far; the flush is the actual send */
+	if (!buffer_flush(&b))
+		return -1;
+
+	return 0;
+}
+
+/* entry point: `mvwtc id`, run on a ucspi client connection
+ * (this program reads from fd 6 and writes to fd 7).
+ * fetches the link's JSON document and extracts the image url from it.
+ * if the url equals the line stored in the file `current` there is
+ * nothing to do and the program exits 0; otherwise the url is written
+ * to `current` and s6-tlsclient is exec'd towards the image host with
+ * mvwtimg as the program to run on the connection. */
+int
+main(int argc, char *argv[])
+{
+	stralloc resp = STRALLOC_ZERO;
+	stralloc img = STRALLOC_ZERO;
+	stralloc currentimg = STRALLOC_ZERO;
+	stralloc tmp = STRALLOC_ZERO;
+	stralloc imhost = STRALLOC_ZERO;
+	stralloc impath = STRALLOC_ZERO;
+	stralloc imfile = STRALLOC_ZERO;
+	PROG = "mvwtc";
+
+	if (argc != 2)
+		strerr_dieusage(100, USAGE);
+
+	if (http_send(7, argv[1]) < 0)
+		strerr_diefu(111, "send http request");
+	if (http_recv(6, &resp) < 0) {
+		stralloc_free(&resp);
+		strerr_diefu(111, "receive http response");
+	}
+	fd_shutdown(7, 1);
+	fd_close(7);
+	fd_shutdown(6, 0);
+	fd_close(6);
+
+	if (imgpath(&img, &resp) < 0) {
+		stralloc_free(&img);
+		stralloc_free(&resp);
+		strerr_dief(111, "json seems mangled");
+	}
+	stralloc_free(&resp);
+
+	if (opengetlnclose("current", &currentimg, '\n') >= 0) {
+		sa_nltoz(&currentimg);
+		/* the whole url has to match: comparing only the shorter
+		 * length would treat an empty or cut-off `current` as
+		 * "nothing changed" forever */
+		if (currentimg.len > 0
+		    && currentimg.len == strlen(img.s)
+		    && !strncmp(currentimg.s, img.s, currentimg.len)) {
+			/* we don't need to do anything, just exit */
+			stralloc_free(&currentimg);
+			stralloc_free(&img);
+			return 0;
+		}
+	}
+
+	/* `current` holds the url followed by a newline, so swap the
+	 * terminating NUL of a copy for one */
+	if (!stralloc_copy(&tmp, &img))
+		strerr_diefu(111, "allocate memory");
+	sa_ztonl(&tmp);
+	if (!openwritenclose_suffix6("current", tmp.s, tmp.len, NULL, 0, "~")) {
+		stralloc_free(&tmp);
+		stralloc_free(&currentimg);
+		
stralloc_free(&img);
+		strerr_diefu(111, "write to current");
+	}
+	stralloc_free(&tmp);
+
+	url_hpn(&img, &imhost, &impath, &imfile);
+
+	char const *wtimg_argv[] = { "s6-tlsclient", "-N", "--",
+		imhost.s, "443", "mvwtimg", imhost.s, impath.s, imfile.s, 0 };
+
+	xexec(wtimg_argv);
+
+	return 111;
+}
diff --git a/mvwtimg.c b/mvwtimg.c
new file mode 100644
index 0000000..94d01d1
--- /dev/null
+++ b/mvwtimg.c
@@ -0,0 +1,119 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define USAGE "mvwtimg host path filename"
+#define HTTPOK "HTTP/1.1 200 OK"
+
+/* copy everything that is left to read on infd into the file path,
+ * which is created or truncated.
+ * returns 0 on success, -1 if the file can't be opened or the copy
+ * fails. */
+int
+download(int infd, char *path)
+{
+	int outfd;
+
+	outfd = open_trunc(path);
+	if (outfd < 0)
+		return -1;
+
+	if (fd_cat(infd, outfd) < 0) {
+		fd_close(outfd);
+		return -1;
+	}
+
+	/* the result of the sync is not checked */
+	fd_sync(outfd);
+	fd_close(outfd);
+
+	return 0;
+}
+
+/* read one line from fd into sa, replacing its contents. reading stops
+ * after the first sep byte, which is kept in sa, or at EOF, in which
+ * case sa holds whatever came before it (possibly nothing).
+ * the bytes are read one at a time on purpose: nothing behind the line
+ * is consumed, so the rest can still be read straight from fd.
+ * returns 0 on success, -1 on a read error or if memory runs out. */
+int
+fdgetln(int fd, stralloc *sa, int sep)
+{
+	char c;
+	ssize_t r;
+
+	sa->len = 0;
+	for (;;) {
+		r = fd_read(fd, &c, 1);
+		if (r < 0)
+			return -1;
+		/* EOF is a read of 0 bytes, not a byte value */
+		if (r == 0)
+			break;
+		if (!stralloc_append(sa, c))
+			return -1;
+		if (c == sep)
+			break;
+	}
+	return 0;
+}
+
+/* 1 if line (len bytes) is the status line of a 200 response, else 0.
+ * the request goes out as HTTP/1.0, so "HTTP/1.0 200" is accepted next
+ * to "HTTP/1.1 200". */
+static int
+http_status_ok(char const *line, size_t len)
+{
+	static char const version[] = "HTTP/1.";
+	static char const status[] = " 200";
+	size_t vlen = sizeof(version) - 1;
+	size_t slen = sizeof(status) - 1;
+
+	/* version prefix, one digit for the minor version, status code */
+	if (len < vlen + 1 + slen)
+		return 0;
+	if (strncmp(line, version, vlen) != 0)
+		return 0;
+	if (line[vlen] != '0' && line[vlen] != '1')
+		return 0;
+	return strncmp(line + vlen + 1, status, slen) == 0;
+}
+
+/* read the status line and the header of the http response from fd,
+ * leaving fd positioned at the first byte of the body.
+ * returns 0 on success and -1 on a status other than 200, a response
+ * that ends before the header does, a read error, or if memory runs
+ * out. */
+int
+http_recv_header(int fd)
+{
+	stralloc sa = STRALLOC_ZERO;
+	int r = -1;
+
+	if (fdgetln(fd, &sa, '\n') < 0)
+		goto out;
+	if (!http_status_ok(sa.s, sa.len))
+		goto out;
+
+	/* the header ends with an empty line */
+	for (;;) {
+		if (fdgetln(fd, &sa, '\n') < 0)
+			goto out;
+		/* fdgetln only comes back empty-handed at EOF */
+		if (!sa.len)
+			goto out;
+		if (sa.s[0] == '\n'
+		    || (sa.len >= 2 && sa.s[0] == '\r' && sa.s[1] == '\n'))
+			break;
+	}
+	r = 0;
+out:
+	stralloc_free(&sa);
+	return r;
+}
+
+/* send the request for path on host to fd.
+ * returns 0 on success, -1 if the request could not be written. */
+int
+http_send(int fd, char *path, char *host)
+{
+	char buf[BUFFER_OUTSIZE];
+
+	buffer b = BUFFER_INIT(&buffer_write, fd, buf, BUFFER_OUTSIZE);
+
+	buffer_putsnoflush(&b, "GET ");
+	buffer_putsnoflush(&b, path);
+	buffer_putsnoflush(&b, " HTTP/1.0\r\n");
+	buffer_putsnoflush(&b, "Host: ");
+	buffer_putsnoflush(&b, host);
+	buffer_putsnoflush(&b, "\r\nConnection: close\r\n");
+	buffer_putsnoflush(&b, "User-Agent: mvwt\r\n\r\n");
+
+	/* nothing has been written so far; the flush is the actual send */
+	if (!buffer_flush(&b))
+		return -1;
+
+	return 0;
+}
+
+/* entry point: `mvwtimg host path filename`, run on a ucspi client
+ * connection (this program reads from fd 6 and writes to fd 7).
+ * requests path from host, saves the body of the response as filename
+ * in the current directory and execs hsetroot to show that file. */
+int
+main(int argc, char *argv[])
+{
+	PROG = "mvwtimg";
+
+	if (argc != 4)
+		strerr_dieusage(100, USAGE);
+
+	if (http_send(7, argv[2], argv[1]) < 0)
+		strerr_diefu(111, "send http request");
+	if (http_recv_header(6) < 0)
+		strerr_diefu(111, "receive image header");
+	if (download(6, argv[3]) < 0)
+		strerr_diefu(111, "download and save image");
+	fd_shutdown(7, 1);
+	fd_close(7);
+	fd_shutdown(6, 0);
+	fd_close(6);
+
+	char const *hsetroot_argv[] = { "hsetroot", "-cover", argv[3], 0 };
+	xexec(hsetroot_argv);
+	return 0;
+}
diff --git a/pdjson.c b/pdjson.c
new file mode 100644
index 0000000..474fcb5
--- /dev/null
+++ b/pdjson.c
@@ -0,0 +1,992 @@
+#ifndef _POSIX_C_SOURCE
+#  define _POSIX_C_SOURCE 200112L
+#elif _POSIX_C_SOURCE < 200112L
+#  error incompatible _POSIX_C_SOURCE level
+#endif
+
+#include
+#include
+#include
+
+#ifndef PDJSON_H
+#  include "pdjson.h"
+#endif
+
+#define JSON_FLAG_ERROR      (1u << 0)
+#define JSON_FLAG_STREAMING  (1u << 1)
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+
+#define json_error(json, format, ...)                             \
+    if (!(json->flags & JSON_FLAG_ERROR)) {                       \
+        json->flags |= JSON_FLAG_ERROR;                           \
+        _snprintf_s(json->errmsg, sizeof(json->errmsg),           \
+                 _TRUNCATE,                                       \
+                 format,                                          \
+                 __VA_ARGS__);                                    \
+    }                                                             \
+
+#else
+
+#define json_error(json, format, ...)                             \
+    if (!(json->flags & JSON_FLAG_ERROR)) {                       \
+        json->flags |= JSON_FLAG_ERROR;                           \
+        snprintf(json->errmsg, sizeof(json->errmsg),              \
+                 format,                                          \
+                 __VA_ARGS__);                                    \
+    }                                                             \
+
+#endif /* _MSC_VER */
+
+/* See also PDJSON_STACK_MAX below.
*/ +#ifndef PDJSON_STACK_INC +# define PDJSON_STACK_INC 4 +#endif + +struct json_stack { + enum json_type type; + long count; +}; + +static enum json_type +push(json_stream *json, enum json_type type) +{ + json->stack_top++; + +#ifdef PDJSON_STACK_MAX + if (json->stack_top > PDJSON_STACK_MAX) { + json_error(json, "%s", "maximum depth of nesting reached"); + return JSON_ERROR; + } +#endif + + if (json->stack_top >= json->stack_size) { + struct json_stack *stack; + size_t size = (json->stack_size + PDJSON_STACK_INC) * sizeof(*json->stack); + stack = (struct json_stack *)json->alloc.realloc(json->stack, size); + if (stack == NULL) { + json_error(json, "%s", "out of memory"); + return JSON_ERROR; + } + + json->stack_size += PDJSON_STACK_INC; + json->stack = stack; + } + + json->stack[json->stack_top].type = type; + json->stack[json->stack_top].count = 0; + + return type; +} + +/* Note: c is assumed not to be EOF. */ +static enum json_type +pop(json_stream *json, int c, enum json_type expected) +{ + if (json->stack == NULL || json->stack[json->stack_top].type != expected) { + json_error(json, "unexpected byte '%c'", c); + return JSON_ERROR; + } + json->stack_top--; + return expected == JSON_ARRAY ? 
JSON_ARRAY_END : JSON_OBJECT_END; +} + +static int buffer_peek(struct json_source *source) +{ + if (source->position < source->source.buffer.length) + return source->source.buffer.buffer[source->position]; + else + return EOF; +} + +static int buffer_get(struct json_source *source) +{ + int c = source->peek(source); + if (c != EOF) + source->position++; + return c; +} + +static int stream_get(struct json_source *source) +{ + int c = fgetc(source->source.stream.stream); + if (c != EOF) + source->position++; + return c; +} + +static int stream_peek(struct json_source *source) +{ + int c = fgetc(source->source.stream.stream); + ungetc(c, source->source.stream.stream); + return c; +} + +static void init(json_stream *json) +{ + json->lineno = 1; + json->flags = JSON_FLAG_STREAMING; + json->errmsg[0] = '\0'; + json->ntokens = 0; + json->next = (enum json_type)0; + + json->stack = NULL; + json->stack_top = -1; + json->stack_size = 0; + + json->data.string = NULL; + json->data.string_size = 0; + json->data.string_fill = 0; + json->source.position = 0; + + json->alloc.malloc = malloc; + json->alloc.realloc = realloc; + json->alloc.free = free; +} + +static enum json_type +is_match(json_stream *json, const char *pattern, enum json_type type) +{ + int c; + for (const char *p = pattern; *p; p++) { + if (*p != (c = json->source.get(&json->source))) { + if (c != EOF) { + json_error(json, "expected '%c' instead of byte '%c'", *p, c); + } else { + json_error(json, "expected '%c' instead of end of text", *p); + } + return JSON_ERROR; + } + } + return type; +} + +static int pushchar(json_stream *json, int c) +{ + if (json->data.string_fill == json->data.string_size) { + size_t size = json->data.string_size * 2; + char *buffer = (char *)json->alloc.realloc(json->data.string, size); + if (buffer == NULL) { + json_error(json, "%s", "out of memory"); + return -1; + } else { + json->data.string_size = size; + json->data.string = buffer; + } + } + 
json->data.string[json->data.string_fill++] = c; + return 0; +} + +static int init_string(json_stream *json) +{ + json->data.string_fill = 0; + if (json->data.string == NULL) { + json->data.string_size = 1024; + json->data.string = (char *)json->alloc.malloc(json->data.string_size); + if (json->data.string == NULL) { + json_error(json, "%s", "out of memory"); + return -1; + } + } + json->data.string[0] = '\0'; + return 0; +} + +static int encode_utf8(json_stream *json, unsigned long c) +{ + if (c < 0x80UL) { + return pushchar(json, c); + } else if (c < 0x0800UL) { + return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else if (c < 0x010000UL) { + if (c >= 0xd800 && c <= 0xdfff) { + json_error(json, "invalid codepoint %06lx", c); + return -1; + } + return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) && + (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else if (c < 0x110000UL) { + return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) && + (pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else { + json_error(json, "unable to encode %06lx as UTF-8", c); + return -1; + } +} + +static int hexchar(int c) +{ + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': + case 'A': return 10; + case 'b': + case 'B': return 11; + case 'c': + case 'C': return 12; + case 'd': + case 'D': return 13; + case 'e': + case 'E': return 14; + case 'f': + case 'F': return 15; + default: + return -1; + } +} + +static long +read_unicode_cp(json_stream *json) +{ + long cp = 0; + int shift = 12; + + for (size_t i = 0; i < 4; i++) { + int c = json->source.get(&json->source); + int hc; + + 
if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if ((hc = hexchar(c)) == -1) { + json_error(json, "invalid escape Unicode byte '%c'", c); + return -1; + } + + cp += hc * (1 << shift); + shift -= 4; + } + + + return cp; +} + +static int read_unicode(json_stream *json) +{ + long cp, h, l; + + if ((cp = read_unicode_cp(json)) == -1) { + return -1; + } + + if (cp >= 0xd800 && cp <= 0xdbff) { + /* This is the high portion of a surrogate pair; we need to read the + * lower portion to get the codepoint + */ + h = cp; + + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if (c != '\\') { + json_error(json, "invalid continuation for surrogate pair '%c', " + "expected '\\'", c); + return -1; + } + + c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if (c != 'u') { + json_error(json, "invalid continuation for surrogate pair '%c', " + "expected 'u'", c); + return -1; + } + + if ((l = read_unicode_cp(json)) == -1) { + return -1; + } + + if (l < 0xdc00 || l > 0xdfff) { + json_error(json, "surrogate pair continuation \\u%04lx out " + "of range (dc00-dfff)", l); + return -1; + } + + cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000); + } else if (cp >= 0xdc00 && cp <= 0xdfff) { + json_error(json, "dangling surrogate \\u%04lx", cp); + return -1; + } + + return encode_utf8(json, cp); +} + +static int +read_escaped(json_stream *json) +{ + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in escape"); + return -1; + } else if (c == 'u') { + if (read_unicode(json) != 0) + return -1; + } else { + switch (c) { + case '\\': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case '/': + case '"': + { + const char *codes = "\\bfnrt/\""; + const char *p = 
strchr(codes, c); + if (pushchar(json, "\\\b\f\n\r\t/\""[p - codes]) != 0) + return -1; + } + break; + default: + json_error(json, "invalid escaped byte '%c'", c); + return -1; + } + } + return 0; +} + +static int +char_needs_escaping(int c) +{ + if ((c >= 0) && (c < 0x20 || c == 0x22 || c == 0x5c)) { + return 1; + } + + return 0; +} + +static int +utf8_seq_length(char byte) +{ + unsigned char u = (unsigned char) byte; + if (u < 0x80) return 1; + + if (0x80 <= u && u <= 0xBF) + { + // second, third or fourth byte of a multi-byte + // sequence, i.e. a "continuation byte" + return 0; + } + else if (u == 0xC0 || u == 0xC1) + { + // overlong encoding of an ASCII byte + return 0; + } + else if (0xC2 <= u && u <= 0xDF) + { + // 2-byte sequence + return 2; + } + else if (0xE0 <= u && u <= 0xEF) + { + // 3-byte sequence + return 3; + } + else if (0xF0 <= u && u <= 0xF4) + { + // 4-byte sequence + return 4; + } + else + { + // u >= 0xF5 + // Restricted (start of 4-, 5- or 6-byte sequence) or invalid UTF-8 + return 0; + } +} + +static int +is_legal_utf8(const unsigned char *bytes, int length) +{ + if (0 == bytes || 0 == length) return 0; + + unsigned char a; + const unsigned char* srcptr = bytes + length; + switch (length) + { + default: + return 0; + // Everything else falls through when true. 
+ case 4: + if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + /* FALLTHRU */ + case 3: + if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + /* FALLTHRU */ + case 2: + a = (*--srcptr); + switch (*bytes) + { + case 0xE0: + if (a < 0xA0 || a > 0xBF) return 0; + break; + case 0xED: + if (a < 0x80 || a > 0x9F) return 0; + break; + case 0xF0: + if (a < 0x90 || a > 0xBF) return 0; + break; + case 0xF4: + if (a < 0x80 || a > 0x8F) return 0; + break; + default: + if (a < 0x80 || a > 0xBF) return 0; + break; + } + /* FALLTHRU */ + case 1: + if (*bytes >= 0x80 && *bytes < 0xC2) return 0; + } + return *bytes <= 0xF4; +} + +static int +read_utf8(json_stream* json, int next_char) +{ + int count = utf8_seq_length(next_char); + if (!count) + { + json_error(json, "%s", "invalid UTF-8 character"); + return -1; + } + + char buffer[4]; + buffer[0] = next_char; + int i; + for (i = 1; i < count; ++i) + { + buffer[i] = json->source.get(&json->source); + } + + if (!is_legal_utf8((unsigned char*) buffer, count)) + { + json_error(json, "%s", "invalid UTF-8 text"); + return -1; + } + + for (i = 0; i < count; ++i) + { + if (pushchar(json, buffer[i]) != 0) + return -1; + } + return 0; +} + +static enum json_type +read_string(json_stream *json) +{ + if (init_string(json) != 0) + return JSON_ERROR; + while (1) { + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal"); + return JSON_ERROR; + } else if (c == '"') { + if (pushchar(json, '\0') == 0) + return JSON_STRING; + else + return JSON_ERROR; + } else if (c == '\\') { + if (read_escaped(json) != 0) + return JSON_ERROR; + } else if ((unsigned) c >= 0x80) { + if (read_utf8(json, c) != 0) + return JSON_ERROR; + } else { + if (char_needs_escaping(c)) { + json_error(json, "%s", "unescaped control character in string"); + return JSON_ERROR; + } + + if (pushchar(json, c) != 0) + return JSON_ERROR; + } + } + return JSON_ERROR; +} + +static int +is_digit(int c) +{ + return c >= 48 
/*0*/ && c <= 57 /*9*/; +} + +static int +read_digits(json_stream *json) +{ + int c; + unsigned nread = 0; + while (is_digit(c = json->source.peek(&json->source))) { + if (pushchar(json, json->source.get(&json->source)) != 0) + return -1; + + nread++; + } + + if (nread == 0) { + if (c != EOF) { + json_error(json, "expected digit instead of byte '%c'", c); + } else { + json_error(json, "%s", "expected digit instead of end of text"); + } + return -1; + } + + return 0; +} + +static enum json_type +read_number(json_stream *json, int c) +{ + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (c == '-') { + c = json->source.get(&json->source); + if (is_digit(c)) { + return read_number(json, c); + } else { + if (c != EOF) { + json_error(json, "unexpected byte '%c' in number", c); + } else { + json_error(json, "%s", "unexpected end of text in number"); + } + return JSON_ERROR; + } + } else if (strchr("123456789", c) != NULL) { + c = json->source.peek(&json->source); + if (is_digit(c)) { + if (read_digits(json) != 0) + return JSON_ERROR; + } + } + /* Up to decimal or exponent has been read. */ + c = json->source.peek(&json->source); + if (strchr(".eE", c) == NULL) { + if (pushchar(json, '\0') != 0) + return JSON_ERROR; + else + return JSON_NUMBER; + } + if (c == '.') { + json->source.get(&json->source); // consume . + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (read_digits(json) != 0) + return JSON_ERROR; + } + /* Check for exponent. 
*/ + c = json->source.peek(&json->source); + if (c == 'e' || c == 'E') { + json->source.get(&json->source); // consume e/E + if (pushchar(json, c) != 0) + return JSON_ERROR; + c = json->source.peek(&json->source); + if (c == '+' || c == '-') { + json->source.get(&json->source); // consume + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (read_digits(json) != 0) + return JSON_ERROR; + } else if (is_digit(c)) { + if (read_digits(json) != 0) + return JSON_ERROR; + } else { + if (c != EOF) { + json_error(json, "unexpected byte '%c' in number", c); + } else { + json_error(json, "%s", "unexpected end of text in number"); + } + return JSON_ERROR; + } + } + if (pushchar(json, '\0') != 0) + return JSON_ERROR; + else + return JSON_NUMBER; +} + +bool +json_isspace(int c) +{ + switch (c) { + case 0x09: + case 0x0a: + case 0x0d: + case 0x20: + return true; + } + + return false; +} + +/* Returns the next non-whitespace character in the stream. */ +static int next(json_stream *json) +{ + int c; + while (json_isspace(c = json->source.get(&json->source))) + if (c == '\n') + json->lineno++; + return c; +} + +static enum json_type +read_value(json_stream *json, int c) +{ + json->ntokens++; + switch (c) { + case EOF: + json_error(json, "%s", "unexpected end of text"); + return JSON_ERROR; + case '{': + return push(json, JSON_OBJECT); + case '[': + return push(json, JSON_ARRAY); + case '"': + return read_string(json); + case 'n': + return is_match(json, "ull", JSON_NULL); + case 'f': + return is_match(json, "alse", JSON_FALSE); + case 't': + return is_match(json, "rue", JSON_TRUE); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + if (init_string(json) != 0) + return JSON_ERROR; + return read_number(json, c); + default: + json_error(json, "unexpected byte '%c' in value", c); + return JSON_ERROR; + } +} + +enum json_type json_peek(json_stream *json) +{ + enum json_type next; + if (json->next) + 
next = json->next; + else + next = json->next = json_next(json); + return next; +} + +enum json_type json_next(json_stream *json) +{ + if (json->flags & JSON_FLAG_ERROR) + return JSON_ERROR; + if (json->next != 0) { + enum json_type next = json->next; + json->next = (enum json_type)0; + return next; + } + if (json->ntokens > 0 && json->stack_top == (size_t)-1) { + + /* In the streaming mode leave any trailing whitespaces in the stream. + * This allows the user to validate any desired separation between + * values (such as newlines) using json_source_get/peek() with any + * remaining whitespaces ignored as leading when we parse the next + * value. */ + if (!(json->flags & JSON_FLAG_STREAMING)) { + int c; + + do { + c = json->source.peek(&json->source); + if (json_isspace(c)) { + c = json->source.get(&json->source); + } + } while (json_isspace(c)); + + if (c != EOF) { + json_error(json, "expected end of text instead of byte '%c'", c); + return JSON_ERROR; + } + } + + return JSON_DONE; + } + int c = next(json); + if (json->stack_top == (size_t)-1) { + if (c == EOF && (json->flags & JSON_FLAG_STREAMING)) + return JSON_DONE; + + return read_value(json, c); + } + if (json->stack[json->stack_top].type == JSON_ARRAY) { + if (json->stack[json->stack_top].count == 0) { + if (c == ']') { + return pop(json, c, JSON_ARRAY); + } + json->stack[json->stack_top].count++; + return read_value(json, c); + } else if (c == ',') { + json->stack[json->stack_top].count++; + return read_value(json, next(json)); + } else if (c == ']') { + return pop(json, c, JSON_ARRAY); + } else { + if (c != EOF) { + json_error(json, "unexpected byte '%c'", c); + } else { + json_error(json, "%s", "unexpected end of text"); + } + return JSON_ERROR; + } + } else if (json->stack[json->stack_top].type == JSON_OBJECT) { + if (json->stack[json->stack_top].count == 0) { + if (c == '}') { + return pop(json, c, JSON_OBJECT); + } + + /* No member name/value pairs yet. 
*/ + enum json_type value = read_value(json, c); + if (value != JSON_STRING) { + if (value != JSON_ERROR) + json_error(json, "%s", "expected member name or '}'"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return value; + } + } else if ((json->stack[json->stack_top].count % 2) == 0) { + /* Expecting comma followed by member name. */ + if (c != ',' && c != '}') { + json_error(json, "%s", "expected ',' or '}' after member value"); + return JSON_ERROR; + } else if (c == '}') { + return pop(json, c, JSON_OBJECT); + } else { + enum json_type value = read_value(json, next(json)); + if (value != JSON_STRING) { + if (value != JSON_ERROR) + json_error(json, "%s", "expected member name"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return value; + } + } + } else if ((json->stack[json->stack_top].count % 2) == 1) { + /* Expecting colon followed by value. */ + if (c != ':') { + json_error(json, "%s", "expected ':' after member name"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return read_value(json, next(json)); + } + } + } + json_error(json, "%s", "invalid parser state"); + return JSON_ERROR; +} + +void json_reset(json_stream *json) +{ + json->stack_top = -1; + json->ntokens = 0; + json->flags &= ~JSON_FLAG_ERROR; + json->errmsg[0] = '\0'; +} + +enum json_type json_skip(json_stream *json) +{ + enum json_type type = json_next(json); + size_t cnt_arr = 0; + size_t cnt_obj = 0; + + for (enum json_type skip = type; ; skip = json_next(json)) { + if (skip == JSON_ERROR || skip == JSON_DONE) + return skip; + + if (skip == JSON_ARRAY) { + ++cnt_arr; + } else if (skip == JSON_ARRAY_END && cnt_arr > 0) { + --cnt_arr; + } else if (skip == JSON_OBJECT) { + ++cnt_obj; + } else if (skip == JSON_OBJECT_END && cnt_obj > 0) { + --cnt_obj; + } + + if (!cnt_arr && !cnt_obj) + break; + } + + return type; +} + +enum json_type json_skip_until(json_stream *json, enum json_type type) +{ + while (1) { + enum 
json_type skip = json_skip(json); + + if (skip == JSON_ERROR || skip == JSON_DONE) + return skip; + + if (skip == type) + break; + } + + return type; +} + +const char *json_get_string(json_stream *json, size_t *length) +{ + if (length != NULL) + *length = json->data.string_fill; + if (json->data.string == NULL) + return ""; + else + return json->data.string; +} + +double json_get_number(json_stream *json) +{ + char *p = json->data.string; + return p == NULL ? 0 : strtod(p, NULL); +} + +const char *json_get_error(json_stream *json) +{ + return json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL; +} + +size_t json_get_lineno(json_stream *json) +{ + return json->lineno; +} + +size_t json_get_position(json_stream *json) +{ + return json->source.position; +} + +size_t json_get_depth(json_stream *json) +{ + return json->stack_top + 1; +} + +/* Return the current parsing context, that is, JSON_OBJECT if we are inside + an object, JSON_ARRAY if we are inside an array, and JSON_DONE if we are + not yet/anymore in either. + + Additionally, for the first two cases, also return the number of parsing + events that have already been observed at this level with json_next/peek(). + In particular, inside an object, an odd number would indicate that the just + observed JSON_STRING event is a member name. 
+*/ +enum json_type json_get_context(json_stream *json, size_t *count) +{ + if (json->stack_top == (size_t)-1) + return JSON_DONE; + + if (count != NULL) + *count = json->stack[json->stack_top].count; + + return json->stack[json->stack_top].type; +} + +int json_source_get(json_stream *json) +{ + int c = json->source.get(&json->source); + if (c == '\n') + json->lineno++; + return c; +} + +int json_source_peek(json_stream *json) +{ + return json->source.peek(&json->source); +} + +void json_open_buffer(json_stream *json, const void *buffer, size_t size) +{ + init(json); + json->source.get = buffer_get; + json->source.peek = buffer_peek; + json->source.source.buffer.buffer = (const char *)buffer; + json->source.source.buffer.length = size; +} + +void json_open_string(json_stream *json, const char *string) +{ + json_open_buffer(json, string, strlen(string)); +} + +void json_open_stream(json_stream *json, FILE * stream) +{ + init(json); + json->source.get = stream_get; + json->source.peek = stream_peek; + json->source.source.stream.stream = stream; +} + +static int user_get(struct json_source *json) +{ + int c = json->source.user.get(json->source.user.ptr); + if (c != EOF) + json->position++; + return c; +} + +static int user_peek(struct json_source *json) +{ + return json->source.user.peek(json->source.user.ptr); +} + +void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user) +{ + init(json); + json->source.get = user_get; + json->source.peek = user_peek; + json->source.source.user.ptr = user; + json->source.source.user.get = get; + json->source.source.user.peek = peek; +} + +void json_set_allocator(json_stream *json, json_allocator *a) +{ + json->alloc = *a; +} + +void json_set_streaming(json_stream *json, bool streaming) +{ + if (streaming) + json->flags |= JSON_FLAG_STREAMING; + else + json->flags &= ~JSON_FLAG_STREAMING; +} + +void json_close(json_stream *json) +{ + json->alloc.free(json->stack); + json->alloc.free(json->data.string); 
+}
diff --git a/pdjson.h b/pdjson.h
new file mode 100644
index 0000000..c0262d1
--- /dev/null
+++ b/pdjson.h
@@ -0,0 +1,117 @@
+#ifndef PDJSON_H
+#define PDJSON_H
+/* pdjson API: json_open_*() binds a json_stream to an input; json_next()/json_peek() return enum json_type events. */
+#ifndef PDJSON_SYMEXPORT
+# define PDJSON_SYMEXPORT /* expands to nothing unless the includer defined it first */
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#else
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+    #include <stdbool.h> /* C99 and later: bool/true/false come from the standard header */
+#else
+    #ifndef bool
+        #define bool int
+        #define true 1
+        #define false 0
+    #endif /* bool */
+#endif /* __STDC_VERSION__ */
+#endif /* __cplusplus */
+
+#include <stdio.h> /* FILE and size_t, both used by the declarations below */
+
+enum json_type { /* enumerators start at 1, so 0 never names a json_type */
+    JSON_ERROR = 1, JSON_DONE,
+    JSON_OBJECT, JSON_OBJECT_END, JSON_ARRAY, JSON_ARRAY_END,
+    JSON_STRING, JSON_NUMBER, JSON_TRUE, JSON_FALSE, JSON_NULL
+};
+
+struct json_allocator { /* allocation hooks, installed with json_set_allocator() */
+    void *(*malloc)(size_t);
+    void *(*realloc)(void *, size_t);
+    void (*free)(void *); /* json_close() releases the stack and string buffer through this */
+};
+
+typedef int (*json_user_io)(void *user); /* called with the pointer passed to json_open_user() */
+
+typedef struct json_stream json_stream;
+typedef struct json_allocator json_allocator;
+
+PDJSON_SYMEXPORT void json_open_buffer(json_stream *json, const void *buffer, size_t size);
+PDJSON_SYMEXPORT void json_open_string(json_stream *json, const char *string);
+PDJSON_SYMEXPORT void json_open_stream(json_stream *json, FILE *stream);
+PDJSON_SYMEXPORT void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user);
+PDJSON_SYMEXPORT void json_close(json_stream *json);
+
+PDJSON_SYMEXPORT void json_set_allocator(json_stream *json, json_allocator *a);
+PDJSON_SYMEXPORT void json_set_streaming(json_stream *json, bool mode);
+
+PDJSON_SYMEXPORT enum json_type json_next(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_peek(json_stream *json);
+PDJSON_SYMEXPORT void json_reset(json_stream *json);
+PDJSON_SYMEXPORT const char *json_get_string(json_stream *json, size_t *length);
+PDJSON_SYMEXPORT double json_get_number(json_stream *json);
+
+PDJSON_SYMEXPORT enum json_type json_skip(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_skip_until(json_stream *json, enum json_type type);
+
+PDJSON_SYMEXPORT size_t json_get_lineno(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_position(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_depth(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_get_context(json_stream *json, size_t *count);
+PDJSON_SYMEXPORT const char *json_get_error(json_stream *json);
+
+PDJSON_SYMEXPORT int json_source_get(json_stream *json);
+PDJSON_SYMEXPORT int json_source_peek(json_stream *json);
+PDJSON_SYMEXPORT bool json_isspace(int c);
+
+/* internal */
+
+struct json_source {
+    int (*get)(struct json_source *);
+    int (*peek)(struct json_source *);
+    size_t position; /* user_get() adds 1 for every non-EOF character it returns */
+    union { /* each json_open_*() fills exactly one member */
+        struct {
+            FILE *stream;
+        } stream;
+        struct {
+            const char *buffer;
+            size_t length;
+        } buffer;
+        struct {
+            void *ptr;
+            json_user_io get;
+            json_user_io peek;
+        } user;
+    } source;
+};
+
+struct json_stream {
+    size_t lineno; /* json_source_get() adds 1 for every '\n' it reads */
+
+    struct json_stack *stack;
+    size_t stack_top; /* (size_t)-1 while not inside any object or array */
+    size_t stack_size;
+    enum json_type next;
+    unsigned flags; /* JSON_FLAG_* bits such as JSON_FLAG_ERROR and JSON_FLAG_STREAMING */
+
+    struct {
+        char *string; /* returned by json_get_string(), parsed by json_get_number() */
+        size_t string_fill; /* reported through json_get_string()'s length argument */
+        size_t string_size;
+    } data;
+
+    size_t ntokens;
+
+    struct json_source source;
+    struct json_allocator alloc;
+    char errmsg[128]; /* returned by json_get_error() while JSON_FLAG_ERROR is set */
+};
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif