From 55f357eb8818f02a40e4908f159f2086daf3dc88 Mon Sep 17 00:00:00 2001
From: cat
Date: Sun, 25 May 2025 17:39:17 +1000
Subject: [PATCH] finally put this under version control

---
 .gitignore |   3 +
 Makefile   |  33 ++
 README     |  35 ++
 config.h   |   5 +
 mvwt.c     |  58 ++++
 mvwtc.c    | 221 ++++++++++++
 mvwtimg.c  | 119 +++++++
 pdjson.c   | 992 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 pdjson.h   | 117 +++++++
 9 files changed, 1583 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 README
 create mode 100644 config.h
 create mode 100644 mvwt.c
 create mode 100644 mvwtc.c
 create mode 100644 mvwtimg.c
 create mode 100644 pdjson.c
 create mode 100644 pdjson.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2694c55
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+mvwt
+mvwtc
+mvwtimg
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7d139d7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,33 @@
+PREFIX ?= /usr/local
+
+# UNCOMMENT FOR DYNAMIC LINKING
+CFLAGS := $(CFLAGS)
+SKA :=
+
+# UNCOMMENT FOR STATIC LINKING
+# CFLAGS := $(CFLAGS) -static -fpic -Wl,-static -fdata-sections -ffunction-sections -Wl,--gc-sections -s
+# SKA := /usr/lib/libskarnet.a # you might have to change this location
+
+LDLIBS := $(LDLIBS) -lskarnet # LDLIBS, not LDFLAGS: the implicit rule puts LDFLAGS before the sources, where -l is too early for the linker
+
+all: mvwt mvwtc mvwtimg
+
+mvwt: mvwt.c $(SKA)
+
+mvwtc: mvwtc.c pdjson.c $(SKA)
+
+mvwtimg: mvwtimg.c $(SKA)
+
+.PHONY: install
+install: all
+	mkdir -p $(DESTDIR)$(PREFIX)/bin
+	cp -f mvwt $(DESTDIR)$(PREFIX)/bin
+	chmod 755 $(DESTDIR)$(PREFIX)/bin/mvwt
+	cp -f mvwtc $(DESTDIR)$(PREFIX)/bin
+	chmod 755 $(DESTDIR)$(PREFIX)/bin/mvwtc
+	cp -f mvwtimg $(DESTDIR)$(PREFIX)/bin
+	chmod 755 $(DESTDIR)$(PREFIX)/bin/mvwtimg
+
+.PHONY: clean
+clean:
+	-rm -f mvwt mvwtc mvwtimg
diff --git a/README b/README
new file mode 100644
index 0000000..999c64c
--- /dev/null
+++ b/README
@@ -0,0 +1,35 @@
+minimum viable walltaker client based on skalibs. 
+(mr skarnet if u see this i am sorry for using your nice systems programming library to make goonware)
+
+simple usage ($ID is your walltaker link id):
+```
+while true; do
+    mvwt $ID
+    sleep 10
+done
+```
+the cachedir can be set with the -c flag;
+the default cachedir is /tmp/mvwt (changeable in config.h)
+the cachedir must already exist, so mkdir it if it doesn't exist already.
+
+build time dependency:
+    a build of skalibs (if you want static compilation, modify the makefile as per the comments)
+runtime dependencies:
+    s6-networking (this program uses ucspi for network communication)
+    hsetroot (used to actually set the wallpaper)
+
+pdjson.{c,h} is a bundled public domain JSON parser for C, available from here: https://github.com/skeeto/pdjson
+
+you might want to remove the `current` file in the cachedir each
+time before you start using mvwt;
+if the file already exists and your wallpaper is still the same as it
+was before, it won't be set again.
+(this is because mvwt doesn't keep any persistent internal state;
+it's meant to be run in a cron job or a shell loop and reads `current`
+and compares it to walltaker's API response
+to check if it needs to download and change the wallpaper)
+
+mvwtc and mvwtimg shouldn't be used directly. they expect to be on
+a ucspi connection and are spawned by mvwt.
+
+public domain i guess. do whatever you want with this
diff --git a/config.h b/config.h
new file mode 100644
index 0000000..fdd9049
--- /dev/null
+++ b/config.h
@@ -0,0 +1,5 @@
+/* s6-networking's TLS programs need either CADIR or CAFILE to be set
+ * to function; mvwt.c sets the envvar $CADIR to CADIR_DIR.
+ * you might have to change this location depending on your system. 
*/
+#define CADIR_DIR "/etc/ssl/certs"
+#define CACHE_DIR "/tmp/mvwt"
diff --git a/mvwt.c b/mvwt.c
new file mode 100644
index 0000000..b5a14a7
--- /dev/null
+++ b/mvwt.c
@@ -0,0 +1,58 @@
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#define USAGE "mvwt [-c cachedir] id"
+#define WALLTAKER_URL "walltaker.joi.how"
+
+/*
+ * entry point: parse "[-c cachedir] id", chdir into the cache directory,
+ * add CADIR=CADIR_DIR to the environment and exec
+ * "s6-tlsclient ... WALLTAKER_URL 443 mvwtc id".
+ * exits 100 on a usage error and 111 if chdir, the environment update
+ * or the exec fails.
+ */
+int
+main(int argc, char *const *argv)
+{
+	int opt;
+	subgetopt l = SUBGETOPT_ZERO;
+	stralloc envmod = STRALLOC_ZERO;
+
+	PROG = "mvwt";
+	const char *cachedir = CACHE_DIR;
+
+	while ((opt = subgetopt_r(argc, (const char * const*) argv, "c:", &l)) != -1) {
+		switch (opt) {
+		case 'c': /* wallpaper cache directory */
+			cachedir = l.arg;
+			break;
+		default:
+			strerr_dieusage(100, USAGE);
+			break;
+		}
+	}
+	argc -= l.ind;
+	argv += l.ind;
+	/* exactly one positional argument is expected: the link id */
+	if (argc != 1) {
+		strerr_dieusage(100, USAGE);
+	}
+
+	/* the later stages use paths relative to the cache dir
+	 * (mvwtc reads and writes "current") */
+	if (chdir(cachedir) < 0)
+		strerr_diefu(111, "chdir to cache dir "
+			"(mkdir it if it doesn't exist)");
+
+	char const *wtc_argv[] = { "s6-tlsclient", "-N", "-Z", "--",
+		WALLTAKER_URL, "443", "mvwtc", argv[0], 0 };
+
+	if (!env_addmodif(&envmod, "CADIR", CADIR_DIR))
+		strerr_diefu(111, "set CADIR");
+
+	xmexec_m(wtc_argv, envmod.s, envmod.len);
+
+	return 111; /* uh oh! */
+}
diff --git a/mvwtc.c b/mvwtc.c
new file mode 100644
index 0000000..158115a
--- /dev/null
+++ b/mvwtc.c
@@ -0,0 +1,221 @@
+/* full of jank */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "pdjson.h"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? 
(x) : (y))
+
+#define USAGE "mvwtc id"
+
+/* 1 if the status line in sa reads "HTTP/1.x 200 ...", 0 otherwise */
+static int
+http_isok(stralloc *sa)
+{
+	return sa->len >= 12
+	 && !strncmp(sa->s, "HTTP/1.", 7)
+	 && !strncmp(sa->s + 8, " 200", 4);
+}
+
+/*
+ * split the NUL-terminated url in `url` ("scheme://host/path/file") into
+ * host ("host"), path ("/path/file") and name ("file").
+ * each part is appended to its stralloc and NUL-terminated, so the .s
+ * members can be used as C strings (main puts them in an argv).
+ * dies with 111 if the url has no "//" to skip or if memory runs out.
+ */
+void
+url_hpn(stralloc *url, stralloc *host, stralloc *path, stralloc *name)
+{
+	size_t len, urli, eohost, lastslash;
+
+	/* url->len may count the terminating NUL; only look at the text */
+	len = strlen(url->s);
+
+	/* skip "scheme://": the first '/' and the one right after it */
+	urli = byte_chr(url->s, len, '/');
+	if (urli + 2 > len)
+		strerr_dief(111, "image url seems mangled");
+	urli += 2;
+
+	/* eohost is relative to urli; it is the rest of the text if the
+	 * url has no path, which leaves path empty */
+	eohost = byte_chr(url->s + urli, len - urli, '/');
+	lastslash = byte_rchr(url->s, len, '/');
+
+	if (!stralloc_catb(host, url->s + urli, eohost) || !stralloc_0(host)
+	 || !stralloc_cats(path, url->s + urli + eohost) || !stralloc_0(path)
+	 || !stralloc_cats(name, url->s + lastslash + 1) || !stralloc_0(name))
+		strerr_diefu(111, "split image url");
+}
+
+/* '\n' → '\0': cut sa at its first newline (the newline is not counted
+ * in sa->len afterwards); no-op if there is none */
+void
+sa_nltoz(stralloc *sa)
+{
+	size_t idx;
+
+	idx = byte_chr(sa->s, sa->len, '\n');
+	if (idx < sa->len) {
+		sa->s[idx] = '\0';
+		sa->len = idx;
+	}
+}
+
+/* '\0' → '\n': turn the first NUL in sa into a newline and make it the
+ * last byte of sa; no-op if there is none */
+void
+sa_ztonl(stralloc *sa)
+{
+	size_t idx;
+
+	idx = byte_chr(sa->s, sa->len, '\0');
+	if (idx < sa->len) {
+		sa->s[idx] = '\n';
+		sa->len = idx + 1;
+	}
+}
+
+/*
+ * img url in sa → url
+ * sa holds a json object; the string value of its top-level "post_url"
+ * member replaces the contents of url (NUL-terminated).
+ * returns 0 on success, -1 if sa is not a json object, the member is
+ * missing or not a string, the json is malformed or memory runs out.
+ */
+int
+imgpath(stralloc *url, stralloc *sa)
+{
+	json_stream j;
+	enum json_type t;
+	int r = -1;
+
+	json_open_buffer(&j, sa->s, sa->len);
+	json_set_streaming(&j, 0);
+
+	if (json_next(&j) != JSON_OBJECT)
+		goto done;
+
+	/* walk the top-level members: each round reads one member name
+	 * and then either takes or skips its value. anything other than
+	 * a name here is the end of the object or an error. */
+	while (json_next(&j) == JSON_STRING) {
+		/* exact match; the old prefix compare also matched the
+		 * empty string, i.e. any empty value before "post_url" */
+		if (!strcmp(json_get_string(&j, NULL), "post_url")) {
+			if (json_next(&j) != JSON_STRING)
+				break;
+			url->len = 0;
+			if (stralloc_cats(url, json_get_string(&j, NULL))
+			 && stralloc_0(url))
+				r = 0;
+			break;
+		}
+		/* not the member we want: skip its value, nested or not */
+		t = json_skip(&j);
+		if (t == JSON_ERROR || t == JSON_DONE)
+			break;
+	}
+done:
+	json_close(&j);
+	return r;
+}
+
+/*
+ * read an http response from fd: check the status line, skip the header
+ * and leave the first line of the body, NUL-terminated, in sa.
+ * returns 0 on success, -1 on a read error, on EOF before the end of the
+ * header, on a status other than 200 or if memory runs out.
+ */
+int
+http_recv(int fd, stralloc *sa)
+{
+	char obuf[BUFFER_INSIZE];
+	buffer outb = BUFFER_INIT(&buffer_read, fd, obuf, BUFFER_INSIZE);
+
+	/* skagetln yields 0 on EOF: treat that like an error, otherwise
+	 * the loop below would spin on a stale line forever */
+	if (skagetln(&outb, sa, '\n') <= 0)
+		return -1;
+	if (!http_isok(sa))
+		return -1;
+
+	/* skip to the end of the header */
+	for (;;) {
+		sa->len = 0;
+		if (skagetln(&outb, sa, '\n') <= 0)
+			return -1;
+		if (!strncmp(sa->s, "\r\n", 2)) {
+			break;
+		}
+	}
+
+	sa->len = 0;
+	/* result deliberately ignored: presumably the body can end at EOF
+	 * without a newline, which skagetln reports as a failure while
+	 * still leaving the data in sa -- TODO confirm */
+	skagetln(&outb, sa, '\n');
+	// todo handle fuckery here
+	if (!stralloc_0(sa))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * write the http request for link `id` to fd.
+ * returns 0 on success, -1 if the request could not be flushed.
+ */
+int
+http_send(int fd, char *id)
+{
+	char buf[BUFFER_OUTSIZE];
+
+	buffer b = BUFFER_INIT(&buffer_write, fd, buf, BUFFER_OUTSIZE);
+
+	buffer_putsnoflush(&b, "GET /api/links/");
+	buffer_putsnoflush(&b, id);
+	buffer_putsnoflush(&b, ".json HTTP/1.0\r\n");
+	buffer_putsnoflush(&b, "Host: walltaker.joi.how\r\n");
+	buffer_putsnoflush(&b, "Connection: close\r\n");
+	buffer_putsnoflush(&b, "User-Agent: mvwt\r\n\r\n");
+	if (!buffer_flush(&b))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * entry point, run on a ucspi connection (fd 6 reads from the server,
+ * fd 7 writes to it): fetch the link's json, pull out the image url and
+ * compare it with the `current` file. if it differs, record it in
+ * `current` and exec s6-tlsclient + mvwtimg to download the image.
+ */
+int
+main(int argc, char *argv[])
+{
+	stralloc resp = STRALLOC_ZERO;
+	stralloc img = STRALLOC_ZERO;
+	stralloc currentimg = STRALLOC_ZERO;
+	stralloc tmp = STRALLOC_ZERO;
+	stralloc imhost = STRALLOC_ZERO;
+	stralloc impath = STRALLOC_ZERO;
+	stralloc imfile = STRALLOC_ZERO;
+	PROG = "mvwtc";
+
+	if (argc != 2)
+		strerr_dieusage(100, USAGE);
+
+	if (http_send(7, argv[1]) < 0)
+		strerr_diefu(111, "send http request");
+	if (http_recv(6, &resp) < 0) {
+		stralloc_free(&resp);
+		strerr_diefu(111, "receive http response");
+	}
+	fd_shutdown(7, 1);
+	fd_close(7);
+	fd_shutdown(6, 0);
+	fd_close(6);
+
+	if (imgpath(&img, &resp) < 0) {
+		stralloc_free(&img);
+		stralloc_free(&resp);
+		strerr_dief(111, "json seems mangled");
+	}
+	stralloc_free(&resp);
+
+	if (opengetlnclose("current", &currentimg, '\n') >= 0) {
+		sa_nltoz(&currentimg);
+		/* img.len counts its terminating NUL, currentimg.len does
+		 * not; require equal lengths so that a prefix (or an empty
+		 * `current` file) is not mistaken for the same url */
+		if (currentimg.len && currentimg.len + 1 == img.len
+		 && !strncmp(currentimg.s, img.s, currentimg.len)) {
+			/* we don't need to do anything, just exit */
+			stralloc_free(&currentimg);
+			stralloc_free(&img);
+			return 0;
+		}
+	}
+
+	/* `current` stores the url as one newline-terminated line */
+	if (!stralloc_copy(&tmp, &img))
+		strerr_diefu(111, "copy image url");
+	sa_ztonl(&tmp);
+	if (!openwritenclose_suffix6("current", tmp.s, tmp.len, NULL, 0, "~")) {
+		stralloc_free(&tmp);
+		stralloc_free(&currentimg);
+		
stralloc_free(&img);
+		strerr_diefu(111, "write to current");
+	}
+	stralloc_free(&tmp);
+
+	url_hpn(&img, &imhost, &impath, &imfile);
+
+	char const *wtimg_argv[] = { "s6-tlsclient", "-N", "--",
+		imhost.s, "443", "mvwtimg", imhost.s, impath.s, imfile.s, 0 };
+
+	xexec(wtimg_argv);
+
+	return 111;
+}
diff --git a/mvwtimg.c b/mvwtimg.c
new file mode 100644
index 0000000..94d01d1
--- /dev/null
+++ b/mvwtimg.c
@@ -0,0 +1,119 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define USAGE "mvwtimg host path filename"
+
+/* 1 if the status line in sa reads "HTTP/1.x 200 ...", 0 otherwise */
+static int
+http_isok(stralloc *sa)
+{
+	return sa->len >= 12
+	 && !strncmp(sa->s, "HTTP/1.", 7)
+	 && !strncmp(sa->s + 8, " 200", 4);
+}
+
+/*
+ * copy everything that can still be read from infd into the file `path`,
+ * which is created or truncated.
+ * returns 0 on success, -1 if the file can't be opened or the copy fails.
+ */
+int
+download(int infd, char *path)
+{
+	int outfd;
+
+	outfd = open_trunc(path);
+	if (outfd < 0)
+		return -1;
+
+	if (fd_cat(infd, outfd) < 0) {
+		fd_close(outfd);
+		return -1;
+	}
+
+	fd_sync(outfd);
+	fd_close(outfd);
+
+	return 0;
+}
+
+/*
+ * read from fd into sa (emptied first), one byte at a time, up to and
+ * including the first `sep` byte or until EOF.
+ * reading byte by byte never consumes anything past the separator;
+ * presumably that is the point, since main hands the same fd to
+ * download() right after the header.
+ * returns 0 on success (sa stays empty if EOF came first), -1 on a read
+ * error or if memory runs out.
+ */
+int
+fdgetln(int fd, stralloc *sa, int sep)
+{
+	char s[1];
+	ssize_t r;
+
+	sa->len = 0;
+	for (;;) {
+		r = fd_read(fd, s, 1);
+		if (r < 0)
+			return -1;
+		/* a zero-length read is EOF; comparing the byte itself with
+		 * EOF is wrong: it is unset here and 0xff is valid data */
+		if (r == 0)
+			break;
+		if (!stralloc_append(sa, s[0]))
+			return -1;
+		if (s[0] == sep)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * read the http status line and header from fd, stopping right after
+ * the empty line that ends the header.
+ * returns 0 if the status is 200 and the header is complete, -1 on a
+ * read error, any other status or EOF inside the header.
+ */
+int
+http_recv_header(int fd)
+{
+	stralloc sa = STRALLOC_ZERO;
+	int r = -1;
+
+	if (fdgetln(fd, &sa, '\n') < 0 || !http_isok(&sa))
+		goto done;
+
+	for (;;) {
+		/* an empty read means EOF before the end of the header */
+		if (fdgetln(fd, &sa, '\n') < 0 || !sa.len)
+			goto done;
+		if (sa.len >= 2 && !strncmp(sa.s, "\r\n", 2)) {
+			r = 0;
+			break;
+		}
+	}
+done:
+	stralloc_free(&sa);
+
+	return r;
+}
+
+/*
+ * write the http request for `path` on `host` to fd.
+ * returns 0 on success, -1 if the request could not be flushed.
+ */
+int
+http_send(int fd, char *path, char *host)
+{
+	char buf[BUFFER_OUTSIZE];
+
+	buffer b = BUFFER_INIT(&buffer_write, fd, buf, BUFFER_OUTSIZE);
+
+	buffer_putsnoflush(&b, "GET ");
+	buffer_putsnoflush(&b, path);
+	buffer_putsnoflush(&b, " HTTP/1.0\r\n");
+	buffer_putsnoflush(&b, "Host: ");
+	buffer_putsnoflush(&b, host);
+	buffer_putsnoflush(&b, "\r\nConnection: close\r\n");
+	buffer_putsnoflush(&b, "User-Agent: mvwt\r\n\r\n");
+	if (!buffer_flush(&b))
+		return -1;
+
+	return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+	PROG 
= "mvwtimg"; + + if (argc != 4) + strerr_dieusage(100, USAGE); + + if (http_send(7, argv[2], argv[1]) < 0) + strerr_diefu(111, "send http request"); + if (http_recv_header(6) < 0) + strerr_diefu(111, "recieve image header"); + if (download(6, argv[3]) < 0) + strerr_diefu(111, "downloading and saving image"); + fd_shutdown(7, 1); + fd_close(7); + fd_shutdown(6, 0); + fd_close(6); + + char const *hsetroot_argv[] = { "hsetroot", "-cover", argv[3], 0 }; + xexec(hsetroot_argv); + return 0; +} diff --git a/pdjson.c b/pdjson.c new file mode 100644 index 0000000..474fcb5 --- /dev/null +++ b/pdjson.c @@ -0,0 +1,992 @@ +#ifndef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200112L +#elif _POSIX_C_SOURCE < 200112L +# error incompatible _POSIX_C_SOURCE level +#endif + +#include +#include +#include + +#ifndef PDJSON_H +# include "pdjson.h" +#endif + +#define JSON_FLAG_ERROR (1u << 0) +#define JSON_FLAG_STREAMING (1u << 1) + +#if defined(_MSC_VER) && (_MSC_VER < 1900) + +#define json_error(json, format, ...) \ + if (!(json->flags & JSON_FLAG_ERROR)) { \ + json->flags |= JSON_FLAG_ERROR; \ + _snprintf_s(json->errmsg, sizeof(json->errmsg), \ + _TRUNCATE, \ + format, \ + __VA_ARGS__); \ + } \ + +#else + +#define json_error(json, format, ...) \ + if (!(json->flags & JSON_FLAG_ERROR)) { \ + json->flags |= JSON_FLAG_ERROR; \ + snprintf(json->errmsg, sizeof(json->errmsg), \ + format, \ + __VA_ARGS__); \ + } \ + +#endif /* _MSC_VER */ + +/* See also PDJSON_STACK_MAX below. 
*/ +#ifndef PDJSON_STACK_INC +# define PDJSON_STACK_INC 4 +#endif + +struct json_stack { + enum json_type type; + long count; +}; + +static enum json_type +push(json_stream *json, enum json_type type) +{ + json->stack_top++; + +#ifdef PDJSON_STACK_MAX + if (json->stack_top > PDJSON_STACK_MAX) { + json_error(json, "%s", "maximum depth of nesting reached"); + return JSON_ERROR; + } +#endif + + if (json->stack_top >= json->stack_size) { + struct json_stack *stack; + size_t size = (json->stack_size + PDJSON_STACK_INC) * sizeof(*json->stack); + stack = (struct json_stack *)json->alloc.realloc(json->stack, size); + if (stack == NULL) { + json_error(json, "%s", "out of memory"); + return JSON_ERROR; + } + + json->stack_size += PDJSON_STACK_INC; + json->stack = stack; + } + + json->stack[json->stack_top].type = type; + json->stack[json->stack_top].count = 0; + + return type; +} + +/* Note: c is assumed not to be EOF. */ +static enum json_type +pop(json_stream *json, int c, enum json_type expected) +{ + if (json->stack == NULL || json->stack[json->stack_top].type != expected) { + json_error(json, "unexpected byte '%c'", c); + return JSON_ERROR; + } + json->stack_top--; + return expected == JSON_ARRAY ? 
JSON_ARRAY_END : JSON_OBJECT_END; +} + +static int buffer_peek(struct json_source *source) +{ + if (source->position < source->source.buffer.length) + return source->source.buffer.buffer[source->position]; + else + return EOF; +} + +static int buffer_get(struct json_source *source) +{ + int c = source->peek(source); + if (c != EOF) + source->position++; + return c; +} + +static int stream_get(struct json_source *source) +{ + int c = fgetc(source->source.stream.stream); + if (c != EOF) + source->position++; + return c; +} + +static int stream_peek(struct json_source *source) +{ + int c = fgetc(source->source.stream.stream); + ungetc(c, source->source.stream.stream); + return c; +} + +static void init(json_stream *json) +{ + json->lineno = 1; + json->flags = JSON_FLAG_STREAMING; + json->errmsg[0] = '\0'; + json->ntokens = 0; + json->next = (enum json_type)0; + + json->stack = NULL; + json->stack_top = -1; + json->stack_size = 0; + + json->data.string = NULL; + json->data.string_size = 0; + json->data.string_fill = 0; + json->source.position = 0; + + json->alloc.malloc = malloc; + json->alloc.realloc = realloc; + json->alloc.free = free; +} + +static enum json_type +is_match(json_stream *json, const char *pattern, enum json_type type) +{ + int c; + for (const char *p = pattern; *p; p++) { + if (*p != (c = json->source.get(&json->source))) { + if (c != EOF) { + json_error(json, "expected '%c' instead of byte '%c'", *p, c); + } else { + json_error(json, "expected '%c' instead of end of text", *p); + } + return JSON_ERROR; + } + } + return type; +} + +static int pushchar(json_stream *json, int c) +{ + if (json->data.string_fill == json->data.string_size) { + size_t size = json->data.string_size * 2; + char *buffer = (char *)json->alloc.realloc(json->data.string, size); + if (buffer == NULL) { + json_error(json, "%s", "out of memory"); + return -1; + } else { + json->data.string_size = size; + json->data.string = buffer; + } + } + 
json->data.string[json->data.string_fill++] = c; + return 0; +} + +static int init_string(json_stream *json) +{ + json->data.string_fill = 0; + if (json->data.string == NULL) { + json->data.string_size = 1024; + json->data.string = (char *)json->alloc.malloc(json->data.string_size); + if (json->data.string == NULL) { + json_error(json, "%s", "out of memory"); + return -1; + } + } + json->data.string[0] = '\0'; + return 0; +} + +static int encode_utf8(json_stream *json, unsigned long c) +{ + if (c < 0x80UL) { + return pushchar(json, c); + } else if (c < 0x0800UL) { + return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else if (c < 0x010000UL) { + if (c >= 0xd800 && c <= 0xdfff) { + json_error(json, "invalid codepoint %06lx", c); + return -1; + } + return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) && + (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else if (c < 0x110000UL) { + return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) && + (pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && + (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); + } else { + json_error(json, "unable to encode %06lx as UTF-8", c); + return -1; + } +} + +static int hexchar(int c) +{ + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': + case 'A': return 10; + case 'b': + case 'B': return 11; + case 'c': + case 'C': return 12; + case 'd': + case 'D': return 13; + case 'e': + case 'E': return 14; + case 'f': + case 'F': return 15; + default: + return -1; + } +} + +static long +read_unicode_cp(json_stream *json) +{ + long cp = 0; + int shift = 12; + + for (size_t i = 0; i < 4; i++) { + int c = json->source.get(&json->source); + int hc; + + 
if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if ((hc = hexchar(c)) == -1) { + json_error(json, "invalid escape Unicode byte '%c'", c); + return -1; + } + + cp += hc * (1 << shift); + shift -= 4; + } + + + return cp; +} + +static int read_unicode(json_stream *json) +{ + long cp, h, l; + + if ((cp = read_unicode_cp(json)) == -1) { + return -1; + } + + if (cp >= 0xd800 && cp <= 0xdbff) { + /* This is the high portion of a surrogate pair; we need to read the + * lower portion to get the codepoint + */ + h = cp; + + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if (c != '\\') { + json_error(json, "invalid continuation for surrogate pair '%c', " + "expected '\\'", c); + return -1; + } + + c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in Unicode"); + return -1; + } else if (c != 'u') { + json_error(json, "invalid continuation for surrogate pair '%c', " + "expected 'u'", c); + return -1; + } + + if ((l = read_unicode_cp(json)) == -1) { + return -1; + } + + if (l < 0xdc00 || l > 0xdfff) { + json_error(json, "surrogate pair continuation \\u%04lx out " + "of range (dc00-dfff)", l); + return -1; + } + + cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000); + } else if (cp >= 0xdc00 && cp <= 0xdfff) { + json_error(json, "dangling surrogate \\u%04lx", cp); + return -1; + } + + return encode_utf8(json, cp); +} + +static int +read_escaped(json_stream *json) +{ + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal in escape"); + return -1; + } else if (c == 'u') { + if (read_unicode(json) != 0) + return -1; + } else { + switch (c) { + case '\\': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case '/': + case '"': + { + const char *codes = "\\bfnrt/\""; + const char *p = 
strchr(codes, c); + if (pushchar(json, "\\\b\f\n\r\t/\""[p - codes]) != 0) + return -1; + } + break; + default: + json_error(json, "invalid escaped byte '%c'", c); + return -1; + } + } + return 0; +} + +static int +char_needs_escaping(int c) +{ + if ((c >= 0) && (c < 0x20 || c == 0x22 || c == 0x5c)) { + return 1; + } + + return 0; +} + +static int +utf8_seq_length(char byte) +{ + unsigned char u = (unsigned char) byte; + if (u < 0x80) return 1; + + if (0x80 <= u && u <= 0xBF) + { + // second, third or fourth byte of a multi-byte + // sequence, i.e. a "continuation byte" + return 0; + } + else if (u == 0xC0 || u == 0xC1) + { + // overlong encoding of an ASCII byte + return 0; + } + else if (0xC2 <= u && u <= 0xDF) + { + // 2-byte sequence + return 2; + } + else if (0xE0 <= u && u <= 0xEF) + { + // 3-byte sequence + return 3; + } + else if (0xF0 <= u && u <= 0xF4) + { + // 4-byte sequence + return 4; + } + else + { + // u >= 0xF5 + // Restricted (start of 4-, 5- or 6-byte sequence) or invalid UTF-8 + return 0; + } +} + +static int +is_legal_utf8(const unsigned char *bytes, int length) +{ + if (0 == bytes || 0 == length) return 0; + + unsigned char a; + const unsigned char* srcptr = bytes + length; + switch (length) + { + default: + return 0; + // Everything else falls through when true. 
+ case 4: + if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + /* FALLTHRU */ + case 3: + if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + /* FALLTHRU */ + case 2: + a = (*--srcptr); + switch (*bytes) + { + case 0xE0: + if (a < 0xA0 || a > 0xBF) return 0; + break; + case 0xED: + if (a < 0x80 || a > 0x9F) return 0; + break; + case 0xF0: + if (a < 0x90 || a > 0xBF) return 0; + break; + case 0xF4: + if (a < 0x80 || a > 0x8F) return 0; + break; + default: + if (a < 0x80 || a > 0xBF) return 0; + break; + } + /* FALLTHRU */ + case 1: + if (*bytes >= 0x80 && *bytes < 0xC2) return 0; + } + return *bytes <= 0xF4; +} + +static int +read_utf8(json_stream* json, int next_char) +{ + int count = utf8_seq_length(next_char); + if (!count) + { + json_error(json, "%s", "invalid UTF-8 character"); + return -1; + } + + char buffer[4]; + buffer[0] = next_char; + int i; + for (i = 1; i < count; ++i) + { + buffer[i] = json->source.get(&json->source); + } + + if (!is_legal_utf8((unsigned char*) buffer, count)) + { + json_error(json, "%s", "invalid UTF-8 text"); + return -1; + } + + for (i = 0; i < count; ++i) + { + if (pushchar(json, buffer[i]) != 0) + return -1; + } + return 0; +} + +static enum json_type +read_string(json_stream *json) +{ + if (init_string(json) != 0) + return JSON_ERROR; + while (1) { + int c = json->source.get(&json->source); + if (c == EOF) { + json_error(json, "%s", "unterminated string literal"); + return JSON_ERROR; + } else if (c == '"') { + if (pushchar(json, '\0') == 0) + return JSON_STRING; + else + return JSON_ERROR; + } else if (c == '\\') { + if (read_escaped(json) != 0) + return JSON_ERROR; + } else if ((unsigned) c >= 0x80) { + if (read_utf8(json, c) != 0) + return JSON_ERROR; + } else { + if (char_needs_escaping(c)) { + json_error(json, "%s", "unescaped control character in string"); + return JSON_ERROR; + } + + if (pushchar(json, c) != 0) + return JSON_ERROR; + } + } + return JSON_ERROR; +} + +static int +is_digit(int c) +{ + return c >= 48 
/*0*/ && c <= 57 /*9*/; +} + +static int +read_digits(json_stream *json) +{ + int c; + unsigned nread = 0; + while (is_digit(c = json->source.peek(&json->source))) { + if (pushchar(json, json->source.get(&json->source)) != 0) + return -1; + + nread++; + } + + if (nread == 0) { + if (c != EOF) { + json_error(json, "expected digit instead of byte '%c'", c); + } else { + json_error(json, "%s", "expected digit instead of end of text"); + } + return -1; + } + + return 0; +} + +static enum json_type +read_number(json_stream *json, int c) +{ + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (c == '-') { + c = json->source.get(&json->source); + if (is_digit(c)) { + return read_number(json, c); + } else { + if (c != EOF) { + json_error(json, "unexpected byte '%c' in number", c); + } else { + json_error(json, "%s", "unexpected end of text in number"); + } + return JSON_ERROR; + } + } else if (strchr("123456789", c) != NULL) { + c = json->source.peek(&json->source); + if (is_digit(c)) { + if (read_digits(json) != 0) + return JSON_ERROR; + } + } + /* Up to decimal or exponent has been read. */ + c = json->source.peek(&json->source); + if (strchr(".eE", c) == NULL) { + if (pushchar(json, '\0') != 0) + return JSON_ERROR; + else + return JSON_NUMBER; + } + if (c == '.') { + json->source.get(&json->source); // consume . + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (read_digits(json) != 0) + return JSON_ERROR; + } + /* Check for exponent. 
*/ + c = json->source.peek(&json->source); + if (c == 'e' || c == 'E') { + json->source.get(&json->source); // consume e/E + if (pushchar(json, c) != 0) + return JSON_ERROR; + c = json->source.peek(&json->source); + if (c == '+' || c == '-') { + json->source.get(&json->source); // consume + if (pushchar(json, c) != 0) + return JSON_ERROR; + if (read_digits(json) != 0) + return JSON_ERROR; + } else if (is_digit(c)) { + if (read_digits(json) != 0) + return JSON_ERROR; + } else { + if (c != EOF) { + json_error(json, "unexpected byte '%c' in number", c); + } else { + json_error(json, "%s", "unexpected end of text in number"); + } + return JSON_ERROR; + } + } + if (pushchar(json, '\0') != 0) + return JSON_ERROR; + else + return JSON_NUMBER; +} + +bool +json_isspace(int c) +{ + switch (c) { + case 0x09: + case 0x0a: + case 0x0d: + case 0x20: + return true; + } + + return false; +} + +/* Returns the next non-whitespace character in the stream. */ +static int next(json_stream *json) +{ + int c; + while (json_isspace(c = json->source.get(&json->source))) + if (c == '\n') + json->lineno++; + return c; +} + +static enum json_type +read_value(json_stream *json, int c) +{ + json->ntokens++; + switch (c) { + case EOF: + json_error(json, "%s", "unexpected end of text"); + return JSON_ERROR; + case '{': + return push(json, JSON_OBJECT); + case '[': + return push(json, JSON_ARRAY); + case '"': + return read_string(json); + case 'n': + return is_match(json, "ull", JSON_NULL); + case 'f': + return is_match(json, "alse", JSON_FALSE); + case 't': + return is_match(json, "rue", JSON_TRUE); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + if (init_string(json) != 0) + return JSON_ERROR; + return read_number(json, c); + default: + json_error(json, "unexpected byte '%c' in value", c); + return JSON_ERROR; + } +} + +enum json_type json_peek(json_stream *json) +{ + enum json_type next; + if (json->next) + 
next = json->next; + else + next = json->next = json_next(json); + return next; +} + +enum json_type json_next(json_stream *json) +{ + if (json->flags & JSON_FLAG_ERROR) + return JSON_ERROR; + if (json->next != 0) { + enum json_type next = json->next; + json->next = (enum json_type)0; + return next; + } + if (json->ntokens > 0 && json->stack_top == (size_t)-1) { + + /* In the streaming mode leave any trailing whitespaces in the stream. + * This allows the user to validate any desired separation between + * values (such as newlines) using json_source_get/peek() with any + * remaining whitespaces ignored as leading when we parse the next + * value. */ + if (!(json->flags & JSON_FLAG_STREAMING)) { + int c; + + do { + c = json->source.peek(&json->source); + if (json_isspace(c)) { + c = json->source.get(&json->source); + } + } while (json_isspace(c)); + + if (c != EOF) { + json_error(json, "expected end of text instead of byte '%c'", c); + return JSON_ERROR; + } + } + + return JSON_DONE; + } + int c = next(json); + if (json->stack_top == (size_t)-1) { + if (c == EOF && (json->flags & JSON_FLAG_STREAMING)) + return JSON_DONE; + + return read_value(json, c); + } + if (json->stack[json->stack_top].type == JSON_ARRAY) { + if (json->stack[json->stack_top].count == 0) { + if (c == ']') { + return pop(json, c, JSON_ARRAY); + } + json->stack[json->stack_top].count++; + return read_value(json, c); + } else if (c == ',') { + json->stack[json->stack_top].count++; + return read_value(json, next(json)); + } else if (c == ']') { + return pop(json, c, JSON_ARRAY); + } else { + if (c != EOF) { + json_error(json, "unexpected byte '%c'", c); + } else { + json_error(json, "%s", "unexpected end of text"); + } + return JSON_ERROR; + } + } else if (json->stack[json->stack_top].type == JSON_OBJECT) { + if (json->stack[json->stack_top].count == 0) { + if (c == '}') { + return pop(json, c, JSON_OBJECT); + } + + /* No member name/value pairs yet. 
*/ + enum json_type value = read_value(json, c); + if (value != JSON_STRING) { + if (value != JSON_ERROR) + json_error(json, "%s", "expected member name or '}'"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return value; + } + } else if ((json->stack[json->stack_top].count % 2) == 0) { + /* Expecting comma followed by member name. */ + if (c != ',' && c != '}') { + json_error(json, "%s", "expected ',' or '}' after member value"); + return JSON_ERROR; + } else if (c == '}') { + return pop(json, c, JSON_OBJECT); + } else { + enum json_type value = read_value(json, next(json)); + if (value != JSON_STRING) { + if (value != JSON_ERROR) + json_error(json, "%s", "expected member name"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return value; + } + } + } else if ((json->stack[json->stack_top].count % 2) == 1) { + /* Expecting colon followed by value. */ + if (c != ':') { + json_error(json, "%s", "expected ':' after member name"); + return JSON_ERROR; + } else { + json->stack[json->stack_top].count++; + return read_value(json, next(json)); + } + } + } + json_error(json, "%s", "invalid parser state"); + return JSON_ERROR; +} + +void json_reset(json_stream *json) +{ + json->stack_top = -1; + json->ntokens = 0; + json->flags &= ~JSON_FLAG_ERROR; + json->errmsg[0] = '\0'; +} + +enum json_type json_skip(json_stream *json) +{ + enum json_type type = json_next(json); + size_t cnt_arr = 0; + size_t cnt_obj = 0; + + for (enum json_type skip = type; ; skip = json_next(json)) { + if (skip == JSON_ERROR || skip == JSON_DONE) + return skip; + + if (skip == JSON_ARRAY) { + ++cnt_arr; + } else if (skip == JSON_ARRAY_END && cnt_arr > 0) { + --cnt_arr; + } else if (skip == JSON_OBJECT) { + ++cnt_obj; + } else if (skip == JSON_OBJECT_END && cnt_obj > 0) { + --cnt_obj; + } + + if (!cnt_arr && !cnt_obj) + break; + } + + return type; +} + +enum json_type json_skip_until(json_stream *json, enum json_type type) +{ + while (1) { + enum 
json_type skip = json_skip(json); + + if (skip == JSON_ERROR || skip == JSON_DONE) + return skip; + + if (skip == type) + break; + } + + return type; +} + +const char *json_get_string(json_stream *json, size_t *length) +{ + if (length != NULL) + *length = json->data.string_fill; + if (json->data.string == NULL) + return ""; + else + return json->data.string; +} + +double json_get_number(json_stream *json) +{ + char *p = json->data.string; + return p == NULL ? 0 : strtod(p, NULL); +} + +const char *json_get_error(json_stream *json) +{ + return json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL; +} + +size_t json_get_lineno(json_stream *json) +{ + return json->lineno; +} + +size_t json_get_position(json_stream *json) +{ + return json->source.position; +} + +size_t json_get_depth(json_stream *json) +{ + return json->stack_top + 1; +} + +/* Return the current parsing context, that is, JSON_OBJECT if we are inside + an object, JSON_ARRAY if we are inside an array, and JSON_DONE if we are + not yet/anymore in either. + + Additionally, for the first two cases, also return the number of parsing + events that have already been observed at this level with json_next/peek(). + In particular, inside an object, an odd number would indicate that the just + observed JSON_STRING event is a member name. 
+*/ +enum json_type json_get_context(json_stream *json, size_t *count) +{ + if (json->stack_top == (size_t)-1) + return JSON_DONE; + + if (count != NULL) + *count = json->stack[json->stack_top].count; + + return json->stack[json->stack_top].type; +} + +int json_source_get(json_stream *json) +{ + int c = json->source.get(&json->source); + if (c == '\n') + json->lineno++; + return c; +} + +int json_source_peek(json_stream *json) +{ + return json->source.peek(&json->source); +} + +void json_open_buffer(json_stream *json, const void *buffer, size_t size) +{ + init(json); + json->source.get = buffer_get; + json->source.peek = buffer_peek; + json->source.source.buffer.buffer = (const char *)buffer; + json->source.source.buffer.length = size; +} + +void json_open_string(json_stream *json, const char *string) +{ + json_open_buffer(json, string, strlen(string)); +} + +void json_open_stream(json_stream *json, FILE * stream) +{ + init(json); + json->source.get = stream_get; + json->source.peek = stream_peek; + json->source.source.stream.stream = stream; +} + +static int user_get(struct json_source *json) +{ + int c = json->source.user.get(json->source.user.ptr); + if (c != EOF) + json->position++; + return c; +} + +static int user_peek(struct json_source *json) +{ + return json->source.user.peek(json->source.user.ptr); +} + +void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user) +{ + init(json); + json->source.get = user_get; + json->source.peek = user_peek; + json->source.source.user.ptr = user; + json->source.source.user.get = get; + json->source.source.user.peek = peek; +} + +void json_set_allocator(json_stream *json, json_allocator *a) +{ + json->alloc = *a; +} + +void json_set_streaming(json_stream *json, bool streaming) +{ + if (streaming) + json->flags |= JSON_FLAG_STREAMING; + else + json->flags &= ~JSON_FLAG_STREAMING; +} + +void json_close(json_stream *json) +{ + json->alloc.free(json->stack); + json->alloc.free(json->data.string); 
+} diff --git a/pdjson.h b/pdjson.h new file mode 100644 index 0000000..c0262d1 --- /dev/null +++ b/pdjson.h @@ -0,0 +1,117 @@ +#ifndef PDJSON_H +#define PDJSON_H + +#ifndef PDJSON_SYMEXPORT +# define PDJSON_SYMEXPORT +#endif + +#ifdef __cplusplus +extern "C" { +#else +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + #include +#else + #ifndef bool + #define bool int + #define true 1 + #define false 0 + #endif /* bool */ +#endif /* __STDC_VERSION__ */ +#endif /* __cplusplus */ + +#include + +enum json_type { + JSON_ERROR = 1, JSON_DONE, + JSON_OBJECT, JSON_OBJECT_END, JSON_ARRAY, JSON_ARRAY_END, + JSON_STRING, JSON_NUMBER, JSON_TRUE, JSON_FALSE, JSON_NULL +}; + +struct json_allocator { + void *(*malloc)(size_t); + void *(*realloc)(void *, size_t); + void (*free)(void *); +}; + +typedef int (*json_user_io)(void *user); + +typedef struct json_stream json_stream; +typedef struct json_allocator json_allocator; + +PDJSON_SYMEXPORT void json_open_buffer(json_stream *json, const void *buffer, size_t size); +PDJSON_SYMEXPORT void json_open_string(json_stream *json, const char *string); +PDJSON_SYMEXPORT void json_open_stream(json_stream *json, FILE *stream); +PDJSON_SYMEXPORT void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user); +PDJSON_SYMEXPORT void json_close(json_stream *json); + +PDJSON_SYMEXPORT void json_set_allocator(json_stream *json, json_allocator *a); +PDJSON_SYMEXPORT void json_set_streaming(json_stream *json, bool mode); + +PDJSON_SYMEXPORT enum json_type json_next(json_stream *json); +PDJSON_SYMEXPORT enum json_type json_peek(json_stream *json); +PDJSON_SYMEXPORT void json_reset(json_stream *json); +PDJSON_SYMEXPORT const char *json_get_string(json_stream *json, size_t *length); +PDJSON_SYMEXPORT double json_get_number(json_stream *json); + +PDJSON_SYMEXPORT enum json_type json_skip(json_stream *json); +PDJSON_SYMEXPORT enum json_type json_skip_until(json_stream *json, enum json_type type); + 
+PDJSON_SYMEXPORT size_t json_get_lineno(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_position(json_stream *json);
+PDJSON_SYMEXPORT size_t json_get_depth(json_stream *json);
+PDJSON_SYMEXPORT enum json_type json_get_context(json_stream *json, size_t *count);
+PDJSON_SYMEXPORT const char *json_get_error(json_stream *json);
+
+PDJSON_SYMEXPORT int json_source_get(json_stream *json);
+PDJSON_SYMEXPORT int json_source_peek(json_stream *json);
+PDJSON_SYMEXPORT bool json_isspace(int c);
+
+/* internal: structure layouts; NOTE(review): presumably exposed only so callers can declare a json_stream - confirm before touching fields directly */
+
+struct json_source {
+    int (*get)(struct json_source *);   /* character callback, invoked by json_source_get() */
+    int (*peek)(struct json_source *);  /* character callback, invoked by json_source_peek() */
+    size_t position;                    /* reported by json_get_position(); user_get() bumps it per non-EOF character */
+    union {                             /* backend state; the json_open_*() call used decides which member is filled */
+        struct {
+            FILE *stream;               /* FILE passed to json_open_stream() */
+        } stream;
+        struct {
+            const char *buffer;         /* pointer passed to json_open_buffer(), stored without copying */
+            size_t length;              /* size passed to json_open_buffer() */
+        } buffer;
+        struct {
+            void *ptr;                  /* user pointer passed to json_open_user(), given to both callbacks */
+            json_user_io get;
+            json_user_io peek;
+        } user;
+    } source;
+};
+
+struct json_stream {
+    size_t lineno;                      /* incremented by json_source_get() on every newline */
+
+    struct json_stack *stack;           /* context entries; json_get_context() reads their type and count */
+    size_t stack_top;                   /* index of the innermost entry; (size_t)-1 means no context */
+    size_t stack_size;                  /* presumably the allocated capacity of stack - TODO confirm */
+    enum json_type next;                /* presumably the event buffered by json_peek() - TODO confirm */
+    unsigned flags;                     /* JSON_FLAG_* bits such as JSON_FLAG_ERROR and JSON_FLAG_STREAMING */
+
+    struct {
+        char *string;                   /* returned by json_get_string(), parsed by json_get_number(); may be NULL */
+        size_t string_fill;             /* reported as the length by json_get_string() */
+        size_t string_size;             /* presumably the allocated capacity of string - TODO confirm */
+    } data;
+
+    size_t ntokens;                     /* NOTE(review): not used in the visible code; meaning unverified */
+
+    struct json_source source;          /* character source installed by json_open_*() */
+    struct json_allocator alloc;        /* overwritten by json_set_allocator(); its free is used by json_close() */
+    char errmsg[128];                   /* returned by json_get_error() while JSON_FLAG_ERROR is set */
+};
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif