From be33f78109e835be2f5c5c46d005284b2f0cc000 Mon Sep 17 00:00:00 2001 From: Quinn Date: Wed, 3 Sep 2025 22:28:53 +0200 Subject: [PATCH] Use `void *` over `u8 *`, to be more explicit we're working with raw data, and not just bytes. The main problem with all my buffer-parsing code so far is that they often... always break strict aliasing rules. Using a `void *` will make these bugs more explicit / noticeable. --- makefile | 2 +- src/dat/mcx.c | 22 +++++++++++----------- src/dat/mcx.h | 8 ++++---- src/dat/nbt.c | 32 ++++++++++++++++---------------- src/dat/nbt.h | 4 ++-- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/makefile b/makefile index 6447b42..35b9d1d 100644 --- a/makefile +++ b/makefile @@ -14,7 +14,7 @@ MARCH ?= $(shell uname -m) KERNEL ?= $(shell uname -s | tr '[:upper:]' '[:lower:]') # compilation flags -CFLAGS += -c -std=gnu99 -Wall -Wextra -Wpedantic -MMD -MP +CFLAGS += -c -std=gnu99 -Wall -Wextra -Wpedantic -MMD -MP -Wno-pointer-arith LDFLAGS += -flto # architecture/OS detection diff --git a/src/dat/mcx.c b/src/dat/mcx.c index dfd05f2..34d8f1b 100644 --- a/src/dat/mcx.c +++ b/src/dat/mcx.c @@ -16,7 +16,7 @@ #define CHUNKS 0x400 // amount of chunks in a file /* Moves chunks `src_s` to `src_e` (inclusive) from `src`, back onto `dst`. */ -static void mvchunks(u8 *restrict buf, u8 *src, u8 *dst, int src_s, int src_e) { +static void mvchunks(void *restrict buf, void *src, void *dst, int src_s, int src_e) { assert(src > dst); u32 *table = (u32 *)buf; size_t len = src - dst; // acquire the amount of bytes that we shall move @@ -34,7 +34,7 @@ static void mvchunks(u8 *restrict buf, u8 *src, u8 *dst, int src_s, int src_e) { /* Deletes chunk `sidx` by moving chunks up to `eidx` back over `sidx` in `buf`. * `rmb` is an optional additional offset that can be applied, and signifies bytes already removed. * Returns the bytes removed by this function. 
*/ -static size_t delchunk(u8 *restrict buf, size_t rmb, int sidx, int eidx) { +static size_t delchunk(void *restrict buf, size_t rmb, int sidx, int eidx) { // load the table data u32 *table = (u32 *)buf; size_t slen, bidx, blen; @@ -47,8 +47,8 @@ static size_t delchunk(u8 *restrict buf, size_t rmb, int sidx, int eidx) { table[sidx + CHUNKS] = htobe32(time(NULL)); // assign the current time to the timestamp, for correctness NOTE: might need to zero-out instead // move the succeeding chunks over the deleted chunk - u8 *dst = buf + bidx - rmb; - u8 *src = buf + bidx + blen; + void *dst = buf + bidx - rmb; + void *src = buf + bidx + blen; mvchunks(buf, src, dst, sidx, eidx - 1); return blen; } @@ -56,15 +56,15 @@ static size_t delchunk(u8 *restrict buf, size_t rmb, int sidx, int eidx) { /* Just call `delchunk` with the parameters and some defaults. * This is done instead of `delchunk` being globally linked, because * `delchunk` requests more specific parameters, which is confusing outside this module. */ -size_t mcx_delchunk(u8 *restrict buf, int chunk) { +size_t mcx_delchunk(void *restrict buf, int chunk) { return delchunk(buf, 0, chunk, CHUNKS); } -size_t mcx_delchunk_range(u8 *restrict buf, int start, int end) { +size_t mcx_delchunk_range(void *restrict buf, int start, int end) { assert(start < end && end < CHUNKS); u32 *table = (u32 *)buf; - u8 *dst = buf + (be32toh(table[start]) >> 8) * SECTOR; - u8 *src = buf + (be32toh(table[end]) >> 8) * SECTOR; + void *dst = buf + (be32toh(table[start]) >> 8) * SECTOR; + void *src = buf + (be32toh(table[end]) >> 8) * SECTOR; src += (be32toh(table[end]) & 0xFF) * SECTOR; // zeroes-out the chunk data within this range. (and set the timestamp) @@ -89,7 +89,7 @@ static int cmp_chunkids(const void *restrict x, const void *restrict y) { /* Sorts the chunks marked for deletion from smallest to greatest index. * Then performs the deletion in this order. Making sure to only update the chunks up to the next. 
*/ -size_t mcx_delchunk_bulk(u8 *restrict buf, const u16 *restrict chunks, int chunkc) { +size_t mcx_delchunk_bulk(void *restrict buf, const u16 *restrict chunks, int chunkc) { // ensure the chunks ids we're working on are sorted from least to greatest u16 chunkids[chunkc + 1]; memcpy(chunkids, chunks, chunkc); @@ -104,9 +104,9 @@ size_t mcx_delchunk_bulk(u8 *restrict buf, const u16 *restrict chunks, int chunk /* Sum together the 4th byte in each location integer to compute the sector size of all chunks. * Multiplying by `SECTOR`, and adding the size of the table itself. */ -size_t mcx_calcsize(const u8 *restrict buf) { +size_t mcx_calcsize(const void *restrict buf) { size_t size = 0; for (uint i = 0; i < CHUNKS; i++) - size += *(buf + (i * 4) + 3); + size += *(u8 *)(buf + (i * 4) + 3); return (size * CHUNKS) + TABLE; } diff --git a/src/dat/mcx.h b/src/dat/mcx.h index c4d94eb..c733e80 100644 --- a/src/dat/mcx.h +++ b/src/dat/mcx.h @@ -18,20 +18,20 @@ struct mcx_chunk { * The chunk's location data shall become `0`, and timestamp data the current time. * All succeeding chunks shall be moved back, freeing space. * Returns the amount of bytes removed. */ -size_t mcx_delchunk(u8 *restrict buf, int chunk) NONNULL((1)); +size_t mcx_delchunk(void *restrict buf, int chunk) NONNULL((1)); /* Deletes the range defined by `start`—`end` (inclusive) of chunks out of `buf`. * The chunk's location data shall become `0`, and timestamp data the current time. * All succeeding chunks shall be moved back, freeing space. * Returns the amount of bytes removed */ -size_t mcx_delchunk_range(u8 *restrict buf, int start, int end) NONNULL((1)); +size_t mcx_delchunk_range(void *restrict buf, int start, int end) NONNULL((1)); /* Deletes a `chunkc` chunks from `chunks` out of `buf`. * If the `chunks` indices are known to be sequential, i.e. have a constant difference of `1`, `mcx_delchunk_range` should be preferred. 
* The chunk's location data shall become `0`, and timestamp data the current time. * All succeeding chunks shall be moved back, freeing space. * Returns the amount of bytes removed */ -size_t mcx_delchunk_bulk(u8 *restrict buf, const u16 *restrict chunks, int chunkc) NONNULL((1, 2)); +size_t mcx_delchunk_bulk(void *restrict buf, const u16 *restrict chunks, int chunkc) NONNULL((1, 2)); /* Computes the byte size of the `*.mcX` file in `buf` and returns it. */ -size_t mcx_calcsize(const u8 *restrict buf) NONNULL((1)) PURE; +size_t mcx_calcsize(const void *restrict buf) NONNULL((1)) PURE; diff --git a/src/dat/nbt.c b/src/dat/nbt.c index d8de46c..c62df93 100644 --- a/src/dat/nbt.c +++ b/src/dat/nbt.c @@ -15,7 +15,7 @@ /* Processes the incoming array data in `buf`. Which contains `nmem` items of `size`. * The data shall be converted to little-endian on little-endian systems * Outputs the allocated data to `out`, returns where the next pointer would be. */ -static const u8 *procarr(const u8 *restrict buf, i32 nmem, uint size, struct nbt_array *restrict *restrict out) { +static const void *procarr(const void *restrict buf, i32 nmem, uint size, struct nbt_array *restrict *restrict out) { size_t len = nmem * size; *out = malloc(sizeof(struct nbt_array) + len); if (!*out) return buf + len; @@ -43,12 +43,12 @@ static const u8 *procarr(const u8 *restrict buf, i32 nmem, uint size, struct nbt } /* calls `procarr` for the simple types available. 
*/ -static const u8 *proclist(const u8 *restrict buf, struct nbt_array *restrict *restrict out) { +static const void *proclist(const void *restrict buf, struct nbt_array *restrict *restrict out) { uint size; *out = NULL; - switch (*buf) { + switch (*(u8 *)buf) { case NBT_I8: size = 1; break; case NBT_I16: size = 2; break; case NBT_I32: // fall through @@ -64,15 +64,15 @@ static const u8 *proclist(const u8 *restrict buf, struct nbt_array *restrict *re return procarr(buf, len, size, out); } -const u8 *nbt_proctag(const u8 *restrict buf, u16 slen, void *restrict out) { - const u8 *ptr, *tmp; +const void *nbt_proctag(const void *restrict buf, u16 slen, void *restrict out) { + const void *ptr, *tmp; ptr = buf + 3 + slen; i32 nmem; uint size; - switch (*buf) { - case NBT_I8: *(u8 *)out = *ptr; return ptr + 1; + switch (*(u8 *)buf) { + case NBT_I8: *(u8 *)out = *(u8 *)ptr; return ptr + 1; case NBT_I16: *(u16 *)out = be16toh(*(u16 *)ptr); return ptr + 2; case NBT_I32: // fall through case NBT_F32: *(u32 *)out = be16toh(*(u32 *)ptr); return ptr + 4; @@ -99,10 +99,10 @@ const u8 *nbt_proctag(const u8 *restrict buf, u16 slen, void *restrict out) { * `ptr` is assumed to be the start of the `NBT_LIST` data, e.i. The list's ID, followed by the list's length. * If `ID` is `NBT_I8`, `NBT_I16`, `NBT_I32`, `NBT_I64`, `NBT_F32`, or `NBT_F64`, the entire list length is computed and returned. * For other types this won't be possible, and thus will add `1` to `dpt`, and write the list data to `lens` and `tags` at this new `dpt`. 
*/ -static const u8 *nexttag_list(const u8 *restrict ptr, uint *restrict const dpt, i32 *restrict const lens, u8 *restrict const tags) { - const u8 *tag = ptr; +static const void *nexttag_list(const void *restrict ptr, uint *restrict const dpt, i32 *restrict const lens, u8 *restrict const tags) { + const void *tag = ptr; ptr++; - switch (*tag) { + switch (*(u8 *)tag) { case NBT_END: break; case NBT_I8: ptr += (i32)be32toh(*(u32 *)ptr) * 1; break; case NBT_I16: ptr += (i32)be32toh(*(u32 *)ptr) * 2; break; @@ -113,7 +113,7 @@ static const u8 *nexttag_list(const u8 *restrict ptr, uint *restrict const dpt, default: // TODO: handle out of bounds... Might not be required if we use flexible array member (*dpt)++; - tags[*dpt] = *tag; + tags[*dpt] = *(u8 *)tag; lens[*dpt] = (i32)be32toh(*(u32 *)ptr); break; } @@ -127,15 +127,15 @@ static const u8 *nexttag_list(const u8 *restrict ptr, uint *restrict const dpt, * - `lens` shall contain `MAX_DEPTH` of items representing the list length, if the current item is non-zero we shall assume we're in a list. * Where the value is decremented until we reach `0`. * - `tags` shall contain `MAX_DEPTH` of items representing the list's stored type. 
*/ -static const u8 *nexttag(const u8 *restrict tag, uint *restrict const dpt, i32 *restrict const lens, u8 *restrict const tags) { +static const void *nexttag(const void *restrict tag, uint *restrict const dpt, i32 *restrict const lens, u8 *restrict const tags) { u8 type; - const u8 *ptr = tag; + const void *ptr = tag; if (lens[*dpt]) { type = tags[*dpt]; lens[*dpt]--; *dpt -= !lens[*dpt]; } else { - type = *tag; + type = *(u8 *)tag; ptr += be16toh(*(u16 *)(tag + 1)) + 3; } @@ -169,8 +169,8 @@ static const u8 *nexttag(const u8 *restrict tag, uint *restrict const dpt, i32 * * - compound:list:int32 * - string */ -const u8 *nbt_nexttag(const u8 *restrict buf) { - const u8 *tag; +const void *nbt_nexttag(const void *restrict buf) { + const void *tag; u8 tags[MAX_DEPTH] = {0}; i32 lens[MAX_DEPTH] = {0}; uint dpt = 0; diff --git a/src/dat/nbt.h b/src/dat/nbt.h index ddb85af..8e8f3d3 100644 --- a/src/dat/nbt.h +++ b/src/dat/nbt.h @@ -49,8 +49,8 @@ struct nbt_array { * if `buf` points to `NBT_I8`, `NBT_I16`, `NBT_I32`, `NBT_I64`, `NBT_F32`, or `NBT_F64`, `*out` is assumed * to have the available byte width for one of these types. In the case of `NBT_ARR*` and `NBT_LIST` * it must point to a `struct nbt_array*`. Where in the case of `NBT_LIST`, it must be one of the previous static-width types. */ -const u8 *nbt_proctag(const u8 *restrict buf, u16 slen, void *restrict out) NONNULL((1, 3)); +const void *nbt_proctag(const void *restrict buf, u16 slen, void *restrict out) NONNULL((1, 3)); /* searches for the end of a named tag without processing data, the final pointer is returned. * `NULL` is returned upon failure, the otherwise returned pointer is not guaranteed to be valid. */ -const u8 *nbt_nexttag(const u8 *restrict buf) NONNULL((1)) PURE; +const void *nbt_nexttag(const void *restrict buf) NONNULL((1)) PURE;