fix: violating strict aliasing rules in most areas in the new code.

Yes, I am aware there are plenty of violations in `conf.c`, but I'll
likely fix/rewrite those when I will use it. Since there are some other
changes I think I'll want to make.
This commit is contained in:
2025-09-04 11:13:07 +02:00
parent 3a0704710c
commit b13de01f24
3 changed files with 85 additions and 56 deletions

View File

@@ -11,15 +11,14 @@
#include "../util/intdef.h" #include "../util/intdef.h"
#define TABLE 0x2000 // table byte size
#define SECTOR 0x1000 // sector size #define SECTOR 0x1000 // sector size
#define TABLE 0x800 // table (total) element count
#define CHUNKS 0x400 // amount of chunks in a file #define CHUNKS 0x400 // amount of chunks in a file
/* Moves chunks `src_s` to `src_e` (inclusive) from `src`, back onto `dst`. */ /* Moves chunks `src_s` to `src_e` (inclusive) from `src`, back onto `dst`. */
static void mvchunks(u8 *restrict buf, u8 *src, u8 *dst, int src_s, int src_e) { static void mvchunks(u32 *restrict table, u8 *src, u8 *dst, int src_s, int src_e) {
assert(src > dst); assert(src > dst);
u32 *table = (u32 *)buf; // BUG: strict aliasing size_t len = src - dst; // acquire the amount of bytes that we shall move
size_t len = src - dst; // acquire the amount of bytes that we shall move
assert(!(len % SECTOR)); assert(!(len % SECTOR));
// count how many bytes we need to move, whilst updating location data // count how many bytes we need to move, whilst updating location data
@@ -34,9 +33,8 @@ static void mvchunks(u8 *restrict buf, u8 *src, u8 *dst, int src_s, int src_e) {
/* Deletes chunk `sidx` by moving chunks up to `eidx` back over `sidx` in `buf`. /* Deletes chunk `sidx` by moving chunks up to `eidx` back over `sidx` in `buf`.
* `rmb` is an optional additional offset that can be applied, and signifies bytes already removed. * `rmb` is an optional additional offset that can be applied, and signifies bytes already removed.
* Returns the bytes removed by this function. */ * Returns the bytes removed by this function. */
static size_t delchunk(u8 *restrict buf, size_t rmb, int sidx, int eidx) { static size_t delchunk(u8 *restrict buf, u32 *restrict table, size_t rmb, int sidx, int eidx) {
// load the table data // load the table data
u32 *table = (u32 *)buf; // BUG: strict aliasing
size_t slen, bidx, blen; size_t slen, bidx, blen;
slen = be32toh(table[sidx]) & 0xFF; // acquire the sector length of the chunk slen = be32toh(table[sidx]) & 0xFF; // acquire the sector length of the chunk
bidx = (be32toh(table[sidx]) >> 8) * SECTOR; // acquire and compute the byte offset the chunk starts at bidx = (be32toh(table[sidx]) >> 8) * SECTOR; // acquire and compute the byte offset the chunk starts at
@@ -49,20 +47,25 @@ static size_t delchunk(u8 *restrict buf, size_t rmb, int sidx, int eidx) {
// move the succeeding chunks over the deleted chunk // move the succeeding chunks over the deleted chunk
u8 *dst = buf + bidx - rmb; u8 *dst = buf + bidx - rmb;
u8 *src = buf + bidx + blen; u8 *src = buf + bidx + blen;
mvchunks(buf, src, dst, sidx, eidx - 1); mvchunks(table, src, dst, sidx, eidx - 1);
return blen; return blen;
} }
/* Just call `delchunk` with the parameters and some defaults. /* Call `delchunk` with the parameters and some defaults. Ensuring the table is copied correctly as well.
* This is done instead of `delchunk` being globally linked, because * This is done instead of `delchunk` being globally linked, because
* `delchunk` requests more specific parameters, which is confusing outside this module. */ * `delchunk` requests more specific parameters, which is confusing outside this module. */
size_t mcx_delchunk(u8 *restrict buf, int chunk) { size_t mcx_delchunk(u8 *restrict buf, int chunk) {
return delchunk(buf, 0, chunk, CHUNKS); u32 table[TABLE];
memcpy(table, buf, sizeof(table));
size_t res = delchunk(buf, table, 0, chunk, CHUNKS);
memcpy(buf, table, sizeof(table));
return res;
} }
size_t mcx_delchunk_range(u8 *restrict buf, int start, int end) { size_t mcx_delchunk_range(u8 *restrict buf, int start, int end) {
assert(start < end && end < CHUNKS); assert(start < end && end < CHUNKS);
u32 *table = (u32 *)buf; // BUG: strict aliasing u32 table[TABLE];
memcpy(table, buf, sizeof(table));
u8 *dst = buf + (be32toh(table[start]) >> 8) * SECTOR; u8 *dst = buf + (be32toh(table[start]) >> 8) * SECTOR;
u8 *src = buf + (be32toh(table[end]) >> 8) * SECTOR; u8 *src = buf + (be32toh(table[end]) >> 8) * SECTOR;
src += (be32toh(table[end]) & 0xFF) * SECTOR; src += (be32toh(table[end]) & 0xFF) * SECTOR;
@@ -70,13 +73,14 @@ size_t mcx_delchunk_range(u8 *restrict buf, int start, int end) {
// zeroes-out the chunk data within this range. (and set the timestamp) // zeroes-out the chunk data within this range. (and set the timestamp)
u32 ts = htobe32(time(NULL)); u32 ts = htobe32(time(NULL));
for (int i = start; i <= end; i++) { for (int i = start; i <= end; i++) {
table[i] = 0; // BUG: strict aliasing table[i] = 0;
table[i + CHUNKS] = ts; // BUG: strict aliasing table[i + CHUNKS] = ts;
} }
// move the remaining chunks down // move the remaining chunks down
if (end < (CHUNKS - 1)) if (end < (CHUNKS - 1))
mvchunks(buf, src, dst, end, (CHUNKS - 1)); mvchunks(table, src, dst, end, (CHUNKS - 1));
memcpy(buf, table, sizeof(table));
return src - dst; return src - dst;
} }
@@ -96,9 +100,14 @@ size_t mcx_delchunk_bulk(u8 *restrict buf, const u16 *restrict chunks, int chunk
qsort(chunkids, chunkc, sizeof(int), cmp_chunkids); qsort(chunkids, chunkc, sizeof(int), cmp_chunkids);
chunkids[chunkc] = CHUNKS; // set the spare chunk to the max chunks, so the rest of the chunks are moved chunkids[chunkc] = CHUNKS; // set the spare chunk to the max chunks, so the rest of the chunks are moved
u32 table[TABLE];
memcpy(table, buf, sizeof(table));
size_t rmb = 0; size_t rmb = 0;
for (int i = 0; i < chunkc; i++) for (int i = 0; i < chunkc; i++)
rmb += delchunk(buf, rmb, chunkids[i], chunkids[i + 1]); rmb += delchunk(buf, table, rmb, chunkids[i], chunkids[i + 1]);
memcpy(buf, table, sizeof(table));
return rmb; return rmb;
} }
@@ -108,5 +117,5 @@ size_t mcx_calcsize(const u8 *restrict buf) {
size_t size = 0; size_t size = 0;
for (uint i = 0; i < CHUNKS; i++) for (uint i = 0; i < CHUNKS; i++)
size += *(buf + (i * 4) + 3); size += *(buf + (i * 4) + 3);
return (size * CHUNKS) + TABLE; return (size * CHUNKS) + (TABLE * 4);
} }

View File

@@ -12,29 +12,52 @@
#define MAX_DEPTH 512 #define MAX_DEPTH 512
/* Extracts a big endian 16 bit integer from address `buf`, converts it to host byte size if needed and returns. */
static inline u16 buftoh16(const void *restrict buf) {
u16 i;
memcpy(&i, buf, sizeof(i));
return be16toh(i);
}
/* Extracts a big endian 32 bit integer from address `buf`, converts it to host byte size if needed and returns. */
static inline u32 buftoh32(const void *restrict buf) {
u32 i;
memcpy(&i, buf, sizeof(i));
return be32toh(i);
}
/* Extracts a big endian 64 bit integer from address `buf`, converts it to host byte size if needed and returns. */
static inline u64 buftoh64(const void *restrict buf) {
u64 i;
memcpy(&i, buf, sizeof(i));
return be64toh(i);
}
/* Processes the incoming array data in `buf`. Which contains `nmem` items of `size`. /* Processes the incoming array data in `buf`. Which contains `nmem` items of `size`.
* The data shall be converted to little-endian on little-endian systems * The data shall be converted to little-endian on little-endian systems
* Outputs the allocated data to `out`, returns where the next pointer would be. */ * Outputs the allocated data to `out`, returns where the next pointer would be. */
static const u8 *procarr(const u8 *restrict buf, i32 nmem, uint size, struct nbt_array *restrict *restrict out) { static const u8 *procarr(const u8 *restrict buf, i32 nmemb, uint size, struct nbt_array *restrict out) {
size_t len = nmem * size; size_t len = nmemb * size;
*out = malloc(sizeof(struct nbt_array) + len); *out = (struct nbt_array){
if (!*out) return buf + len; out->nmemb = nmemb,
out->dat = malloc(len),
};
if (!out->dat)
return buf + len;
memcpy((*out)->dat, buf, len); memcpy(out->dat, buf, len);
(*out)->len = nmem;
buf += len; buf += len;
/* Only include this code for little-endian systems. Since only they require this logic. /* Only include this code for little-endian systems. Since only they require this logic.
* Producing optimised code for other platforms. */ * Producing optimised code for other platforms. */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
if (size == 1) return buf; if (size == 1) return buf;
size_t i = 0; i32 i = 0;
while (i < len) { while (i < nmemb) {
// BUG: strict aliasing
switch (size) { switch (size) {
case 2: *(u16 *)((*out)->dat + i) = be16toh(*(u16 *)((*out)->dat + i)); break; case 2: ((u16 *)out->dat)[i] = be16toh(((u16 *)out->dat)[i]); break;
case 4: *(u32 *)((*out)->dat + i) = be32toh(*(u32 *)((*out)->dat + i)); break; case 4: ((u32 *)out->dat)[i] = be16toh(((u32 *)out->dat)[i]); break;
case 8: *(u64 *)((*out)->dat + i) = be64toh(*(u64 *)((*out)->dat + i)); break; case 8: ((u64 *)out->dat)[i] = be16toh(((u64 *)out->dat)[i]); break;
default: __builtin_unreachable(); // this should be impossible default: __builtin_unreachable(); // this should be impossible
} }
i += size; i += size;
@@ -44,12 +67,10 @@ static const u8 *procarr(const u8 *restrict buf, i32 nmem, uint size, struct nbt
} }
/* calls `procarr` for the simple types available. */ /* calls `procarr` for the simple types available. */
static const u8 *proclist(const u8 *restrict buf, struct nbt_array *restrict *restrict out) { static const u8 *proclist(const u8 *restrict buf, struct nbt_array *restrict out) {
uint size; uint size;
*out = NULL; switch (*(u8 *)buf) {
switch (*buf) {
case NBT_I8: size = 1; break; case NBT_I8: size = 1; break;
case NBT_I16: size = 2; break; case NBT_I16: size = 2; break;
case NBT_I32: // fall through case NBT_I32: // fall through
@@ -60,7 +81,9 @@ static const u8 *proclist(const u8 *restrict buf, struct nbt_array *restrict *re
} }
buf++; buf++;
i32 len = (i32)be32toh(*(u32 *)buf); // BUG: strict aliasing i32 len;
memcpy(&len, buf, 4);
len = be32toh(len);
buf += 4; buf += 4;
return procarr(buf, len, size, out); return procarr(buf, len, size, out);
} }
@@ -72,28 +95,27 @@ const u8 *nbt_proctag(const u8 *restrict buf, u16 slen, void *restrict out) {
i32 nmem; i32 nmem;
uint size; uint size;
// BUG: strict aliasing
switch (*buf) { switch (*buf) {
case NBT_I8: *(u8 *)out = *ptr; return ptr + 1; case NBT_I8: *(u8 *)out = *ptr; return ptr + 1;
case NBT_I16: *(u16 *)out = be16toh(*(u16 *)ptr); return ptr + 2; case NBT_I16: *(u16 *)out = buftoh16(ptr); return ptr + 2;
case NBT_I32: // fall through case NBT_I32: // fall through
case NBT_F32: *(u32 *)out = be16toh(*(u32 *)ptr); return ptr + 4; case NBT_F32: *(u32 *)out = buftoh32(ptr); return ptr + 4;
case NBT_I64: // fall through case NBT_I64: // fall through
case NBT_F64: *(u64 *)out = be16toh(*(u64 *)ptr); return ptr + 8; case NBT_F64: *(u64 *)out = buftoh64(ptr); return ptr + 8;
case NBT_STR: nmem = be16toh(*(u16 *)ptr), size = 1, ptr += 2; break; case NBT_STR: nmem = buftoh16(ptr), size = 1, ptr += 2; break;
case NBT_ARR_I8: nmem = be32toh(*(u32 *)ptr), size = 1, ptr += 4; break; case NBT_ARR_I8: nmem = buftoh32(ptr), size = 1, ptr += 4; break;
case NBT_ARR_I32: nmem = be32toh(*(u32 *)ptr), size = 4, ptr += 4; break; case NBT_ARR_I32: nmem = buftoh32(ptr), size = 4, ptr += 4; break;
case NBT_ARR_I64: nmem = be32toh(*(u32 *)ptr), size = 8, ptr += 4; break; case NBT_ARR_I64: nmem = buftoh32(ptr), size = 8, ptr += 4; break;
case NBT_LIST: case NBT_LIST:
return proclist(ptr, (struct nbt_array **)out); return proclist(ptr, (struct nbt_array *)out);
return tmp; return tmp;
default: return NULL; default: return NULL;
} }
return procarr(ptr, nmem, size, (struct nbt_array **)out); return procarr(ptr, nmem, size, (struct nbt_array *)out);
} }
@@ -104,20 +126,19 @@ const u8 *nbt_proctag(const u8 *restrict buf, u16 slen, void *restrict out) {
static const u8 *nexttag_list(const u8 *restrict ptr, uint *restrict const dpt, i32 *restrict const lens, u8 *restrict const tags) { static const u8 *nexttag_list(const u8 *restrict ptr, uint *restrict const dpt, i32 *restrict const lens, u8 *restrict const tags) {
const u8 *tag = ptr; const u8 *tag = ptr;
ptr++; ptr++;
// BUG: strict aliasing
switch (*tag) { switch (*tag) {
case NBT_END: break; case NBT_END: break;
case NBT_I8: ptr += (i32)be32toh(*(u32 *)ptr) * 1; break; case NBT_I8: ptr += (i32)buftoh32(ptr) * 1; break;
case NBT_I16: ptr += (i32)be32toh(*(u32 *)ptr) * 2; break; case NBT_I16: ptr += (i32)buftoh32(ptr) * 2; break;
case NBT_I32: // fall through case NBT_I32: // fall through
case NBT_F32: ptr += (i32)be32toh(*(u32 *)ptr) * 4; break; case NBT_F32: ptr += (i32)buftoh32(ptr) * 4; break;
case NBT_I64: // fall through case NBT_I64: // fall through
case NBT_F64: ptr += (i32)be32toh(*(u32 *)ptr) * 8; break; case NBT_F64: ptr += (i32)buftoh32(ptr) * 8; break;
default: default:
// TODO: handle out of bounds... Might not be required if we use flexible array member // TODO: handle out of bounds... Might not be required if we use flexible array member
(*dpt)++; (*dpt)++;
tags[*dpt] = *tag; tags[*dpt] = *tag;
lens[*dpt] = (i32)be32toh(*(u32 *)ptr); // BUG: strict aliasing lens[*dpt] = (i32)buftoh32(ptr);
break; break;
} }
ptr += 4; ptr += 4;
@@ -139,10 +160,9 @@ static const u8 *nexttag(const u8 *restrict tag, uint *restrict const dpt, i32 *
*dpt -= !lens[*dpt]; *dpt -= !lens[*dpt];
} else { } else {
type = *tag; type = *tag;
ptr += be16toh(*(u16 *)(tag + 1)) + 3; // BUG: strict aliasing ptr += buftoh16(tag + 1) + 3;
} }
// BUG: strict aliasing
switch (type) { switch (type) {
case NBT_I8: ptr += 1; break; case NBT_I8: ptr += 1; break;
case NBT_I16: ptr += 2; break; case NBT_I16: ptr += 2; break;
@@ -151,10 +171,10 @@ static const u8 *nexttag(const u8 *restrict tag, uint *restrict const dpt, i32 *
case NBT_I64: // fall through case NBT_I64: // fall through
case NBT_F64: ptr += 8; break; case NBT_F64: ptr += 8; break;
case NBT_ARR_I8: ptr += 4 + (i32)be32toh(*(u32 *)ptr) * 1; break; case NBT_ARR_I8: ptr += 4 + (i32)buftoh32(ptr) * 1; break;
case NBT_ARR_I32: ptr += 4 + (i32)be32toh(*(u32 *)ptr) * 4; break; case NBT_ARR_I32: ptr += 4 + (i32)buftoh32(ptr) * 4; break;
case NBT_ARR_I64: ptr += 4 + (i32)be32toh(*(u32 *)ptr) * 8; break; case NBT_ARR_I64: ptr += 4 + (i32)buftoh32(ptr) * 8; break;
case NBT_STR: ptr += 2 + (u16)be16toh(*(u16 *)ptr) * 1; break; case NBT_STR: ptr += 2 + (u16)buftoh16(ptr) * 1; break;
case NBT_END: (*dpt)--; break; case NBT_END: (*dpt)--; break;
case NBT_COMPOUND: (*dpt)++; break; case NBT_COMPOUND: (*dpt)++; break;

View File

@@ -37,8 +37,8 @@ enum nbt_tagid {
}; };
struct nbt_array { struct nbt_array {
i32 len; i32 nmemb;
u8 dat[]; void *dat;
}; };