move endian.h to lib.h, to replace the system library.

Unix, and likely other systems, ship with `endian.h` from GNUC. This causes compatibility issues, so the functions were renamed to match the target header's macros, and the system header is replaced to eliminate confusion.
write a function for skipping to the next NBT tag
2025-12-18 13:55:45 +01:00 · 2025-07-15 14:43:59 +02:00 · 2025-07-15 13:51:23 +02:00 · 2025-07-15 13:51:23 +02:00 · 2025-07-15 13:51:23 +02:00 · 2025-07-15 13:51:23 +02:00
4 changed files with 128 additions and 73 deletions
--- a/docs/mc-data-spec.md
+++ b/docs/mc-data-spec.md
@@ -8,20 +8,20 @@ note: UUID are stored as an integer array.

 ### tag types
 | ID    | tag name     | payload specification               |
-|-----:|:-------------|:------------------------------------|
-|  `0` | `end`        | -                                   |
-|  `1` | `byte`       | `int8_t`                            |
-|  `2` | `short`      | `int16_t` (BE[^be])                 |
-|  `3` | `int`        | `int32_t` (BE)                      |
-|  `4` | `long`       | `int64_t` (BE)                      |
-|  `5` | `float`      | `float`   (BE)                      |
-|  `6` | `double`     | `double`  (BE)                      |
-|  `7` | `byte array` | `int32_t`     (len) -> `int8_t`     |
-|  `8` | `string`     | `uint16_t`    (len) -> `UTF-8`      |
-|  `9` | `list`       | ID: `int32_t` (len) -> ID           |
-| `10` | `compound`   | list of tags delimited with end tag |
-| `11` | `int array`  | `int32_t`     (len) -> `int32_t`    |
-| `12` | `long array` | `int32_t`     (len) -> `int64_t`    |
+|:-----:|:-------------|:------------------------------------|
+| `0x0` | `end`        | -                                   |
+| `0x1` | `byte`       | `int8_t`                            |
+| `0x2` | `short`      | `int16_t` (BE[^be])                 |
+| `0x3` | `int`        | `int32_t` (BE)                      |
+| `0x4` | `long`       | `int64_t` (BE)                      |
+| `0x5` | `float`      | `float`   (BE)                      |
+| `0x6` | `double`     | `double`  (BE)                      |
+| `0x7` | `byte array` | `int32_t`     (len) -> `int8_t`     |
+| `0x8` | `string`     | `uint16_t`    (len) -> `UTF-8`      |
+| `0x9` | `list`       | ID: `int32_t` (len) -> ID           |
+| `0xA` | `compound`   | list of tags delimited with end tag |
+| `0xB` | `int array`  | `int32_t`     (len) -> `int32_t`    |
+| `0xC` | `long array` | `int32_t`     (len) -> `int64_t`    |
 [^be] [big-endian](https://en.wikipedia.org/wiki/Endianness)

 ## world data
--- a/src/dat/nbt.c
+++ b/src/dat/nbt.c
@@ -0,0 +1,65 @@
+#include "nbt.h"
+
+#include <endian.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "../util/types.h"
+
+/* returns the string length (u16, BE) stored at `buf`; read bytewise, since `buf` is not guaranteed to be aligned */
+static inline u16 nbt_strlen(u8 const *restrict buf) {
+	return (u16)((buf[0] << 8) | buf[1]);
+}
+
+/* compares the string in `buf` to `matstr`.
+ * returns `=0` if equal, `>0` if buf is greater, `<0` if matstr is greater. */
+static int nbt_cmpstr(char const *restrict matstr, u8 const *restrict buf) {
+	u16 len = nbt_strlen(buf);
+	size_t matlen = strlen(matstr);
+
+	// the string data in `buf` is not NUL-terminated; compare the shared prefix in place instead of copying it
+	int cmp = memcmp(buf + 2, matstr, len < matlen ? len : matlen);
+	if (cmp != 0) return cmp;
+
+	return (len > matlen) - (len < matlen); // shared prefix is equal: the longer string is the greater one
+}
+
+/* returns the (expected) pointer of the tag following this one.
+ * `NBT_COMPOUND`, `NBT_END` and `NBT_LIST` tags are not valid for this function and should be handled separately.
+ * `NULL` is returned if anything went wrong. */
+static u8 const *nbt_nexttag(u8 *buf) {
+	u8 const *dat = buf + 3 + nbt_strlen(buf + 1); // the payload follows the ID (1 byte), name length (2 bytes) and name
+	size_t mul = 0;                                // byte size of a single element in the array tags
+
+	switch (*buf) {
+	case NBT_I8:      return dat + 1;
+	case NBT_I16:     return dat + 2;
+	case NBT_I32:     return dat + 4;
+	case NBT_I64:     return dat + 8;
+	case NBT_F32:     return dat + 4;
+	case NBT_F64:     return dat + 8;
+	case NBT_STR:     return dat + 2 + nbt_strlen(dat); // u16 length, followed by the UTF-8 data
+	case NBT_ARR_I8:  mul = 1; break;
+	case NBT_ARR_I32: mul = 4; break;
+	case NBT_ARR_I64: mul = 8; break;
+	default:          return NULL; // failure on compound/end/list tags; these require more nuanced logic
+	}
+
+	u32 cnt = ((u32)dat[0] << 24) | ((u32)dat[1] << 16) | ((u32)dat[2] << 8) | dat[3]; // arrays start with an i32 (BE) element count
+	return (cnt >> 31) ? NULL : dat + 4 + cnt * mul;                                   // a negative count is malformed
+}
+
+/* checks the root tag of the `len` bytes of NBT data in `buf`; returns `0` on success, `1` on malformed data. (`datout` is not written yet) */
+int nbt_proc(void **restrict datout, u8 const *restrict buf, size_t len) {
+	// first byte should be a compound tag, and the 2 byte name length after it must be readable
+	if (len < 3 || *buf != NBT_COMPOUND) return 1;
+	uint ncomp = 1;
+
+	// ignore the first tag + its name, so we start with the juicy data
+	uint tmp = nbt_strlen(buf + 1) + 3;
+	if (tmp > len) return 1; // the name would run past the end of the buffer, and `len` would underflow
+	buf += tmp;
+	len -= tmp;
+
+	// TODO: finish function
+	return 0;
+}
--- a/src/dat/nbt.h
+++ b/src/dat/nbt.h
@@ -2,6 +2,11 @@
 // Licensed under the MIT Licence. See LICENSE for details
 #pragma once

+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "../util/types.h"
+
 /* NBT (named binary tag) is a tree data structure. Tags have a numeric type ID, name and a payload.
 * NBT files are a compressed `compound` tag. GZip is the compression used in most cases,
 * in some (rare) cases it's stored uncompressed.
@@ -13,17 +18,19 @@
 /* specifies the NBT tag IDs.
 * NOTE: every type is stored as BE (big-endian) in the file. */
 enum nbt_tagid {
-	NBT_END,      // signifies the end of a compound tag
-	NBT_I8,       // next byte is for an 8 bit signed integer.
-	NBT_I16,      // next 2 bytes are for a 16 bit signed integer
-	NBT_I32,      // next 4 bytes are for a 32 bit signed integer
-	NBT_I64,      // next 8 bytes are for a 64 bit signed integer
-	NBT_F32,      // next 4 bytes are for a single-precision floating-point
-	NBT_F64,      // next 8 bytes are for a double-precision floating-point
-	NBT_ARR_I8,   // starts with a i32, denoting size, followed by the i8 data
-	NBT_STR,      // starts with a u16, denoting size, followed by the UTF-8 data
-	NBT_LIST,     // starts with an ID, followed by a 32 bit signed integer denoting the size
-	NBT_COMPOUND, // compound tag, contains tags and is delimited by `NBT_END`
-	NBT_ARR_I32,  // starts with a i32, denoting size, followed by the i32 data
-	NBT_ARR_I64,  // starts with a i32, denoting size, followed by the u32 data
+	NBT_END = 0x00,      // signifies the end of a compound tag
+	NBT_I8 = 0x01,       // next byte is for an 8 bit signed integer.
+	NBT_I16 = 0x02,      // next 2 bytes are for a 16 bit signed integer
+	NBT_I32 = 0x03,      // next 4 bytes are for a 32 bit signed integer
+	NBT_I64 = 0x04,      // next 8 bytes are for a 64 bit signed integer
+	NBT_F32 = 0x05,      // next 4 bytes are for a single-precision floating-point
+	NBT_F64 = 0x06,      // next 8 bytes are for a double-precision floating-point
+	NBT_ARR_I8 = 0x07,   // starts with a i32, denoting size, followed by the i8 data
+	NBT_STR = 0x08,      // starts with a u16, denoting size, followed by the UTF-8 data
+	NBT_LIST = 0x09,     // starts with an ID, followed by a 32 bit signed integer denoting the size
+	NBT_COMPOUND = 0x0A, // compound tag, contains tags and is delimited by `NBT_END`
+	NBT_ARR_I32 = 0x0B,  // starts with a i32, denoting size, followed by the i32 data
+	NBT_ARR_I64 = 0x0C,  // starts with a i32, denoting size, followed by the i64 data
 };
+
+int nbt_proc(void **restrict datout, u8 const *restrict buf, size_t len);
--- a/src/util/compat/endian.h
+++ b/src/util/compat/endian.h
@@ -2,53 +2,36 @@
 // Licensed under the MIT Licence. See LICENSE for details
 #pragma once

-#include <stdint.h>
-
-#include "../atrb.h"
-#include "../types.h"
-
-/* little endian */
-atrb_const static inline u16 le16ton(u16); // converts little-endian (LE) encoding to native for a 16 bit integer. (NOOP if native is LE)
-atrb_const static inline u32 le32ton(u32); // converts little-endian (LE) encoding to native for a 32 bit integer. (NOOP if native is LE)
-atrb_const static inline u64 le64ton(u64); // converts little-endian (LE) encoding to native for a 64 bit integer. (NOOP if native is LE)
-atrb_const static inline u16 ntole16(u16); // converts native encoding to little-endian (LE) for a 16 bit integer. (NOOP if native is LE)
-atrb_const static inline u32 ntole32(u32); // converts native encoding to little-endian (LE) for a 32 bit integer. (NOOP if native is LE)
-atrb_const static inline u64 ntole64(u64); // converts native encoding to little-endian (LE) for a 64 bit integer. (NOOP if native is LE)
-
-/* big endian */
-atrb_const static inline u16 be16ton(u16); // converts big-endian (BE) encoding to native for a 16 bit integer. (NOOP if native is BE)
-atrb_const static inline u32 be32ton(u32); // converts big-endian (BE) encoding to native for a 32 bit integer. (NOOP if native is BE)
-atrb_const static inline u64 be64ton(u64); // converts big-endian (BE) encoding to native for a 64 bit integer. (NOOP if native is BE)
-atrb_const static inline u16 ntobe16(u16); // converts native encoding to big-endian (BE) for a 16 bit integer. (NOOP if native is BE)
-atrb_const static inline u32 ntobe32(u32); // converts native encoding to big-endian (BE) for a 32 bit integer. (NOOP if native is BE)
-atrb_const static inline u64 ntobe64(u64); // converts native encoding to big-endian (BE) for a 64 bit integer. (NOOP if native is BE)
-
+#if __has_include_next("endian.h")
+#include_next <endian.h>
+#else
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-u16 le16ton(u16 x) { return x; }
-u32 le32ton(u32 x) { return x; }
-u64 le64ton(u64 x) { return x; }
-u16 ntole16(u16 x) { return x; }
-u32 ntole32(u32 x) { return x; }
-u64 ntole64(u64 x) { return x; }
-u16 be16ton(u16 x) { return __builtin_bswap16(x); }
-u32 be32ton(u32 x) { return __builtin_bswap32(x); }
-u64 be64ton(u64 x) { return __builtin_bswap64(x); }
-u16 ntobe16(u16 x) { return __builtin_bswap16(x); }
-u32 ntobe32(u32 x) { return __builtin_bswap32(x); }
-u64 ntobe64(u64 x) { return __builtin_bswap64(x); }
+#define le16toh(x) ((__UINT16_TYPE__)(x)) // NOOP: native is LE. (glibc's `__uintN_identity` helpers don't exist without the system header)
+#define le32toh(x) ((__UINT32_TYPE__)(x))
+#define le64toh(x) ((__UINT64_TYPE__)(x))
+#define htole16(x) ((__UINT16_TYPE__)(x))
+#define htole32(x) ((__UINT32_TYPE__)(x))
+#define htole64(x) ((__UINT64_TYPE__)(x))
+#define be16toh(x) __builtin_bswap16(x) // BE <-> native requires a byte swap
+#define be32toh(x) __builtin_bswap32(x)
+#define be64toh(x) __builtin_bswap64(x)
+#define htobe16(x) __builtin_bswap16(x)
+#define htobe32(x) __builtin_bswap32(x)
+#define htobe64(x) __builtin_bswap64(x)
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-u16 le16ton(u16 x) { __builtin_bswap16(x); }
-u32 le32ton(u32 x) { __builtin_bswap32(x); }
-u64 le64ton(u64 x) { __builtin_bswap64(x); }
-u16 ntole16(u16 x) { __builtin_bswap16(x); }
-u32 ntole32(u32 x) { __builtin_bswap32(x); }
-u64 ntole64(u64 x) { __builtin_bswap64(x); }
-u16 be16ton(u16 x) { return x; }
-u32 be32ton(u32 x) { return x; }
-u64 be64ton(u64 x) { return x; }
-u16 ntobe16(u16 x) { return x; }
-u32 ntobe32(u32 x) { return x; }
-u64 ntobe64(u64 x) { return x; }
+#define le16toh(x) __builtin_bswap16(x) // LE <-> native requires a byte swap
+#define le32toh(x) __builtin_bswap32(x)
+#define le64toh(x) __builtin_bswap64(x)
+#define htole16(x) __builtin_bswap16(x)
+#define htole32(x) __builtin_bswap32(x)
+#define htole64(x) __builtin_bswap64(x)
+#define be16toh(x) ((__UINT16_TYPE__)(x)) // NOOP: native is BE. (glibc's `__uintN_identity` helpers don't exist without the system header)
+#define be32toh(x) ((__UINT32_TYPE__)(x))
+#define be64toh(x) ((__UINT64_TYPE__)(x))
+#define htobe16(x) ((__UINT16_TYPE__)(x))
+#define htobe32(x) ((__UINT32_TYPE__)(x))
+#define htobe64(x) ((__UINT64_TYPE__)(x))
 #else
 #error machine architecture unsupported! Expected either big-endian or little-endian, make sure to use a compiler which defines __BYTE_ORDER__ (like clang or gcc)
 #endif
+#endif
Author	SHA1	Message	Date
Quinn	3198881877	move `endian.h` to `lib.h`, to replace the system library. Unix, and likely other systems, ship with `endian.h` from GNUC. This causes compatibility issues, so the functions were renamed to match the target header's macros, and the system header is replaced to eliminate confusion.	2025-07-15 14:43:59 +02:00
Quinn	4728846985	write a function for skipping to the next NBT tag	2025-07-15 13:51:23 +02:00
Quinn	db76d6992b	add function for comparing an NBT string with a C string	2025-07-15 13:51:23 +02:00
Quinn	8345ac1164	remove `nbt_procdat` struct	2025-07-15 13:51:23 +02:00
Quinn	13451da2e8	add basic function signature for processing the NBT data	2025-07-15 13:51:23 +02:00
Quinn	6aff9aa528	adapt data spec to use hexadecimal as well	2025-07-15 11:15:28 +02:00
Quinn	be87ccbe22	explicitly set hexadecimal values in NBT tags	2025-07-15 10:37:10 +02:00