Compare commits

..

7 Commits

Author SHA1 Message Date
3198881877 move endian.h to lib.h, to replace the system library.
Unix, and likely other systems, ship with `endian.h`, from GNU C.
This causes compatibility issues, so the functions were renamed to be similar
to the target header's macros.
The system header is replaced to eliminate confusion.
2025-07-15 14:43:59 +02:00
4728846985 write a function for skipping to the next NBT tag 2025-07-15 13:51:23 +02:00
db76d6992b add function for comparing an NBT string with a C string 2025-07-15 13:51:23 +02:00
8345ac1164 remove nbt_procdat struct 2025-07-15 13:51:23 +02:00
13451da2e8 add basic function signature for processing the NBT data 2025-07-15 13:51:23 +02:00
6aff9aa528 adapt data spec to use hexadecimal as well 2025-07-15 11:15:28 +02:00
be87ccbe22 explicitly set hexadecimal values in NBT tags 2025-07-15 10:37:10 +02:00
4 changed files with 128 additions and 73 deletions

View File

@@ -8,20 +8,20 @@ note: UUID are stored as an integer array.
### tag types
| ID | tag name | payload specification |
|-----:|:-------------|:------------------------------------|
| `0` | `end` | - |
| `1` | `byte` | `int8_t` |
| `2` | `short` | `int16_t` (BE[^be]) |
| `3` | `int` | `int32_t` (BE) |
| `4` | `long` | `int64_t` (BE) |
| `5` | `float` | `float` (BE) |
| `6` | `double` | `double` (BE) |
| `7` | `byte array` | `int32_t` (len) -> `int8_t` |
| `8` | `string` | `uint16_t` (len) -> `UTF-8` |
| `9` | `list` | ID: `int32_t` (len) -> ID |
| `10` | `compound` | list of tags delimited with end tag |
| `11` | `int array` | `int32_t` (len) -> `int32_t` |
| `12` | `long array` | `int32_t` (len) -> `int64_t` |
|:-----:|:-------------|:------------------------------------|
| `0x0` | `end` | - |
| `0x1` | `byte` | `int8_t` |
| `0x2` | `short` | `int16_t` (BE[^be]) |
| `0x3` | `int` | `int32_t` (BE) |
| `0x4` | `long` | `int64_t` (BE) |
| `0x5` | `float` | `float` (BE) |
| `0x6` | `double` | `double` (BE) |
| `0x7` | `byte array` | `int32_t` (len) -> `int8_t` |
| `0x8` | `string` | `uint16_t` (len) -> `UTF-8` |
| `0x9` | `list` | ID: `int32_t` (len) -> ID |
| `0xA` | `compound` | list of tags delimited with end tag |
| `0xB` | `int array` | `int32_t` (len) -> `int32_t` |
| `0xC` | `long array` | `int32_t` (len) -> `int64_t` |
[^be]: [big-endian](https://en.wikipedia.org/wiki/Endianness)
## world data

65
src/dat/nbt.c Normal file
View File

@@ -0,0 +1,65 @@
#include "nbt.h"
#include <endian.h>
#include <stddef.h>
#include <string.h>
#include "../util/types.h"
/* returns the string length from a specific location in the buffer.
 * NBT strings are prefixed with their length as a big-endian `u16`; `buf` must point at that prefix
 * and have at least 2 readable bytes.
 * The value is assembled byte by byte, so `buf` does not need to be 2-byte aligned
 * and no `u16` is read through a `u8` pointer. */
static inline u16 nbt_strlen(u8 const *restrict buf) {
	return (u16)(((unsigned)buf[0] << 8) | (unsigned)buf[1]);
}
/* compares the string in `buf` to `matstr`.
 * `buf` points to an NBT string: a big-endian `u16` length followed by that many bytes (not NUL-terminated).
 * `matstr` is a regular NUL-terminated C string.
 * returns `=0` if equal, `>0` if buf is greater, `<0` if matstr is greater.
 * The strings are only equal if their lengths match as well; if one is a prefix of the other, the shorter one is the lesser. */
static int nbt_cmpstr(char const *restrict matstr, u8 const *restrict buf) {
	size_t const buflen = nbt_strlen(buf);
	size_t const matlen = strlen(matstr);
	size_t const common = buflen < matlen ? buflen : matlen;

	// compare in place; the bytes following the length prefix are the string data
	int const cmp = memcmp(buf + 2, matstr, common);
	if (cmp != 0) return cmp;

	// the shared prefix is identical, so the lengths decide the result
	return (buflen > matlen) - (buflen < matlen);
}
/* returns the payload size in bytes of the fixed-size tag type `id`, or `0` if `id` isn't a fixed-size type. */
static inline size_t nbt_primsize(u8 id) {
	switch (id) {
	case NBT_I8:  return 1;
	case NBT_I16: return 2;
	case NBT_I32: return 4;
	case NBT_I64: return 8;
	case NBT_F32: return 4;
	case NBT_F64: return 8;
	default:      return 0;
	}
}

/* reads the big-endian 32 bit signed length stored at `p` and writes its magnitude to `*out`.
 * returns non-zero if the stored length is negative (sign bit set).
 * the bytes are read one by one, so `p` does not need to be aligned. */
static inline int nbt_rdlen(u8 const *restrict p, size_t *restrict out) {
	unsigned long const raw = ((unsigned long)p[0] << 24)
	                        | ((unsigned long)p[1] << 16)
	                        | ((unsigned long)p[2] << 8)
	                        | ((unsigned long)p[3]);
	*out = (size_t)(raw & 0x7FFFFFFFUL);
	return (raw & 0x80000000UL) != 0;
}

/* returns the (expected) pointer of the tag following this one.
 * `buf` must point to the ID byte of a named tag: ID (1 byte), name length (`u16`, BE), name, payload.
 * `NBT_COMPOUND` and `NBT_END` tags are not valid for this function and should be handled separately;
 * the same goes for `NBT_LIST` tags whose elements aren't of a fixed size (strings, arrays, lists and compounds).
 * `NULL` is returned if anything went wrong.
 * NOTE: the size of the buffer is unknown here, the caller must verify that the result lies within the buffer. */
static u8 const *nbt_nexttag(u8 const *buf) {
	u8 const id = *buf;
	size_t elem = 0; // element size for the array types, 0 otherwise

	switch (id) {
	case NBT_I8:
	case NBT_I16:
	case NBT_I32:
	case NBT_I64:
	case NBT_F32:
	case NBT_F64:
	case NBT_STR:
	case NBT_LIST:    break;
	case NBT_ARR_I8:  elem = 1; break;
	case NBT_ARR_I32: elem = 4; break;
	case NBT_ARR_I64: elem = 8; break;
	default:          return NULL; // failure on compound/end tags; these require more nuanced logic
	}

	// the payload starts after the ID (1 byte), the name length (2 bytes) and the name itself
	u8 const *dat = buf + 3 + nbt_strlen(buf + 1);
	size_t len;

	// arrays: i32 length, followed by `len` elements
	if (elem != 0) {
		if (nbt_rdlen(dat, &len)) return NULL; // a negative array length is malformed
		return dat + 4 + len * elem;
	}

	// strings: u16 length, followed by `len` bytes
	if (id == NBT_STR) return dat + 2 + nbt_strlen(dat);

	// lists: element ID (1 byte), i32 length, followed by `len` unnamed payloads
	if (id == NBT_LIST) {
		if (nbt_rdlen(dat + 1, &len) || len == 0) return dat + 5; // a length <= 0 denotes an empty list
		elem = nbt_primsize(*dat);
		if (elem == 0) return NULL; // variable-sized elements can't be skipped without walking them
		return dat + 5 + len * elem;
	}

	// what remains are the fixed-size types
	return dat + nbt_primsize(id);
}
/* processes the NBT data in `buf`, which is `len` bytes in size.
 * the data must start with a named compound tag: ID (1 byte), name length (`u16`, BE), name.
 * returns `0` on success and `1` if the data doesn't start with a compound tag, or is too short to hold its header.
 * NOTE: processing past the root tag's header isn't implemented yet; `datout` is left untouched. */
int nbt_proc(void **restrict datout, u8 const *restrict buf, size_t len) {
	// the tag ID and the name length must be present before they can be read
	if (!buf || len < 3) return 1;

	// first byte should be a compound tag
	if (*buf != NBT_COMPOUND) return 1;

	uint ncomp = 1;

	// ignore the first tag + its name, so we start with the juicy data
	size_t const skip = (size_t)nbt_strlen(buf + 1) + 3;
	if (skip > len) return 1; // the name would run past the end of the buffer; `len` would underflow
	buf += skip;
	len -= skip;

	// TODO: finish function
	return 0;
}

View File

@@ -2,6 +2,11 @@
// Licensed under the MIT Licence. See LICENSE for details
#pragma once
#include <stdbool.h>
#include <stdlib.h>
#include "../util/types.h"
/* NBT (named binary tag) is a tree data structure. Tags have a numeric type ID, name and a payload.
* NBT files are a compressed `compound` tag. GZip is the compression used in most cases,
* in some (rare) cases it's stored uncompressed.
@@ -13,17 +18,19 @@
/* specifies the NBT tag IDs; the ID is the first byte of a tag and determines how its payload is laid out.
 * NOTE: every type is stored as BE (big-endian) in the file. */
enum nbt_tagid {
NBT_END, // signifies the end of a compound tag
NBT_I8, // next byte is for an 8 bit signed integer.
NBT_I16, // next 2 bytes are for a 16 bit signed integer
NBT_I32, // next 4 bytes are for a 32 bit signed integer
NBT_I64, // next 8 bytes are for a 64 bit signed integer
NBT_F32, // next 4 bytes are for a single-precision floating-point
NBT_F64, // next 8 bytes are for a double-precision floating-point
NBT_ARR_I8, // starts with an i32, denoting size, followed by the i8 data
NBT_STR, // starts with a u16, denoting size, followed by the UTF-8 data
NBT_LIST, // starts with an ID, followed by a 32 bit signed integer denoting the size
NBT_COMPOUND, // compound tag, contains tags and is delimited by `NBT_END`
NBT_ARR_I32, // starts with an i32, denoting size, followed by the i32 data
NBT_ARR_I64, // starts with an i32, denoting size, followed by the i64 data
NBT_END = 0x00, // signifies the end of a compound tag
NBT_I8 = 0x01, // next byte is for an 8 bit signed integer.
NBT_I16 = 0x02, // next 2 bytes are for a 16 bit signed integer
NBT_I32 = 0x03, // next 4 bytes are for a 32 bit signed integer
NBT_I64 = 0x04, // next 8 bytes are for a 64 bit signed integer
NBT_F32 = 0x05, // next 4 bytes are for a single-precision floating-point
NBT_F64 = 0x06, // next 8 bytes are for a double-precision floating-point
NBT_ARR_I8 = 0x07, // starts with an i32, denoting size, followed by the i8 data
NBT_STR = 0x08, // starts with a u16, denoting size, followed by the UTF-8 data
NBT_LIST = 0x09, // starts with an ID, followed by a 32 bit signed integer denoting the size
NBT_COMPOUND = 0x0A, // compound tag, contains tags and is delimited by `NBT_END`
NBT_ARR_I32 = 0x0B, // starts with an i32, denoting size, followed by the i32 data
NBT_ARR_I64 = 0x0C, // starts with an i32, denoting size, followed by the i64 data
};
/* processes the NBT data in `buf`, which is `len` bytes in size.
 * returns `0` on success, or `1` if `buf` does not start with a compound tag (`NBT_COMPOUND`).
 * NOTE(review): the implementation is still marked as unfinished; `datout` is presumably where the result will be written, confirm once it is complete. */
int nbt_proc(void **restrict datout, u8 const *restrict buf, size_t len);

View File

@@ -2,53 +2,36 @@
// Licensed under the MIT Licence. See LICENSE for details
#pragma once
#include <stdint.h>
#include "../atrb.h"
#include "../types.h"
/* byte order conversion functions between a fixed encoding (LE/BE) and the native encoding of the machine.
 * these are only the declarations; which definition is used is selected through `__BYTE_ORDER__` further down. */
/* little endian */
atrb_const static inline u16 le16ton(u16); // converts little-endian (LE) encoding to native for a 16 bit integer. (NOOP if native is LE)
atrb_const static inline u32 le32ton(u32); // converts little-endian (LE) encoding to native for a 32 bit integer. (NOOP if native is LE)
atrb_const static inline u64 le64ton(u64); // converts little-endian (LE) encoding to native for a 64 bit integer. (NOOP if native is LE)
atrb_const static inline u16 ntole16(u16); // converts native encoding to little-endian (LE) for a 16 bit integer. (NOOP if native is LE)
atrb_const static inline u32 ntole32(u32); // converts native encoding to little-endian (LE) for a 32 bit integer. (NOOP if native is LE)
atrb_const static inline u64 ntole64(u64); // converts native encoding to little-endian (LE) for a 64 bit integer. (NOOP if native is LE)
/* big endian */
atrb_const static inline u16 be16ton(u16); // converts big-endian (BE) encoding to native for a 16 bit integer. (NOOP if native is BE)
atrb_const static inline u32 be32ton(u32); // converts big-endian (BE) encoding to native for a 32 bit integer. (NOOP if native is BE)
atrb_const static inline u64 be64ton(u64); // converts big-endian (BE) encoding to native for a 64 bit integer. (NOOP if native is BE)
atrb_const static inline u16 ntobe16(u16); // converts native encoding to big-endian (BE) for a 16 bit integer. (NOOP if native is BE)
atrb_const static inline u32 ntobe32(u32); // converts native encoding to big-endian (BE) for a 32 bit integer. (NOOP if native is BE)
atrb_const static inline u64 ntobe64(u64); // converts native encoding to big-endian (BE) for a 64 bit integer. (NOOP if native is BE)
#if __has_include_next("endian.h")
#include_next <endian.h>
#else
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* little-endian host: LE values are already in native order, so these pass the value through unchanged */
u16 le16ton(u16 val)
{
	return val;
}
u32 le32ton(u32 val)
{
	return val;
}
u64 le64ton(u64 val)
{
	return val;
}
u16 ntole16(u16 val)
{
	return val;
}
u32 ntole32(u32 val)
{
	return val;
}
u64 ntole64(u64 val)
{
	return val;
}
/* little-endian host: BE values have their bytes in reverse order, so these swap the bytes */
u16 be16ton(u16 val)
{
	return __builtin_bswap16(val);
}
u32 be32ton(u32 val)
{
	return __builtin_bswap32(val);
}
u64 be64ton(u64 val)
{
	return __builtin_bswap64(val);
}
u16 ntobe16(u16 val)
{
	return __builtin_bswap16(val);
}
u32 ntobe32(u32 val)
{
	return __builtin_bswap32(val);
}
u64 ntobe64(u64 val)
{
	return __builtin_bswap64(val);
}
/* little-endian host: conversions from/to LE leave the value as-is, conversions from/to BE swap the bytes.
 * the no-op cases use a plain cast to the fixed-width type instead of glibc's internal `__uintN_identity` helpers,
 * since this fallback is only used when the system provides no <endian.h>, where those helpers are unavailable. */
#define le16toh(x) ((uint16_t)(x))
#define le32toh(x) ((uint32_t)(x))
#define le64toh(x) ((uint64_t)(x))
#define htole16(x) ((uint16_t)(x))
#define htole32(x) ((uint32_t)(x))
#define htole64(x) ((uint64_t)(x))
#define be16toh(x) __builtin_bswap16(x)
#define be32toh(x) __builtin_bswap32(x)
#define be64toh(x) __builtin_bswap64(x)
#define htobe16(x) __builtin_bswap16(x)
#define htobe32(x) __builtin_bswap32(x)
#define htobe64(x) __builtin_bswap64(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* big-endian host: LE values have their bytes in reverse order, so these swap the bytes.
 * the swapped value must be returned explicitly; without `return` the result is discarded
 * and using the function's value is undefined behaviour. */
u16 le16ton(u16 x) { return __builtin_bswap16(x); }
u32 le32ton(u32 x) { return __builtin_bswap32(x); }
u64 le64ton(u64 x) { return __builtin_bswap64(x); }
u16 ntole16(u16 x) { return __builtin_bswap16(x); }
u32 ntole32(u32 x) { return __builtin_bswap32(x); }
u64 ntole64(u64 x) { return __builtin_bswap64(x); }
/* big-endian host: BE values are already in native order, so these pass the value through unchanged */
u16 be16ton(u16 x) { return x; }
u32 be32ton(u32 x) { return x; }
u64 be64ton(u64 x) { return x; }
u16 ntobe16(u16 x) { return x; }
u32 ntobe32(u32 x) { return x; }
u64 ntobe64(u64 x) { return x; }
/* big-endian host: conversions from/to LE swap the bytes, conversions from/to BE leave the value as-is.
 * the no-op cases use a plain cast to the fixed-width type instead of glibc's internal `__uintN_identity` helpers,
 * since this fallback is only used when the system provides no <endian.h>, where those helpers are unavailable. */
#define le16toh(x) __builtin_bswap16(x)
#define le32toh(x) __builtin_bswap32(x)
#define le64toh(x) __builtin_bswap64(x)
#define htole16(x) __builtin_bswap16(x)
#define htole32(x) __builtin_bswap32(x)
#define htole64(x) __builtin_bswap64(x)
#define be16toh(x) ((uint16_t)(x))
#define be32toh(x) ((uint32_t)(x))
#define be64toh(x) ((uint64_t)(x))
#define htobe16(x) ((uint16_t)(x))
#define htobe32(x) ((uint32_t)(x))
#define htobe64(x) ((uint64_t)(x))
#else
#error machine architecture unsupported! Expected either big-endian or little-endian, make sure to use a compiler which defines __BYTE_ORDER__ (like clang or gcc)
#endif
#endif