Compare commits

...

10 Commits

Author SHA1 Message Date
6dfcb2b5b9 update nbt_proc function, still inoperable 2025-07-23 16:41:36 +02:00
54e4bf45f9 add funciton for data loading/processing 2025-07-23 16:37:25 +02:00
1fb878b5ae modify the array bytelength function to get the bytelength of a single tag. 2025-07-23 16:32:39 +02:00
2daeb9823c add a function to get an NBT array bytelength 2025-07-23 14:55:24 +02:00
4e1cd68c38 semantic fix, use explicit fallthrough
GCC was complaining, and I didn't have a valid counter argument.
2025-07-23 14:07:06 +02:00
501c623f01 rename nbt_prim_tagsize to nbt_primsize, for consistency. 2025-07-23 13:00:53 +02:00
65ee7c5b55 add a function for checking whether a tag is primitive or not
mainly for niche uses
2025-07-23 13:00:53 +02:00
5d7c244c8f optimise switch of nbt_prim_tagsize
had duplicate switch branches, which wastes binary size.
2025-07-23 13:00:53 +02:00
9fd920908e move nbt_prim_tagsize to header, since it's a pretty useful funciton. 2025-07-23 13:00:53 +02:00
8102dd6a71 fix: markdown footnote now uses correct format 2025-07-23 13:00:53 +02:00
3 changed files with 143 additions and 54 deletions

View File

@@ -22,7 +22,7 @@ note: UUID are stored as an integer array.
| `0xA` | `compound` | list of tags delimited with end tag |
| `0xB` | `int array` | `int32_t` (len) -> `int32_t` |
| `0xC` | `long array` | `int32_t` (len) -> `int64_t` |
[^be] [big-endian](https://en.wikipedia.org/wiki/Endianness)
[^be]: [big-endian](https://en.wikipedia.org/wiki/Endianness)
## world data
There is a difference between \*.mca and \*.mcr files.

View File

@@ -1,5 +1,6 @@
#include "nbt.h"
#include <assert.h>
#include <stddef.h>
#include <string.h>
@@ -29,73 +30,144 @@ static int nbt_cmpstr(char const *restrict matstr, u8 const *restrict buf) {
return strncmp(str, matstr, len);
}
/* gets the tag size of primitive types, returns `>0` on success, `<0` on failure */
int nbt_prim_tagsize(u8 tag) {
/* computes where the tag following the one at `buf` is expected to start.
 * `naml` is the byte length of this tag's name.
 * `NULL` is yielded when `nbt_tagdatlen` reports a data length of `0`. */
static u8 const *nbt_nexttag(u8 const *restrict buf, u16 naml) {
	size_t const datlen = nbt_tagdatlen(buf);
	// TODO: compound tags should be handled here
	return datlen ? buf + naml + datlen + 3 : NULL;
}
// TODO: not actually doing anything
/* readies the output data for export, returns the new buffer position, or `NULL` upon an error (may be out of bounds)
 * `buf` points at the tag's ID byte, `slen` is the byte length of the tag's name.
 * integral and floating point payloads are converted from big-endian to the host's byte order. */
static u8 const *nbt_proctag(u8 const *restrict buf, u16 slen) {
	u8 const *ptr = buf + 3 + slen; // the payload starts after the ID, the name length and the name itself
	u8 dat[8];                      // staging copy of the payload in host byte order
	size_t arrlen = 0;

	switch (*buf) {
	// integral types; `memcpy` is used since the payload has no alignment guarantee within the buffer
	case NBT_I8:
		dat[0] = *ptr;
		return ptr + 1;
	case NBT_I16: {
		u16 val;
		memcpy(&val, ptr, sizeof(val));
		val = be16toh(val);
		memcpy(dat, &val, sizeof(val));
		return ptr + sizeof(val);
	}
	case NBT_I32: __attribute__((fallthrough));
	case NBT_F32: {
		u32 val;
		memcpy(&val, ptr, sizeof(val));
		val = be32toh(val); // the width of the conversion must match the width of the payload
		memcpy(dat, &val, sizeof(val));
		return ptr + sizeof(val);
	}
	case NBT_I64: __attribute__((fallthrough));
	case NBT_F64: {
		u64 val;
		memcpy(&val, ptr, sizeof(val));
		val = be64toh(val);
		memcpy(dat, &val, sizeof(val));
		return ptr + sizeof(val);
	}

	// arrays, handled differently
	case NBT_LIST:    __attribute__((fallthrough));
	case NBT_ARR_I8:  __attribute__((fallthrough));
	case NBT_STR:     __attribute__((fallthrough));
	case NBT_ARR_I32: __attribute__((fallthrough));
	case NBT_ARR_I64:
		// TODO: arrlen = nbt_arrbsize(ptr);
		break;
	default: return NULL;
	}

	// arrays aren't supported yet, so `arrlen` is still `0` at this point and every array tag is rejected
	if (!arrlen) return NULL;
	// NOTE(review): `nbt_primsize` yields `<0` for the array tags that reach this line; revisit once `arrlen` is computed
	return ptr + nbt_primsize(*buf);
}
/* finds which of `pats` has a segment at depth `dpt` that is equivalent to `cmp`, assumes `cmp` is `≥len` bytes long.
 * `cmp` needn't be null-terminated; a segment only matches if it is exactly `len` bytes long.
 * paths that have no segment at depth `dpt` are skipped.
 * returns the matching segment, or `NULL` if there is none. */
static char const *getpat(struct nbt_path const *restrict pats, uint npats, i16 dpt, char const *restrict cmp, u16 len) {
	if (dpt < 0) return NULL;

	for (uint i = 0; i < npats; i++) {
		// this path is too short to have a segment at the requested depth
		if (dpt >= pats[i].len) continue;

		// comparing the lengths first prevents `cmp` from matching as a mere prefix of a longer segment
		char const *seg = pats[i].pat[dpt];
		if (strlen(seg) == len && memcmp(seg, cmp, len) == 0)
			return seg;
	}
	return NULL;
}
// TODO: make the user do the looping
/* walks the uncompressed `NBT` data in `buf`, with a size of `len`, following the tags whose names match `pats`.
 * returns `0` if the root compound was closed, `1` if the data is malformed, truncated or contains an unsupported tag. */
int nbt_proc(struct nbt_path const *restrict pats, uint npats, u8 const *restrict buf, size_t len) {
	// the smallest input is an unnamed root compound that is closed right away: ID + name length + end tag
	if (len < 4) return 1;

	// ensure first and last tag(s) are valid
	if (buf[0] != NBT_COMPOUND || buf[len - 1] != NBT_END)
		return 1;

	// acquire the maximum depth that we'll need to go (exclusive)
	i16 mdpt = 0;
	for (uint i = 0; i < npats; i++) {
		if (pats[i].len > mdpt) mdpt = pats[i].len;
	}
	assert(mdpt > 0);
	if (mdpt <= 0) return 1; // keeps the array below well-defined when assertions are compiled out

	// storing the segments of the current path, one slot for each depth in `[0, mdpt)`
	char const *cpat[mdpt];
	memset((void *)cpat, 0, sizeof(cpat));

	u8 const *const end = buf + len;
	i16 dpt = 0;

	// looping through the different tags
	u8 const *ptr = buf + nbt_strlen(buf + 1) + 3;
	while (ptr < end && dpt >= 0) {
		// end tags consist solely of their ID, there is no name to read or match
		if (*ptr == NBT_END) {
			dpt--;
			ptr++;
			continue;
		}

		// the name length and the name itself must lie within the buffer before they're read
		if ((size_t)(end - ptr) < 3) return 1;
		u16 naml = nbt_strlen(ptr + 1);
		if ((size_t)(end - ptr) - 3 < naml) return 1;

		// no path has a segment at depth `mdpt` or deeper, so nothing can match there
		char const *mat = NULL;
		if (dpt < mdpt) {
			mat = getpat(pats, npats, dpt, (char const *)(ptr + 3), naml);
			cpat[dpt] = mat;
		}

		if (!mat) {
			ptr = nbt_nexttag(ptr, naml);
		} else if (*ptr == NBT_COMPOUND) {
			// descend, continuing with the first tag inside of the compound
			dpt++;
			ptr += naml + 3;
		} else {
			ptr = nbt_proctag(ptr, naml);
		}
		if (!ptr) return 1;
	}

	// TODO: finish function
	// `dpt` only drops below `0` once the end tag of the root compound was seen
	return dpt >= 0;
}
/* gets the tag size of primitive types, returns `>0` on success, `<0` on failure */
int nbt_primsize(u8 tag) {
	switch (tag) {
	case NBT_I8:  return 1;
	case NBT_I16: return 2;
	// the 32 and 64 bit integers share their size with the floating point type of the same width,
	// each label may appear only once within the switch.
	case NBT_I32: __attribute__((fallthrough));
	case NBT_F32: return 4;
	case NBT_I64: __attribute__((fallthrough));
	case NBT_F64: return 8;
	default:      return -1;
	}
}
/* returns the (expected) pointer of the tag following this one.
* `NBT_COMPOUND` and `NBT_END` tags are not valid for this function and should be handled separately.
* `NULL` is returned if anything went wrong. */
static u8 const *nbt_nexttag(u8 const *restrict buf) {
u8 const *nxt = buf + 1;
nxt += nbt_strlen(nxt) + 2;
size_t nbt_tagdatlen(u8 const *restrict buf) {
i32 mems = 0;
uint mems = 0;
switch (*buf) {
case NBT_I8:
case NBT_I16:
case NBT_I32:
case NBT_I64:
case NBT_F32:
case NBT_F64:
nxt += nbt_prim_tagsize(*buf);
return nxt;
case NBT_I8: __attribute__((fallthrough));
case NBT_I16: __attribute__((fallthrough));
case NBT_I32: __attribute__((fallthrough));
case NBT_F32: __attribute__((fallthrough));
case NBT_I64: __attribute__((fallthrough));
case NBT_F64: __attribute__((fallthrough));
case NBT_ARR_I64:
mems += sizeof(i64) - sizeof(i32);
case NBT_ARR_I32:
mems += sizeof(i32) - sizeof(i8);
case NBT_ARR_I8:
mems += 1;
nxt += mems * nbt_arrlen(nxt);
return nxt;
case NBT_STR:
nxt += nbt_strlen(nxt);
return nxt;
case NBT_ARR_I64: mems += sizeof(i64) - sizeof(i32); __attribute__((fallthrough));
case NBT_ARR_I32: mems += sizeof(i32) - sizeof(i8); __attribute__((fallthrough));
case NBT_ARR_I8: return +mems * nbt_arrlen(buf) + 4;
case NBT_STR: return nbt_strlen(buf) + 2;
case NBT_LIST:
mems = nbt_prim_tagsize(*nxt);
if (mems > 0) {
nxt += 1;
nxt += mems * nbt_arrlen(nxt);
return nxt;
}
// let case escape to `default` when `mems` `≤0`
default: return NULL; // failure on compound/end tags; these require more nuanced logic
mems = nbt_primsize(*buf);
if (mems > 0) return mems * nbt_arrlen(buf + 1) + 5;
return 0;
default: return 0;
}
}
int nbt_proc(void **restrict datout, u8 const *restrict buf, size_t len) {
// first byte should be a compound tag
if (*buf != NBT_COMPOUND) return 1;
uint ncomp = 1;
// ignore the first tag + its name, so we start with the juicy data
uint tmp = nbt_strlen(buf + 1) + 3;
buf += tmp;
len -= tmp;
// TODO: finish function
return 0;
/* tells whether `tag` is one of the primitive data tags (the integer and floating point types).
 * evaluates to `1` if so, `0` for any other tag. */
int nbt_isprim(u8 tag) {
	return tag == NBT_I8
		|| tag == NBT_I16
		|| tag == NBT_I32
		|| tag == NBT_F32
		|| tag == NBT_I64
		|| tag == NBT_F64;
}

View File

@@ -5,6 +5,7 @@
#include <stdbool.h>
#include <stdlib.h>
#include "../util/atrb.h"
#include "../util/types.h"
/* NBT (named binary tag) is a tree data structure. Tags have a numeric type ID, name and a payload.
@@ -33,4 +34,20 @@ enum nbt_tagid {
NBT_ARR_I64 = 0x0C, // starts with a i32, denoting size, followed by the u32 data
};
int nbt_proc(void **restrict datout, u8 const *restrict buf, size_t len);
/* describes one path of tag names for `nbt_proc` to match against the names of the tags in the NBT data. */
struct nbt_path {
char const **restrict pat; // specifies the NBT path components as separate elements; `pat[n]` is compared with the tag names at depth `n`
i16 len; // specifies the length of the NBT elements; `nbt_proc` uses the largest `len` as its maximum depth. presumably the amount of components in `pat` (TODO confirm)
};
/* checks whether the tag is a primitive data tag. (not recommended for filtering tags, use a `switch`)
* returns a boolean value. */
atrb_const int nbt_isprim(u8 tag);
/* gets the byte size of an NBT tag's data (excluding id and name), returns `0` upon error. */
atrb_const size_t nbt_tagdatlen(u8 const *buf);
/* gets the tag size of primitive types, returns `>0` on success, `<0` on failure */
atrb_const int nbt_primsize(u8 tag);
/* processes the uncompressed `NBT` data in `buf`, with a size of `len`. */
atrb_nonnull(1, 3) int nbt_proc(struct nbt_path const *restrict paths, uint npaths, u8 const *restrict buf, size_t len);