mirror of https://github.com/azalea-rs/simdnbt.git synced 2025-08-02 07:26:04 +00:00

make it work for arrays of tags too

This commit is contained in:
mat 2024-05-12 14:46:04 -05:00
parent 1ab27e531e
commit 927df9b33b
4 changed files with 342 additions and 17 deletions

View file

@@ -18,7 +18,7 @@ pub struct NbtCompound<'a> {
     values: &'a [(&'a Mutf8Str, NbtTag<'a>)],
 }
 
-impl<'a, 'b> NbtCompound<'a> {
+impl<'a> NbtCompound<'a> {
     pub fn read(
         data: &mut Cursor<&'a [u8]>,
         alloc: &UnsafeCell<TagAllocator<'a>>,
@@ -37,7 +37,7 @@ impl<'a, 'b> NbtCompound<'a> {
 
         let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
-        let mut tags = alloc_mut.start_compound(depth);
+        let mut tags = alloc_mut.start_named_tags(depth);
 
         loop {
             let tag_type = data.read_u8().map_err(|_| Error::UnexpectedEof)?;
             if tag_type == END_ID {
@@ -45,13 +45,13 @@ impl<'a, 'b> NbtCompound<'a> {
             }
             let tag_name = read_string(data)?;
-            tags.push(
+            tags.push((
                 tag_name,
                 NbtTag::read_with_type(data, alloc, tag_type, depth)?,
-            );
+            ));
         }
 
         let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
-        let values = alloc_mut.finish_compound(tags);
+        let values = alloc_mut.finish_named_tags(tags, depth);
 
         Ok(Self { values })
     }

View file

@@ -29,8 +29,8 @@ pub enum NbtList<'a> {
     Double(RawList<'a, f64>) = DOUBLE_ID,
     ByteArray(Vec<&'a [u8]>) = BYTE_ARRAY_ID,
     String(Vec<&'a Mutf8Str>) = STRING_ID,
-    List(Vec<NbtList<'a>>) = LIST_ID,
-    Compound(Vec<NbtCompound<'a>>) = COMPOUND_ID,
+    List(&'a [NbtList<'a>]) = LIST_ID,
+    Compound(&'a [NbtCompound<'a>]) = COMPOUND_ID,
     IntArray(Vec<RawList<'a, i32>>) = INT_ARRAY_ID,
     LongArray(Vec<RawList<'a, i64>>) = LONG_ARRAY_ID,
 }
@@ -76,20 +76,26 @@ impl<'a> NbtList<'a> {
             LIST_ID => NbtList::List({
                 let length = read_u32(data)?;
                 // arbitrary number to prevent big allocations
-                let mut lists = Vec::with_capacity(length.min(128) as usize);
+                // let mut lists = Vec::with_capacity(length.min(128) as usize);
+                let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
+                let mut tags = alloc_mut.start_unnamed_list_tags(depth);
                 for _ in 0..length {
-                    lists.push(NbtList::read(data, alloc, depth + 1)?)
+                    tags.push(NbtList::read(data, alloc, depth + 1)?)
                 }
-                lists
+                let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
+                alloc_mut.finish_unnamed_list_tags(tags, depth)
             }),
             COMPOUND_ID => NbtList::Compound({
                 let length = read_u32(data)?;
                 // arbitrary number to prevent big allocations
-                let mut compounds = Vec::with_capacity(length.min(128) as usize);
+                // let mut compounds = Vec::with_capacity(length.min(128) as usize);
+                let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
+                let mut tags = alloc_mut.start_unnamed_compound_tags(depth);
                 for _ in 0..length {
-                    compounds.push(NbtCompound::read_with_depth(data, alloc, depth + 1)?)
+                    tags.push(NbtCompound::read_with_depth(data, alloc, depth + 1)?)
                 }
-                compounds
+                let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
+                alloc_mut.finish_unnamed_compound_tags(tags, depth)
             }),
             INT_ARRAY_ID => NbtList::IntArray({
                 let length = read_u32(data)?;
@@ -122,7 +128,7 @@ impl<'a> NbtList<'a> {
                 unchecked_push(data, COMPOUND_ID);
                 unchecked_extend(data, &(compounds.len() as u32).to_be_bytes());
             }
-            for compound in compounds {
+            for compound in *compounds {
                 compound.write(data);
             }
             return;
@@ -165,7 +171,7 @@ impl<'a> NbtList<'a> {
             }
             NbtList::List(lists) => {
                 write_u32(data, lists.len() as u32);
-                for list in lists {
+                for list in *lists {
                     list.write(data);
                 }
             }

View file

@@ -26,7 +26,8 @@ pub use self::{compound::NbtCompound, list::NbtList};
 pub struct BaseNbt<'a> {
     name: &'a Mutf8Str,
     tag: NbtCompound<'a>,
-    tag_alloc: TagAllocator<'a>,
+    // we need to keep this around so it's not deallocated
+    _tag_alloc: TagAllocator<'a>,
 }
 
 #[derive(Debug, PartialEq, Default)]
@@ -54,7 +55,7 @@ impl<'a> Nbt<'a> {
         Ok(Nbt::Some(BaseNbt {
             name,
             tag,
-            tag_alloc: tag_alloc.into_inner(),
+            _tag_alloc: tag_alloc.into_inner(),
         }))
     }

View file

@@ -0,0 +1,318 @@
//! Some tags, like compounds and arrays, contain other tags. The naive approach would be to just
//! use `Vec`s or `HashMap`s, but this is inefficient and leads to many small allocations.
//!
//! Instead, the idea is essentially that we'd have two big `Vec`s (one for named tags and one
//! for unnamed tags), and compounds/arrays simply hold a slice into the relevant vec.
//!
//! This almost works, but there are two main issues:
//! - compounds aren't length-prefixed, so at the start of a compound we can't pre-allocate
//!   room for the rest of that compound
//! - resizing a vec might move it in memory, invalidating all of our slices into it
//!
//! Solving the first problem isn't that hard, since we can have a separate vec for every "depth"
//! (so nested compounds don't share a vec with their parent).
//! To solve the second problem, I chose to implement a special data structure that relies on
//! low-level allocations so we can guarantee that our allocations never move in memory.
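//!
//! As a rough sketch of how the parser ends up using this module (these are the real functions
//! defined below; `name` and `tag` stand in for values produced while parsing):
//!
//! ```ignore
//! let mut alloc = TagAllocator::new();
//!
//! // entering a compound at depth 0: start a contiguous run of named tags
//! let mut tags = alloc.start_named_tags(0);
//! tags.push((name, tag));
//!
//! // leaving the compound: get back a slice that's guaranteed to never move
//! let values: &[(&Mutf8Str, NbtTag)] = alloc.finish_named_tags(tags, 0);
//! ```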
use std::{
alloc::{self, Layout},
fmt,
ptr::NonNull,
};
use crate::Mutf8Str;
use super::{NbtCompound, NbtList, NbtTag};
const MIN_ALLOC_SIZE: usize = 1024;
#[derive(Default)]
pub struct TagAllocator<'a> {
// it's a vec because of the depth thing mentioned earlier, index in the vec = depth
named_tags: Vec<TagsAllocation<(&'a Mutf8Str, NbtTag<'a>)>>,
// we also have to keep track of old allocations so we can deallocate them later
previous_named_tags: Vec<Vec<TagsAllocation<(&'a Mutf8Str, NbtTag<'a>)>>>,
    // so remember earlier when I said the per-depth split is only necessary because compounds
    // aren't length-prefixed? well, lists are stored per-depth too, but for a different reason:
    // it avoids an exploit where a list claiming a huge length could force us to immediately
    // allocate a lot of memory
unnamed_list_tags: Vec<TagsAllocation<NbtList<'a>>>,
previous_unnamed_list_tags: Vec<Vec<TagsAllocation<NbtList<'a>>>>,
unnamed_compound_tags: Vec<TagsAllocation<NbtCompound<'a>>>,
previous_unnamed_compound_tags: Vec<Vec<TagsAllocation<NbtCompound<'a>>>>,
}
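// for example, while parsing `{a: {b: 1b}}` the outer compound's entries go into
// `named_tags[0]` and the inner compound's into `named_tags[1]`, so an in-progress compound
// never has another compound's tags interleaved into its contiguous run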
impl<'a> TagAllocator<'a> {
pub fn new() -> Self {
Self::default()
}
pub fn start_named_tags(
&mut self,
depth: usize,
) -> ContiguousTagsAllocator<(&'a Mutf8Str, NbtTag<'a>)> {
start_allocating_tags_with_depth(depth, &mut self.named_tags, &mut self.previous_named_tags)
}
pub fn finish_named_tags(
&mut self,
alloc: ContiguousTagsAllocator<(&'a Mutf8Str, NbtTag<'a>)>,
depth: usize,
    ) -> &'a [(&'a Mutf8Str, NbtTag<'a>)] {
finish_allocating_tags_with_depth(
alloc,
depth,
&mut self.named_tags,
&mut self.previous_named_tags,
)
}
pub fn start_unnamed_list_tags(
&mut self,
depth: usize,
) -> ContiguousTagsAllocator<NbtList<'a>> {
start_allocating_tags_with_depth(
depth,
&mut self.unnamed_list_tags,
&mut self.previous_unnamed_list_tags,
)
}
pub fn finish_unnamed_list_tags(
&mut self,
alloc: ContiguousTagsAllocator<NbtList<'a>>,
depth: usize,
) -> &'a [NbtList<'a>] {
finish_allocating_tags_with_depth(
alloc,
depth,
&mut self.unnamed_list_tags,
&mut self.previous_unnamed_list_tags,
)
}
pub fn start_unnamed_compound_tags(
&mut self,
depth: usize,
) -> ContiguousTagsAllocator<NbtCompound<'a>> {
start_allocating_tags_with_depth(
depth,
&mut self.unnamed_compound_tags,
&mut self.previous_unnamed_compound_tags,
)
}
pub fn finish_unnamed_compound_tags(
&mut self,
alloc: ContiguousTagsAllocator<NbtCompound<'a>>,
depth: usize,
) -> &'a [NbtCompound<'a>] {
finish_allocating_tags_with_depth(
alloc,
depth,
&mut self.unnamed_compound_tags,
&mut self.previous_unnamed_compound_tags,
)
}
}
impl Drop for TagAllocator<'_> {
fn drop(&mut self) {
self.named_tags
.iter_mut()
.for_each(TagsAllocation::deallocate);
self.previous_named_tags
.iter_mut()
.flatten()
.for_each(TagsAllocation::deallocate);
self.unnamed_list_tags
.iter_mut()
.for_each(TagsAllocation::deallocate);
self.previous_unnamed_list_tags
.iter_mut()
.flatten()
.for_each(TagsAllocation::deallocate);
self.unnamed_compound_tags
.iter_mut()
.for_each(TagsAllocation::deallocate);
self.previous_unnamed_compound_tags
.iter_mut()
.flatten()
.for_each(TagsAllocation::deallocate);
}
}
pub fn start_allocating_tags_with_depth<T>(
depth: usize,
tags: &mut Vec<TagsAllocation<T>>,
previous_allocs: &mut Vec<Vec<TagsAllocation<T>>>,
) -> ContiguousTagsAllocator<T>
where
T: Clone,
{
// make sure we have enough space for this depth
// (also note that depth is reused for compounds and arrays so we might have to push
// more than once)
for _ in tags.len()..=depth {
tags.push(Default::default());
previous_allocs.push(Default::default());
}
let alloc = tags[depth].clone();
start_allocating_tags(alloc)
}
fn finish_allocating_tags_with_depth<'a, T>(
alloc: ContiguousTagsAllocator<T>,
depth: usize,
tags: &mut [TagsAllocation<T>],
previous_allocs: &mut [Vec<TagsAllocation<T>>],
) -> &'a [T]
where
T: Clone,
{
finish_allocating_tags(alloc, &mut tags[depth], &mut previous_allocs[depth])
}
fn start_allocating_tags<T>(alloc: TagsAllocation<T>) -> ContiguousTagsAllocator<T> {
let is_new_allocation = alloc.cap == 0;
ContiguousTagsAllocator {
alloc,
is_new_allocation,
size: 0,
}
}
fn finish_allocating_tags<'a, T>(
alloc: ContiguousTagsAllocator<T>,
current_alloc: &mut TagsAllocation<T>,
previous_allocs: &mut Vec<TagsAllocation<T>>,
) -> &'a [T] {
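    // the tags this container pushed are the last `size` elements of the (possibly shared)
    // allocation, i.e. the range `ptr + len - size .. ptr + len`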
let slice = unsafe {
std::slice::from_raw_parts(
alloc
.alloc
.ptr
.as_ptr()
.add(alloc.alloc.len)
.sub(alloc.size),
alloc.size,
)
};
let previous_allocation_at_that_depth = std::mem::replace(current_alloc, alloc.alloc);
if alloc.is_new_allocation {
previous_allocs.push(previous_allocation_at_that_depth);
}
slice
}
#[derive(Clone)]
pub struct TagsAllocation<T> {
ptr: NonNull<T>,
cap: usize,
len: usize,
}
impl<T> Default for TagsAllocation<T> {
fn default() -> Self {
Self {
ptr: NonNull::dangling(),
cap: 0,
len: 0,
}
}
}
impl<T> TagsAllocation<T> {
fn deallocate(&mut self) {
if self.cap == 0 {
return;
}
// call drop on the tags too
unsafe {
std::ptr::drop_in_place(std::slice::from_raw_parts_mut(
self.ptr.as_ptr().cast::<T>(),
self.len,
));
}
unsafe {
alloc::dealloc(
self.ptr.as_ptr().cast(),
Layout::array::<T>(self.cap).unwrap(),
)
}
}
}
// this is created when we start reading a compound or list, and is used to push its tags
// contiguously into the (possibly shared) allocation for its depth
pub struct ContiguousTagsAllocator<T> {
alloc: TagsAllocation<T>,
/// whether we created a new allocation for this compound (as opposed to reusing an existing
/// one).
    /// this is used to determine whether we're allowed to reallocate it in place when growing,
    /// and whether the allocation it replaces should be saved to the `previous_*` vecs when
    /// finishing
is_new_allocation: bool,
/// the size of this individual compound allocation. the size of the full allocation is in
/// `alloc.len`.
size: usize,
}
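// for example: if the depth-1 allocation already holds 3 tags from an earlier sibling compound
// and this compound has pushed 2 tags so far, then `alloc.len == 5` and `size == 2`, and
// finishing will hand out a slice over elements 3..5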
impl<T> ContiguousTagsAllocator<T> {
fn grow(&mut self) {
let new_cap = if self.is_new_allocation {
// this makes sure we don't allocate 0 bytes
std::cmp::max(self.alloc.cap * 2, MIN_ALLOC_SIZE)
} else {
            // reuse the previous cap, since it's not unlikely that we'll have another compound
            // with a similar size
self.alloc.cap
};
let new_layout = Layout::array::<T>(new_cap).unwrap();
        let new_ptr = if self.is_new_allocation && self.alloc.cap != 0 {
            // we own this allocation, so we can grow it in place. realloc keeps the
            // existing contents, and note that it takes the new size in *bytes*
            let old_layout = Layout::array::<T>(self.alloc.cap).unwrap();
            unsafe {
                alloc::realloc(
                    self.alloc.ptr.as_ptr() as *mut u8,
                    old_layout,
                    new_layout.size(),
                ) as *mut T
            }
        } else {
            // the current allocation is shared with other compounds (or empty), so leave
            // it alone and copy our own tags (the last `size` elements) into a fresh
            // allocation
            let new_ptr = unsafe { alloc::alloc(new_layout) as *mut T };
            unsafe {
                std::ptr::copy_nonoverlapping(
                    self.alloc.ptr.as_ptr().add(self.alloc.len).sub(self.size),
                    new_ptr,
                    self.size,
                )
            };
            self.is_new_allocation = true;
            new_ptr
        };
        self.alloc.ptr = NonNull::new(new_ptr).unwrap();
        self.alloc.cap = new_cap;
        self.alloc.len = self.size;
    }
pub fn push(&mut self, value: T) {
// check if we need to reallocate
if self.alloc.len == self.alloc.cap {
self.grow();
}
// push the new tag
unsafe {
std::ptr::write(self.alloc.ptr.as_ptr().add(self.alloc.len), value);
}
self.alloc.len += 1;
self.size += 1;
}
}
impl<'a> fmt::Debug for TagAllocator<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("TagAllocator").finish()
}
}