1
0
Fork 0
mirror of https://github.com/azalea-rs/simdnbt.git synced 2025-08-02 07:26:04 +00:00

make NbtTag one byte smaller by removing all Vecs in NbtList

This commit is contained in:
mat 2024-05-12 23:02:23 +00:00
parent 33366ff085
commit 5ffce1a309
3 changed files with 133 additions and 186 deletions

View file

@ -37,7 +37,7 @@ impl<'a> NbtCompound<'a> {
let alloc_mut = unsafe { alloc.get().as_mut().unwrap_unchecked() }; let alloc_mut = unsafe { alloc.get().as_mut().unwrap_unchecked() };
let mut tags = alloc_mut.start_named_tags(depth); let mut tags = alloc_mut.named.start(depth);
loop { loop {
let tag_type = data.read_u8().map_err(|_| Error::UnexpectedEof)?; let tag_type = data.read_u8().map_err(|_| Error::UnexpectedEof)?;
if tag_type == END_ID { if tag_type == END_ID {
@ -47,7 +47,7 @@ impl<'a> NbtCompound<'a> {
let tag_name = match read_string(data) { let tag_name = match read_string(data) {
Ok(name) => name, Ok(name) => name,
Err(_) => { Err(_) => {
alloc_mut.finish_named_tags(tags, depth); alloc_mut.named.finish(tags, depth);
// the only error read_string can return is UnexpectedEof, so this makes it // the only error read_string can return is UnexpectedEof, so this makes it
// slightly faster // slightly faster
return Err(Error::UnexpectedEof); return Err(Error::UnexpectedEof);
@ -56,14 +56,14 @@ impl<'a> NbtCompound<'a> {
let tag = match NbtTag::read_with_type(data, alloc, tag_type, depth) { let tag = match NbtTag::read_with_type(data, alloc, tag_type, depth) {
Ok(tag) => tag, Ok(tag) => tag,
Err(e) => { Err(e) => {
alloc_mut.finish_named_tags(tags, depth); alloc_mut.named.finish(tags, depth);
return Err(e); return Err(e);
} }
}; };
tags.push((tag_name, tag)); tags.push((tag_name, tag));
} }
let alloc_mut = unsafe { alloc.get().as_mut().unwrap_unchecked() }; let alloc_mut = unsafe { alloc.get().as_mut().unwrap_unchecked() };
let values = alloc_mut.finish_named_tags(tags, depth); let values = alloc_mut.named.finish(tags, depth);
Ok(Self { values }) Ok(Self { values })
} }

View file

@ -27,12 +27,12 @@ pub enum NbtList<'a> {
Long(RawList<'a, i64>) = LONG_ID, Long(RawList<'a, i64>) = LONG_ID,
Float(RawList<'a, f32>) = FLOAT_ID, Float(RawList<'a, f32>) = FLOAT_ID,
Double(RawList<'a, f64>) = DOUBLE_ID, Double(RawList<'a, f64>) = DOUBLE_ID,
ByteArray(Vec<&'a [u8]>) = BYTE_ARRAY_ID, ByteArray(&'a [&'a [u8]]) = BYTE_ARRAY_ID,
String(Vec<&'a Mutf8Str>) = STRING_ID, String(&'a [&'a Mutf8Str]) = STRING_ID,
List(&'a [NbtList<'a>]) = LIST_ID, List(&'a [NbtList<'a>]) = LIST_ID,
Compound(&'a [NbtCompound<'a>]) = COMPOUND_ID, Compound(&'a [NbtCompound<'a>]) = COMPOUND_ID,
IntArray(Vec<RawList<'a, i32>>) = INT_ARRAY_ID, IntArray(&'a [RawList<'a, i32>]) = INT_ARRAY_ID,
LongArray(Vec<RawList<'a, i64>>) = LONG_ARRAY_ID, LongArray(&'a [RawList<'a, i64>]) = LONG_ARRAY_ID,
} }
impl<'a> NbtList<'a> { impl<'a> NbtList<'a> {
pub fn read( pub fn read(
@ -57,77 +57,105 @@ impl<'a> NbtList<'a> {
DOUBLE_ID => NbtList::Double(RawList::new(read_with_u32_length(data, 8)?)), DOUBLE_ID => NbtList::Double(RawList::new(read_with_u32_length(data, 8)?)),
BYTE_ARRAY_ID => NbtList::ByteArray({ BYTE_ARRAY_ID => NbtList::ByteArray({
let length = read_u32(data)?; let length = read_u32(data)?;
// arbitrary number to prevent big allocations
let mut arrays = Vec::with_capacity(length.min(128) as usize);
for _ in 0..length {
arrays.push(read_u8_array(data)?)
}
arrays
}),
STRING_ID => NbtList::String({
let length = read_u32(data)?;
// arbitrary number to prevent big allocations
let mut strings = Vec::with_capacity(length.min(128) as usize);
for _ in 0..length {
strings.push(read_string(data)?)
}
strings
}),
LIST_ID => NbtList::List({
let length = read_u32(data)?;
// arbitrary number to prevent big allocations
// let mut lists = Vec::with_capacity(length.min(128) as usize);
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() }; let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
let mut tags = alloc_mut.start_unnamed_list_tags(depth); let mut tags = alloc_mut.unnamed_bytearray.start(depth);
for _ in 0..length { for _ in 0..length {
let tag = match NbtList::read(data, alloc, depth + 1) { let tag = match read_u8_array(data) {
Ok(tag) => tag, Ok(tag) => tag,
Err(e) => { Err(e) => {
alloc_mut.finish_unnamed_list_tags(tags, depth); alloc_mut.unnamed_bytearray.finish(tags, depth);
return Err(e);
}
};
tags.push(tag)
}
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
alloc_mut.finish_unnamed_list_tags(tags, depth)
}),
COMPOUND_ID => NbtList::Compound({
let length = read_u32(data)?;
// arbitrary number to prevent big allocations
// let mut compounds = Vec::with_capacity(length.min(128) as usize);
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
let mut tags = alloc_mut.start_unnamed_compound_tags(depth);
for _ in 0..length {
let tag = match NbtCompound::read_with_depth(data, alloc, depth + 1) {
Ok(tag) => tag,
Err(e) => {
alloc_mut.finish_unnamed_compound_tags(tags, depth);
return Err(e); return Err(e);
} }
}; };
tags.push(tag); tags.push(tag);
} }
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() }; let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
alloc_mut.finish_unnamed_compound_tags(tags, depth) alloc_mut.unnamed_bytearray.finish(tags, depth)
}),
STRING_ID => NbtList::String({
let length = read_u32(data)?;
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
let mut tags = alloc_mut.unnamed_string.start(depth);
for _ in 0..length {
let tag = match read_string(data) {
Ok(tag) => tag,
Err(e) => {
alloc_mut.unnamed_string.finish(tags, depth);
return Err(e);
}
};
tags.push(tag);
}
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
alloc_mut.unnamed_string.finish(tags, depth)
}),
LIST_ID => NbtList::List({
let length = read_u32(data)?;
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
let mut tags = alloc_mut.unnamed_list.start(depth);
for _ in 0..length {
let tag = match NbtList::read(data, alloc, depth + 1) {
Ok(tag) => tag,
Err(e) => {
alloc_mut.unnamed_list.finish(tags, depth);
return Err(e);
}
};
tags.push(tag)
}
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
alloc_mut.unnamed_list.finish(tags, depth)
}),
COMPOUND_ID => NbtList::Compound({
let length = read_u32(data)?;
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
let mut tags = alloc_mut.unnamed_compound.start(depth);
for _ in 0..length {
let tag = match NbtCompound::read_with_depth(data, alloc, depth + 1) {
Ok(tag) => tag,
Err(e) => {
alloc_mut.unnamed_compound.finish(tags, depth);
return Err(e);
}
};
tags.push(tag);
}
let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
alloc_mut.unnamed_compound.finish(tags, depth)
}), }),
INT_ARRAY_ID => NbtList::IntArray({ INT_ARRAY_ID => NbtList::IntArray({
let length = read_u32(data)?; let length = read_u32(data)?;
// arbitrary number to prevent big allocations let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
let mut arrays = Vec::with_capacity(length.min(128) as usize); let mut tags = alloc_mut.unnamed_intarray.start(depth);
for _ in 0..length { for _ in 0..length {
arrays.push(read_int_array(data)?) let tag = match read_int_array(data) {
Ok(tag) => tag,
Err(e) => {
alloc_mut.unnamed_intarray.finish(tags, depth);
return Err(e);
}
};
tags.push(tag);
} }
arrays let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
alloc_mut.unnamed_intarray.finish(tags, depth)
}), }),
LONG_ARRAY_ID => NbtList::LongArray({ LONG_ARRAY_ID => NbtList::LongArray({
let length = read_u32(data)?; let length = read_u32(data)?;
// arbitrary number to prevent big allocations let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
let mut arrays = Vec::with_capacity(length.min(128) as usize); let mut tags = alloc_mut.unnamed_longarray.start(depth);
for _ in 0..length { for _ in 0..length {
arrays.push(read_long_array(data)?) let tag = match read_long_array(data) {
Ok(tag) => tag,
Err(e) => {
alloc_mut.unnamed_longarray.finish(tags, depth);
return Err(e);
}
};
tags.push(tag);
} }
arrays let alloc_mut = unsafe { alloc.get().as_mut().unwrap() };
alloc_mut.unnamed_longarray.finish(tags, depth)
}), }),
_ => return Err(Error::UnknownTagId(tag_type)), _ => return Err(Error::UnknownTagId(tag_type)),
}) })
@ -179,7 +207,7 @@ impl<'a> NbtList<'a> {
} }
NbtList::String(strings) => { NbtList::String(strings) => {
write_u32(data, strings.len() as u32); write_u32(data, strings.len() as u32);
for string in strings { for string in *strings {
write_string(data, string); write_string(data, string);
} }
} }
@ -194,13 +222,13 @@ impl<'a> NbtList<'a> {
} }
NbtList::IntArray(int_arrays) => { NbtList::IntArray(int_arrays) => {
write_u32(data, int_arrays.len() as u32); write_u32(data, int_arrays.len() as u32);
for array in int_arrays { for array in *int_arrays {
write_with_u32_length(data, 4, array.as_big_endian()); write_with_u32_length(data, 4, array.as_big_endian());
} }
} }
NbtList::LongArray(long_arrays) => { NbtList::LongArray(long_arrays) => {
write_u32(data, long_arrays.len() as u32); write_u32(data, long_arrays.len() as u32);
for array in long_arrays { for array in *long_arrays {
write_with_u32_length(data, 8, array.as_big_endian()); write_with_u32_length(data, 8, array.as_big_endian());
} }
} }
@ -253,7 +281,7 @@ impl<'a> NbtList<'a> {
_ => None, _ => None,
} }
} }
pub fn byte_arrays(&self) -> Option<&Vec<&[u8]>> { pub fn byte_arrays(&self) -> Option<&[&[u8]]> {
match self { match self {
NbtList::ByteArray(byte_arrays) => Some(byte_arrays), NbtList::ByteArray(byte_arrays) => Some(byte_arrays),
_ => None, _ => None,

View file

@ -21,7 +21,7 @@ use std::{
ptr::NonNull, ptr::NonNull,
}; };
use crate::Mutf8Str; use crate::{raw_list::RawList, Mutf8Str};
use super::{NbtCompound, NbtList, NbtTag}; use super::{NbtCompound, NbtList, NbtTag};
@ -30,152 +30,71 @@ const MIN_ALLOC_SIZE: usize = 1024;
#[derive(Default)] #[derive(Default)]
pub struct TagAllocator<'a> { pub struct TagAllocator<'a> {
// it's a vec because of the depth thing mentioned earlier, index in the vec = depth pub named: IndividualTagAllocator<(&'a Mutf8Str, NbtTag<'a>)>,
named_tags: Vec<TagsAllocation<(&'a Mutf8Str, NbtTag<'a>)>>,
// we also have to keep track of old allocations so we can deallocate them later
previous_named_tags: Vec<Vec<TagsAllocation<(&'a Mutf8Str, NbtTag<'a>)>>>,
// so remember earlier when i said the depth thing is only necessary because compounds aren't // so remember earlier when i said the depth thing is only necessary because compounds aren't
// length prefixed? ... well soooo i decided to make arrays store per-depth separately too to // length prefixed? ... well soooo i decided to make arrays store per-depth separately too to
// avoid exploits where an array with a big length is sent to force it to immediately allocate // avoid exploits where an array with a big length is sent to force it to immediately allocate
// a lot // a lot
unnamed_list_tags: Vec<TagsAllocation<NbtList<'a>>>, pub unnamed_list: IndividualTagAllocator<NbtList<'a>>,
previous_unnamed_list_tags: Vec<Vec<TagsAllocation<NbtList<'a>>>>, pub unnamed_compound: IndividualTagAllocator<NbtCompound<'a>>,
pub unnamed_bytearray: IndividualTagAllocator<&'a [u8]>,
unnamed_compound_tags: Vec<TagsAllocation<NbtCompound<'a>>>, pub unnamed_string: IndividualTagAllocator<&'a Mutf8Str>,
previous_unnamed_compound_tags: Vec<Vec<TagsAllocation<NbtCompound<'a>>>>, pub unnamed_intarray: IndividualTagAllocator<RawList<'a, i32>>,
pub unnamed_longarray: IndividualTagAllocator<RawList<'a, i64>>,
} }
impl<'a> TagAllocator<'a> { impl<'a> TagAllocator<'a> {
pub fn new() -> Self { pub fn new() -> Self {
Self::default() Self::default()
} }
}
pub fn start_named_tags( pub struct IndividualTagAllocator<T> {
&mut self, // it's a vec because of the depth thing mentioned earlier, index in the vec = depth
depth: usize, current: Vec<TagsAllocation<T>>,
) -> ContiguousTagsAllocator<(&'a Mutf8Str, NbtTag<'a>)> { // we also have to keep track of old allocations so we can deallocate them later
start_allocating_tags_with_depth(depth, &mut self.named_tags, &mut self.previous_named_tags) previous: Vec<Vec<TagsAllocation<T>>>,
} }
pub fn finish_named_tags( impl<T> IndividualTagAllocator<T>
&mut self, where
alloc: ContiguousTagsAllocator<(&'a Mutf8Str, NbtTag<'a>)>, T: Clone,
depth: usize, {
) -> &'a [(&'a Mutf8Str, NbtTag)] { pub fn start(&mut self, depth: usize) -> ContiguousTagsAllocator<T> {
finish_allocating_tags_with_depth( // make sure we have enough space for this depth
alloc, // (also note that depth is reused for compounds and arrays so we might have to push
depth, // more than once)
&mut self.named_tags, for _ in self.current.len()..=depth {
&mut self.previous_named_tags, self.current.push(Default::default());
) self.previous.push(Default::default());
} }
pub fn start_unnamed_list_tags( let alloc = self.current[depth].clone();
&mut self,
depth: usize,
) -> ContiguousTagsAllocator<NbtList<'a>> {
start_allocating_tags_with_depth(
depth,
&mut self.unnamed_list_tags,
&mut self.previous_unnamed_list_tags,
)
}
pub fn finish_unnamed_list_tags(
&mut self,
alloc: ContiguousTagsAllocator<NbtList<'a>>,
depth: usize,
) -> &'a [NbtList<'a>] {
finish_allocating_tags_with_depth(
alloc,
depth,
&mut self.unnamed_list_tags,
&mut self.previous_unnamed_list_tags,
)
}
pub fn start_unnamed_compound_tags( start_allocating_tags(alloc)
&mut self,
depth: usize,
) -> ContiguousTagsAllocator<NbtCompound<'a>> {
start_allocating_tags_with_depth(
depth,
&mut self.unnamed_compound_tags,
&mut self.previous_unnamed_compound_tags,
)
} }
pub fn finish_unnamed_compound_tags( pub fn finish<'a>(&mut self, alloc: ContiguousTagsAllocator<T>, depth: usize) -> &'a [T] {
&mut self, finish_allocating_tags(alloc, &mut self.current[depth], &mut self.previous[depth])
alloc: ContiguousTagsAllocator<NbtCompound<'a>>,
depth: usize,
) -> &'a [NbtCompound<'a>] {
finish_allocating_tags_with_depth(
alloc,
depth,
&mut self.unnamed_compound_tags,
&mut self.previous_unnamed_compound_tags,
)
} }
} }
impl Drop for TagAllocator<'_> { impl<T> Default for IndividualTagAllocator<T> {
fn default() -> Self {
Self {
current: Default::default(),
previous: Default::default(),
}
}
}
impl<T> Drop for IndividualTagAllocator<T> {
fn drop(&mut self) { fn drop(&mut self) {
self.named_tags self.current.iter_mut().for_each(TagsAllocation::deallocate);
.iter_mut() self.previous
.for_each(TagsAllocation::deallocate);
self.previous_named_tags
.iter_mut()
.flatten()
.for_each(TagsAllocation::deallocate);
self.unnamed_list_tags
.iter_mut()
.for_each(TagsAllocation::deallocate);
self.previous_unnamed_list_tags
.iter_mut()
.flatten()
.for_each(TagsAllocation::deallocate);
self.unnamed_compound_tags
.iter_mut()
.for_each(TagsAllocation::deallocate);
self.previous_unnamed_compound_tags
.iter_mut() .iter_mut()
.flatten() .flatten()
.for_each(TagsAllocation::deallocate); .for_each(TagsAllocation::deallocate);
} }
} }
pub fn start_allocating_tags_with_depth<T>(
depth: usize,
tags: &mut Vec<TagsAllocation<T>>,
previous_allocs: &mut Vec<Vec<TagsAllocation<T>>>,
) -> ContiguousTagsAllocator<T>
where
T: Clone,
{
// make sure we have enough space for this depth
// (also note that depth is reused for compounds and arrays so we might have to push
// more than once)
for _ in tags.len()..=depth {
tags.push(Default::default());
previous_allocs.push(Default::default());
}
let alloc = tags[depth].clone();
start_allocating_tags(alloc)
}
fn finish_allocating_tags_with_depth<'a, T>(
alloc: ContiguousTagsAllocator<T>,
depth: usize,
tags: &mut [TagsAllocation<T>],
previous_allocs: &mut [Vec<TagsAllocation<T>>],
) -> &'a [T]
where
T: Clone,
{
finish_allocating_tags(alloc, &mut tags[depth], &mut previous_allocs[depth])
}
fn start_allocating_tags<T>(alloc: TagsAllocation<T>) -> ContiguousTagsAllocator<T> { fn start_allocating_tags<T>(alloc: TagsAllocation<T>) -> ContiguousTagsAllocator<T> {
let is_new_allocation = alloc.cap == 0; let is_new_allocation = alloc.cap == 0;
ContiguousTagsAllocator { ContiguousTagsAllocator {