1
0
Fork 0
mirror of https://github.com/azalea-rs/simdnbt.git synced 2025-08-02 15:36:03 +00:00

more optimal string skipping in read_tag_in_compound

This commit is contained in:
mat 2025-01-18 08:06:58 +00:00
parent bdf55aab6e
commit d4f50f2420
6 changed files with 84 additions and 60 deletions

View file

@ -2,10 +2,10 @@ use std::{hint::unreachable_unchecked, mem::MaybeUninit};
use crate::{ use crate::{
common::{ common::{
read_int_array, read_long_array, read_string, read_with_u32_length, unchecked_extend, extend_unchecked, push_unchecked, read_int_array, read_long_array, read_string,
unchecked_push, unchecked_write_string, write_string, BYTE_ARRAY_ID, BYTE_ID, COMPOUND_ID, read_with_u32_length, skip_string, write_string, write_string_unchecked, BYTE_ARRAY_ID,
DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID, LONG_ID, BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID,
MAX_DEPTH, SHORT_ID, STRING_ID, LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
}, },
error::NonRootError, error::NonRootError,
reader::Reader, reader::Reader,
@ -54,8 +54,8 @@ impl<'a: 'tape, 'tape> NbtCompound<'a, 'tape> {
// SAFETY: We just reserved enough space for the tag ID, the name length, the name, and // SAFETY: We just reserved enough space for the tag ID, the name length, the name, and
// 4 bytes of tag data. // 4 bytes of tag data.
unsafe { unsafe {
unchecked_push(data, tag.id()); push_unchecked(data, tag.id());
unchecked_write_string(data, name); write_string_unchecked(data, name);
} }
write_tag(tag, data); write_tag(tag, data);
@ -403,7 +403,7 @@ pub(crate) fn read_tag_in_compound<'a>(
let tag_name_ptr = data.cur; let tag_name_ptr = data.cur;
debug_assert_eq!(tag_name_ptr as u64 >> 56, 0); debug_assert_eq!(tag_name_ptr as u64 >> 56, 0);
read_string(data)?; skip_string(data)?;
tapes.main.push(TapeElement::new(tag_name_ptr as u64)); tapes.main.push(TapeElement::new(tag_name_ptr as u64));
read_tag(data, tapes, stack, tag_type) read_tag(data, tapes, stack, tag_type)
@ -433,19 +433,19 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
let el = tag.element(); let el = tag.element();
match el.kind() { match el.kind() {
TapeTagKind::Byte => unsafe { TapeTagKind::Byte => unsafe {
unchecked_push(data, tag.byte().unwrap() as u8); push_unchecked(data, tag.byte().unwrap() as u8);
}, },
TapeTagKind::Short => unsafe { TapeTagKind::Short => unsafe {
unchecked_extend(data, &tag.short().unwrap().to_be_bytes()); extend_unchecked(data, &tag.short().unwrap().to_be_bytes());
}, },
TapeTagKind::Int => unsafe { TapeTagKind::Int => unsafe {
unchecked_extend(data, &tag.int().unwrap().to_be_bytes()); extend_unchecked(data, &tag.int().unwrap().to_be_bytes());
}, },
TapeTagKind::Long => { TapeTagKind::Long => {
data.extend_from_slice(&tag.long().unwrap().to_be_bytes()); data.extend_from_slice(&tag.long().unwrap().to_be_bytes());
} }
TapeTagKind::Float => unsafe { TapeTagKind::Float => unsafe {
unchecked_extend(data, &tag.float().unwrap().to_be_bytes()); extend_unchecked(data, &tag.float().unwrap().to_be_bytes());
}, },
TapeTagKind::Double => { TapeTagKind::Double => {
data.extend_from_slice(&tag.double().unwrap().to_be_bytes()); data.extend_from_slice(&tag.double().unwrap().to_be_bytes());
@ -453,7 +453,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
TapeTagKind::ByteArray => { TapeTagKind::ByteArray => {
let byte_array = tag.byte_array().unwrap(); let byte_array = tag.byte_array().unwrap();
unsafe { unsafe {
unchecked_extend(data, &(byte_array.len() as u32).to_be_bytes()); extend_unchecked(data, &(byte_array.len() as u32).to_be_bytes());
} }
data.extend_from_slice(byte_array); data.extend_from_slice(byte_array);
} }
@ -471,7 +471,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
let int_array = let int_array =
unsafe { list::u32_prefixed_list_to_rawlist_unchecked::<i32>(el.ptr()).unwrap() }; unsafe { list::u32_prefixed_list_to_rawlist_unchecked::<i32>(el.ptr()).unwrap() };
unsafe { unsafe {
unchecked_extend(data, &(int_array.len() as u32).to_be_bytes()); extend_unchecked(data, &(int_array.len() as u32).to_be_bytes());
} }
data.extend_from_slice(int_array.as_big_endian()); data.extend_from_slice(int_array.as_big_endian());
} }
@ -479,7 +479,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
let long_array = let long_array =
unsafe { list::u32_prefixed_list_to_rawlist_unchecked::<i64>(el.ptr()).unwrap() }; unsafe { list::u32_prefixed_list_to_rawlist_unchecked::<i64>(el.ptr()).unwrap() };
unsafe { unsafe {
unchecked_extend(data, &(long_array.len() as u32).to_be_bytes()); extend_unchecked(data, &(long_array.len() as u32).to_be_bytes());
} }
data.extend_from_slice(long_array.as_big_endian()); data.extend_from_slice(long_array.as_big_endian());
} }

View file

@ -30,7 +30,7 @@ pub fn read_with_u16_length<'a>(
width: usize, width: usize,
) -> Result<&'a [u8], UnexpectedEofError> { ) -> Result<&'a [u8], UnexpectedEofError> {
let length = data.read_u16()?; let length = data.read_u16()?;
let length_in_bytes = length as usize * width; let length_in_bytes: usize = length as usize * width;
data.read_slice(length_in_bytes) data.read_slice(length_in_bytes)
} }
@ -49,6 +49,25 @@ pub fn read_string<'a>(data: &mut Reader<'a>) -> Result<&'a Mutf8Str, Unexpected
Ok(Mutf8Str::from_slice(data)) Ok(Mutf8Str::from_slice(data))
} }
/// Skips over a string that is prefixed with a big-endian `u16` byte length, without building a
/// slice for its contents.
///
/// # Errors
///
/// Returns [`UnexpectedEofError`] if fewer than 2 bytes remain for the length prefix, or if fewer
/// than `length` bytes remain after the prefix. In the second case the 2-byte prefix has already
/// been consumed.
pub fn skip_string(data: &mut Reader<'_>) -> Result<(), UnexpectedEofError> {
    // Reason about the number of bytes left instead of computing `cur + n > end`, so the bounds
    // checks cannot overflow regardless of where the buffer is located in the address space.
    let remaining = data.end_addr().saturating_sub(data.cur_addr());
    if remaining < 2 {
        return Err(UnexpectedEofError);
    }

    // SAFETY: we just checked that at least 2 bytes remain, so reading a `u16` stays in bounds.
    let raw_length = unsafe { data.read_type_unchecked::<u16>() };
    // The prefix is stored big-endian; convert it to the native representation.
    let length_in_bytes = usize::from(u16::from_be(raw_length));

    // `remaining >= 2` was established above, so this subtraction cannot underflow.
    if remaining - 2 < length_in_bytes {
        return Err(UnexpectedEofError);
    }

    // SAFETY: at least `length_in_bytes` bytes remain after the 2-byte prefix.
    unsafe { data.skip_unchecked(length_in_bytes) };
    Ok(())
}
pub fn read_u8_array<'a>(data: &mut Reader<'a>) -> Result<&'a [u8], UnexpectedEofError> { pub fn read_u8_array<'a>(data: &mut Reader<'a>) -> Result<&'a [u8], UnexpectedEofError> {
read_with_u32_length(data, 1) read_with_u32_length(data, 1)
} }
@ -79,8 +98,8 @@ pub fn write_with_u32_length(data: &mut Vec<u8>, width: usize, value: &[u8]) {
let length = value.len() / width; let length = value.len() / width;
data.reserve(4 + value.len()); data.reserve(4 + value.len());
unsafe { unsafe {
unchecked_extend(data, &(length as u32).to_be_bytes()); extend_unchecked(data, &(length as u32).to_be_bytes());
unchecked_extend(data, value); extend_unchecked(data, value);
} }
} }
@ -91,7 +110,7 @@ pub fn write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
data.reserve(2 + value.len()); data.reserve(2 + value.len());
// SAFETY: We reserved enough capacity // SAFETY: We reserved enough capacity
unsafe { unsafe {
unchecked_write_string(data, value); write_string_unchecked(data, value);
} }
} }
/// Write a string to a Vec<u8> without checking if the Vec has enough capacity. /// Write a string to a Vec<u8> without checking if the Vec has enough capacity.
@ -101,9 +120,9 @@ pub fn write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
/// ///
/// You must reserve enough capacity (2 + value.len()) in the Vec before calling this function. /// You must reserve enough capacity (2 + value.len()) in the Vec before calling this function.
#[inline] #[inline]
pub unsafe fn unchecked_write_string(data: &mut Vec<u8>, value: &Mutf8Str) { pub unsafe fn write_string_unchecked(data: &mut Vec<u8>, value: &Mutf8Str) {
unchecked_extend(data, &(value.len() as u16).to_be_bytes()); extend_unchecked(data, &(value.len() as u16).to_be_bytes());
unchecked_extend(data, value.as_bytes()); extend_unchecked(data, value.as_bytes());
} }
/// Extend a Vec<u8> with a slice of u8 without checking if the Vec has enough capacity. /// Extend a Vec<u8> with a slice of u8 without checking if the Vec has enough capacity.
@ -114,7 +133,7 @@ pub unsafe fn unchecked_write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
/// ///
/// You must reserve enough capacity in the Vec before calling this function. /// You must reserve enough capacity in the Vec before calling this function.
#[inline] #[inline]
pub unsafe fn unchecked_extend(data: &mut Vec<u8>, value: &[u8]) { pub unsafe fn extend_unchecked(data: &mut Vec<u8>, value: &[u8]) {
let ptr = data.as_mut_ptr(); let ptr = data.as_mut_ptr();
let len = data.len(); let len = data.len();
std::ptr::copy_nonoverlapping(value.as_ptr(), ptr.add(len), value.len()); std::ptr::copy_nonoverlapping(value.as_ptr(), ptr.add(len), value.len());
@ -122,7 +141,7 @@ pub unsafe fn unchecked_extend(data: &mut Vec<u8>, value: &[u8]) {
} }
#[inline] #[inline]
pub unsafe fn unchecked_push(data: &mut Vec<u8>, value: u8) { pub unsafe fn push_unchecked(data: &mut Vec<u8>, value: u8) {
let ptr = data.as_mut_ptr(); let ptr = data.as_mut_ptr();
let len = data.len(); let len = data.len();
std::ptr::write(ptr.add(len), value); std::ptr::write(ptr.add(len), value);

View file

@ -1,7 +1,7 @@
use std::mem::{self, MaybeUninit}; use std::mem::{self, MaybeUninit};
use crate::{ use crate::{
common::{read_string, unchecked_push, unchecked_write_string, END_ID, MAX_DEPTH}, common::{push_unchecked, read_string, write_string_unchecked, END_ID, MAX_DEPTH},
error::NonRootError, error::NonRootError,
mutf8::Mutf8String, mutf8::Mutf8String,
reader::Reader, reader::Reader,
@ -84,9 +84,9 @@ impl NbtCompound {
// SAFETY: We just reserved enough space for the tag ID, the name length, the name, and // SAFETY: We just reserved enough space for the tag ID, the name length, the name, and
// 4 bytes of tag data. // 4 bytes of tag data.
unsafe { unsafe {
unchecked_push(data, tag.id()); push_unchecked(data, tag.id());
unchecked_write_string(data, name); write_string_unchecked(data, name);
tag.unchecked_write_without_tag_type(data); tag.write_without_tag_type_unchecked(data);
} }
} }
data.push(END_ID); data.push(END_ID);

View file

@ -1,8 +1,8 @@
use crate::{ use crate::{
common::{ common::{
read_i8_array, read_int_array, read_long_array, read_string, read_u8_array, read_i8_array, read_int_array, read_long_array, read_string, read_u8_array,
read_with_u32_length, slice_i8_into_u8, slice_into_u8_big_endian, unchecked_extend, read_with_u32_length, slice_i8_into_u8, slice_into_u8_big_endian, extend_unchecked,
unchecked_push, write_string, write_u32, write_with_u32_length, BYTE_ARRAY_ID, BYTE_ID, push_unchecked, write_string, write_u32, write_with_u32_length, BYTE_ARRAY_ID, BYTE_ID,
COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID,
LONG_ID, SHORT_ID, STRING_ID, LONG_ID, SHORT_ID, STRING_ID,
}, },
@ -117,8 +117,8 @@ impl NbtList {
data.reserve(5); data.reserve(5);
// SAFETY: we just reserved 5 bytes // SAFETY: we just reserved 5 bytes
unsafe { unsafe {
unchecked_push(data, COMPOUND_ID); push_unchecked(data, COMPOUND_ID);
unchecked_extend(data, &(compounds.len() as u32).to_be_bytes()); extend_unchecked(data, &(compounds.len() as u32).to_be_bytes());
} }
for compound in compounds { for compound in compounds {
compound.write(data); compound.write(data);

View file

@ -8,10 +8,10 @@ use std::{io::Cursor, ops::Deref};
use crate::{ use crate::{
common::{ common::{
read_int_array, read_long_array, read_string, read_with_u32_length, extend_unchecked, push_unchecked, read_int_array, read_long_array, read_string,
slice_into_u8_big_endian, unchecked_extend, unchecked_push, write_string, BYTE_ARRAY_ID, read_with_u32_length, slice_into_u8_big_endian, write_string, BYTE_ARRAY_ID, BYTE_ID,
BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID,
LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
}, },
error::NonRootError, error::NonRootError,
mutf8::Mutf8String, mutf8::Mutf8String,
@ -325,29 +325,29 @@ impl NbtTag {
/// This function is unsafe because it doesn't check that there's enough space in the data. /// This function is unsafe because it doesn't check that there's enough space in the data.
/// 4 bytes MUST be reserved before calling this function. /// 4 bytes MUST be reserved before calling this function.
#[inline] #[inline]
unsafe fn unchecked_write_without_tag_type(&self, data: &mut Vec<u8>) { unsafe fn write_without_tag_type_unchecked(&self, data: &mut Vec<u8>) {
match self { match self {
NbtTag::Byte(byte) => unsafe { NbtTag::Byte(byte) => unsafe {
unchecked_push(data, *byte as u8); push_unchecked(data, *byte as u8);
}, },
NbtTag::Short(short) => unsafe { NbtTag::Short(short) => unsafe {
unchecked_extend(data, &short.to_be_bytes()); extend_unchecked(data, &short.to_be_bytes());
}, },
NbtTag::Int(int) => unsafe { NbtTag::Int(int) => unsafe {
unchecked_extend(data, &int.to_be_bytes()); extend_unchecked(data, &int.to_be_bytes());
}, },
NbtTag::Long(long) => { NbtTag::Long(long) => {
data.extend_from_slice(&long.to_be_bytes()); data.extend_from_slice(&long.to_be_bytes());
} }
NbtTag::Float(float) => unsafe { NbtTag::Float(float) => unsafe {
unchecked_extend(data, &float.to_be_bytes()); extend_unchecked(data, &float.to_be_bytes());
}, },
NbtTag::Double(double) => { NbtTag::Double(double) => {
data.extend_from_slice(&double.to_be_bytes()); data.extend_from_slice(&double.to_be_bytes());
} }
NbtTag::ByteArray(byte_array) => { NbtTag::ByteArray(byte_array) => {
unsafe { unsafe {
unchecked_extend(data, &(byte_array.len() as u32).to_be_bytes()); extend_unchecked(data, &(byte_array.len() as u32).to_be_bytes());
} }
data.extend_from_slice(byte_array); data.extend_from_slice(byte_array);
} }
@ -362,13 +362,13 @@ impl NbtTag {
} }
NbtTag::IntArray(int_array) => { NbtTag::IntArray(int_array) => {
unsafe { unsafe {
unchecked_extend(data, &(int_array.len() as u32).to_be_bytes()); extend_unchecked(data, &(int_array.len() as u32).to_be_bytes());
} }
data.extend_from_slice(&slice_into_u8_big_endian(int_array)); data.extend_from_slice(&slice_into_u8_big_endian(int_array));
} }
NbtTag::LongArray(long_array) => { NbtTag::LongArray(long_array) => {
unsafe { unsafe {
unchecked_extend(data, &(long_array.len() as u32).to_be_bytes()); extend_unchecked(data, &(long_array.len() as u32).to_be_bytes());
} }
data.extend_from_slice(&slice_into_u8_big_endian(long_array)); data.extend_from_slice(&slice_into_u8_big_endian(long_array));
} }
@ -379,8 +379,8 @@ impl NbtTag {
data.reserve(1 + 4); data.reserve(1 + 4);
// SAFETY: We just reserved enough space for the tag ID and 4 bytes of tag data. // SAFETY: We just reserved enough space for the tag ID and 4 bytes of tag data.
unsafe { unsafe {
unchecked_push(data, self.id()); push_unchecked(data, self.id());
self.unchecked_write_without_tag_type(data); self.write_without_tag_type_unchecked(data);
} }
} }

View file

@ -24,6 +24,15 @@ impl<'a> Reader<'a> {
} }
} }
/// Returns the reader's current position (`self.cur`) cast to an integer address.
#[inline]
pub fn cur_addr(&self) -> usize {
self.cur as usize
}
/// Returns the reader's end marker (`self.end`) cast to an integer address.
///
/// Comparing this against [`Self::cur_addr`] tells a caller how many bytes are left to read.
#[inline]
pub fn end_addr(&self) -> usize {
self.end as usize
}
pub fn ensure_can_read(&self, size: usize) -> Result<(), UnexpectedEofError> { pub fn ensure_can_read(&self, size: usize) -> Result<(), UnexpectedEofError> {
let data_addr = self.cur as usize; let data_addr = self.cur as usize;
let end_addr = self.end as usize; let end_addr = self.end as usize;
@ -35,7 +44,7 @@ impl<'a> Reader<'a> {
} }
#[inline] #[inline]
pub unsafe fn unchecked_read_type<T>(&mut self) -> T { pub unsafe fn read_type_unchecked<T>(&mut self) -> T {
let value = unsafe { self.cur.cast::<T>().read_unaligned() }; let value = unsafe { self.cur.cast::<T>().read_unaligned() };
self.cur = unsafe { self.cur.add(mem::size_of::<T>()) }; self.cur = unsafe { self.cur.add(mem::size_of::<T>()) };
value value
@ -43,7 +52,7 @@ impl<'a> Reader<'a> {
pub fn read_type<T: Copy>(&mut self) -> Result<T, UnexpectedEofError> { pub fn read_type<T: Copy>(&mut self) -> Result<T, UnexpectedEofError> {
self.ensure_can_read(mem::size_of::<T>())?; self.ensure_can_read(mem::size_of::<T>())?;
Ok(unsafe { self.unchecked_read_type() }) Ok(unsafe { self.read_type_unchecked() })
} }
#[inline] #[inline]
@ -57,10 +66,7 @@ impl<'a> Reader<'a> {
#[inline] #[inline]
pub fn read_u16(&mut self) -> Result<u16, UnexpectedEofError> { pub fn read_u16(&mut self) -> Result<u16, UnexpectedEofError> {
let value = self.read_type::<u16>(); self.read_type::<u16>().map(u16::to_be)
#[cfg(target_endian = "little")]
let value = value.map(u16::swap_bytes);
value
} }
#[inline] #[inline]
pub fn read_i16(&mut self) -> Result<i16, UnexpectedEofError> { pub fn read_i16(&mut self) -> Result<i16, UnexpectedEofError> {
@ -69,10 +75,7 @@ impl<'a> Reader<'a> {
#[inline] #[inline]
pub fn read_u32(&mut self) -> Result<u32, UnexpectedEofError> { pub fn read_u32(&mut self) -> Result<u32, UnexpectedEofError> {
let value = self.read_type::<u32>(); self.read_type::<u32>().map(u32::to_be)
#[cfg(target_endian = "little")]
let value = value.map(u32::swap_bytes);
value
} }
#[inline] #[inline]
pub fn read_i32(&mut self) -> Result<i32, UnexpectedEofError> { pub fn read_i32(&mut self) -> Result<i32, UnexpectedEofError> {
@ -81,10 +84,7 @@ impl<'a> Reader<'a> {
#[inline] #[inline]
pub fn read_u64(&mut self) -> Result<u64, UnexpectedEofError> { pub fn read_u64(&mut self) -> Result<u64, UnexpectedEofError> {
let value = self.read_type::<u64>(); self.read_type::<u64>().map(u64::to_be)
#[cfg(target_endian = "little")]
let value = value.map(u64::swap_bytes);
value
} }
#[inline] #[inline]
pub fn read_i64(&mut self) -> Result<i64, UnexpectedEofError> { pub fn read_i64(&mut self) -> Result<i64, UnexpectedEofError> {
@ -108,11 +108,16 @@ impl<'a> Reader<'a> {
Ok(()) Ok(())
} }
/// Advances the read position by `size` bytes without performing any bounds check.
///
/// # Safety
///
/// `size` is applied to `self.cur` with a raw pointer `add`, so the caller must have already
/// verified that at least `size` bytes remain between `self.cur` and `self.end`.
#[inline]
pub unsafe fn skip_unchecked(&mut self, size: usize) {
// Raw pointer bump; nothing here checks that the new position is still in bounds.
self.cur = unsafe { self.cur.add(size) };
}
#[inline] #[inline]
pub fn read_slice(&mut self, size: usize) -> Result<&'a [u8], UnexpectedEofError> { pub fn read_slice(&mut self, size: usize) -> Result<&'a [u8], UnexpectedEofError> {
self.ensure_can_read(size)?; self.ensure_can_read(size)?;
let slice = unsafe { std::slice::from_raw_parts(self.cur, size) }; let slice = unsafe { std::slice::from_raw_parts(self.cur, size) };
self.cur = unsafe { self.cur.add(size) }; unsafe { self.skip_unchecked(size) };
Ok(slice) Ok(slice)
} }
} }