1
0
Fork 0
mirror of https://github.com/azalea-rs/simdnbt.git synced 2025-08-02 15:36:03 +00:00

more optimal string skipping in read_tag_in_compound

This commit is contained in:
mat 2025-01-18 08:06:58 +00:00
parent bdf55aab6e
commit d4f50f2420
6 changed files with 84 additions and 60 deletions

View file

@ -2,10 +2,10 @@ use std::{hint::unreachable_unchecked, mem::MaybeUninit};
use crate::{
common::{
read_int_array, read_long_array, read_string, read_with_u32_length, unchecked_extend,
unchecked_push, unchecked_write_string, write_string, BYTE_ARRAY_ID, BYTE_ID, COMPOUND_ID,
DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID, LONG_ID,
MAX_DEPTH, SHORT_ID, STRING_ID,
extend_unchecked, push_unchecked, read_int_array, read_long_array, read_string,
read_with_u32_length, skip_string, write_string, write_string_unchecked, BYTE_ARRAY_ID,
BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID,
LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
},
error::NonRootError,
reader::Reader,
@ -54,8 +54,8 @@ impl<'a: 'tape, 'tape> NbtCompound<'a, 'tape> {
// SAFETY: We just reserved enough space for the tag ID, the name length, the name, and
// 4 bytes of tag data.
unsafe {
unchecked_push(data, tag.id());
unchecked_write_string(data, name);
push_unchecked(data, tag.id());
write_string_unchecked(data, name);
}
write_tag(tag, data);
@ -403,7 +403,7 @@ pub(crate) fn read_tag_in_compound<'a>(
let tag_name_ptr = data.cur;
debug_assert_eq!(tag_name_ptr as u64 >> 56, 0);
read_string(data)?;
skip_string(data)?;
tapes.main.push(TapeElement::new(tag_name_ptr as u64));
read_tag(data, tapes, stack, tag_type)
@ -433,19 +433,19 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
let el = tag.element();
match el.kind() {
TapeTagKind::Byte => unsafe {
unchecked_push(data, tag.byte().unwrap() as u8);
push_unchecked(data, tag.byte().unwrap() as u8);
},
TapeTagKind::Short => unsafe {
unchecked_extend(data, &tag.short().unwrap().to_be_bytes());
extend_unchecked(data, &tag.short().unwrap().to_be_bytes());
},
TapeTagKind::Int => unsafe {
unchecked_extend(data, &tag.int().unwrap().to_be_bytes());
extend_unchecked(data, &tag.int().unwrap().to_be_bytes());
},
TapeTagKind::Long => {
data.extend_from_slice(&tag.long().unwrap().to_be_bytes());
}
TapeTagKind::Float => unsafe {
unchecked_extend(data, &tag.float().unwrap().to_be_bytes());
extend_unchecked(data, &tag.float().unwrap().to_be_bytes());
},
TapeTagKind::Double => {
data.extend_from_slice(&tag.double().unwrap().to_be_bytes());
@ -453,7 +453,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
TapeTagKind::ByteArray => {
let byte_array = tag.byte_array().unwrap();
unsafe {
unchecked_extend(data, &(byte_array.len() as u32).to_be_bytes());
extend_unchecked(data, &(byte_array.len() as u32).to_be_bytes());
}
data.extend_from_slice(byte_array);
}
@ -471,7 +471,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
let int_array =
unsafe { list::u32_prefixed_list_to_rawlist_unchecked::<i32>(el.ptr()).unwrap() };
unsafe {
unchecked_extend(data, &(int_array.len() as u32).to_be_bytes());
extend_unchecked(data, &(int_array.len() as u32).to_be_bytes());
}
data.extend_from_slice(int_array.as_big_endian());
}
@ -479,7 +479,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
let long_array =
unsafe { list::u32_prefixed_list_to_rawlist_unchecked::<i64>(el.ptr()).unwrap() };
unsafe {
unchecked_extend(data, &(long_array.len() as u32).to_be_bytes());
extend_unchecked(data, &(long_array.len() as u32).to_be_bytes());
}
data.extend_from_slice(long_array.as_big_endian());
}

View file

@ -30,7 +30,7 @@ pub fn read_with_u16_length<'a>(
width: usize,
) -> Result<&'a [u8], UnexpectedEofError> {
let length = data.read_u16()?;
let length_in_bytes = length as usize * width;
let length_in_bytes: usize = length as usize * width;
data.read_slice(length_in_bytes)
}
@ -49,6 +49,25 @@ pub fn read_string<'a>(data: &mut Reader<'a>) -> Result<&'a Mutf8Str, Unexpected
Ok(Mutf8Str::from_slice(data))
}
/// Advance `data` past a length-prefixed string without returning its contents.
///
/// The string is laid out as a big-endian `u16` byte length followed by that many bytes.
///
/// # Errors
///
/// Returns [`UnexpectedEofError`] if fewer than 2 bytes remain for the length prefix, or if
/// the string body would extend past the end of the reader.
pub fn skip_string(data: &mut Reader<'_>) -> Result<(), UnexpectedEofError> {
    let start = data.cur_addr();
    let limit = data.end_addr();

    if start + 2 > limit {
        return Err(UnexpectedEofError);
    }
    // SAFETY: the check above guarantees that at least 2 bytes are readable at the cursor.
    let raw_len = unsafe { data.read_type_unchecked::<u16>() };
    let body_len = u16::from_be(raw_len) as usize;

    // The cursor has already moved past the prefix, so the bound is computed from the
    // address captured before the read.
    if start + 2 + body_len > limit {
        return Err(UnexpectedEofError);
    }
    // SAFETY: the check above guarantees that `body_len` bytes remain after the prefix.
    unsafe { data.skip_unchecked(body_len) };
    Ok(())
}
pub fn read_u8_array<'a>(data: &mut Reader<'a>) -> Result<&'a [u8], UnexpectedEofError> {
read_with_u32_length(data, 1)
}
@ -79,8 +98,8 @@ pub fn write_with_u32_length(data: &mut Vec<u8>, width: usize, value: &[u8]) {
let length = value.len() / width;
data.reserve(4 + value.len());
unsafe {
unchecked_extend(data, &(length as u32).to_be_bytes());
unchecked_extend(data, value);
extend_unchecked(data, &(length as u32).to_be_bytes());
extend_unchecked(data, value);
}
}
@ -91,7 +110,7 @@ pub fn write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
data.reserve(2 + value.len());
// SAFETY: We reserved enough capacity
unsafe {
unchecked_write_string(data, value);
write_string_unchecked(data, value);
}
}
/// Write a string to a Vec<u8> without checking if the Vec has enough capacity.
@ -101,9 +120,9 @@ pub fn write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
///
/// You must reserve enough capacity (2 + value.len()) in the Vec before calling this function.
#[inline]
pub unsafe fn unchecked_write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
unchecked_extend(data, &(value.len() as u16).to_be_bytes());
unchecked_extend(data, value.as_bytes());
pub unsafe fn write_string_unchecked(data: &mut Vec<u8>, value: &Mutf8Str) {
extend_unchecked(data, &(value.len() as u16).to_be_bytes());
extend_unchecked(data, value.as_bytes());
}
/// Extend a Vec<u8> with a slice of u8 without checking if the Vec has enough capacity.
@ -114,7 +133,7 @@ pub unsafe fn unchecked_write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
///
/// You must reserve enough capacity in the Vec before calling this function.
#[inline]
pub unsafe fn unchecked_extend(data: &mut Vec<u8>, value: &[u8]) {
pub unsafe fn extend_unchecked(data: &mut Vec<u8>, value: &[u8]) {
let ptr = data.as_mut_ptr();
let len = data.len();
std::ptr::copy_nonoverlapping(value.as_ptr(), ptr.add(len), value.len());
@ -122,7 +141,7 @@ pub unsafe fn unchecked_extend(data: &mut Vec<u8>, value: &[u8]) {
}
#[inline]
pub unsafe fn unchecked_push(data: &mut Vec<u8>, value: u8) {
pub unsafe fn push_unchecked(data: &mut Vec<u8>, value: u8) {
let ptr = data.as_mut_ptr();
let len = data.len();
std::ptr::write(ptr.add(len), value);

View file

@ -1,7 +1,7 @@
use std::mem::{self, MaybeUninit};
use crate::{
common::{read_string, unchecked_push, unchecked_write_string, END_ID, MAX_DEPTH},
common::{push_unchecked, read_string, write_string_unchecked, END_ID, MAX_DEPTH},
error::NonRootError,
mutf8::Mutf8String,
reader::Reader,
@ -84,9 +84,9 @@ impl NbtCompound {
// SAFETY: We just reserved enough space for the tag ID, the name length, the name, and
// 4 bytes of tag data.
unsafe {
unchecked_push(data, tag.id());
unchecked_write_string(data, name);
tag.unchecked_write_without_tag_type(data);
push_unchecked(data, tag.id());
write_string_unchecked(data, name);
tag.write_without_tag_type_unchecked(data);
}
}
data.push(END_ID);

View file

@ -1,8 +1,8 @@
use crate::{
common::{
read_i8_array, read_int_array, read_long_array, read_string, read_u8_array,
read_with_u32_length, slice_i8_into_u8, slice_into_u8_big_endian, unchecked_extend,
unchecked_push, write_string, write_u32, write_with_u32_length, BYTE_ARRAY_ID, BYTE_ID,
read_with_u32_length, slice_i8_into_u8, slice_into_u8_big_endian, extend_unchecked,
push_unchecked, write_string, write_u32, write_with_u32_length, BYTE_ARRAY_ID, BYTE_ID,
COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID,
LONG_ID, SHORT_ID, STRING_ID,
},
@ -117,8 +117,8 @@ impl NbtList {
data.reserve(5);
// SAFETY: we just reserved 5 bytes
unsafe {
unchecked_push(data, COMPOUND_ID);
unchecked_extend(data, &(compounds.len() as u32).to_be_bytes());
push_unchecked(data, COMPOUND_ID);
extend_unchecked(data, &(compounds.len() as u32).to_be_bytes());
}
for compound in compounds {
compound.write(data);

View file

@ -8,10 +8,10 @@ use std::{io::Cursor, ops::Deref};
use crate::{
common::{
read_int_array, read_long_array, read_string, read_with_u32_length,
slice_into_u8_big_endian, unchecked_extend, unchecked_push, write_string, BYTE_ARRAY_ID,
BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID,
LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
extend_unchecked, push_unchecked, read_int_array, read_long_array, read_string,
read_with_u32_length, slice_into_u8_big_endian, write_string, BYTE_ARRAY_ID, BYTE_ID,
COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID,
LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
},
error::NonRootError,
mutf8::Mutf8String,
@ -325,29 +325,29 @@ impl NbtTag {
/// This function is unsafe because it doesn't check that there's enough space in the data.
/// 4 bytes MUST be reserved before calling this function.
#[inline]
unsafe fn unchecked_write_without_tag_type(&self, data: &mut Vec<u8>) {
unsafe fn write_without_tag_type_unchecked(&self, data: &mut Vec<u8>) {
match self {
NbtTag::Byte(byte) => unsafe {
unchecked_push(data, *byte as u8);
push_unchecked(data, *byte as u8);
},
NbtTag::Short(short) => unsafe {
unchecked_extend(data, &short.to_be_bytes());
extend_unchecked(data, &short.to_be_bytes());
},
NbtTag::Int(int) => unsafe {
unchecked_extend(data, &int.to_be_bytes());
extend_unchecked(data, &int.to_be_bytes());
},
NbtTag::Long(long) => {
data.extend_from_slice(&long.to_be_bytes());
}
NbtTag::Float(float) => unsafe {
unchecked_extend(data, &float.to_be_bytes());
extend_unchecked(data, &float.to_be_bytes());
},
NbtTag::Double(double) => {
data.extend_from_slice(&double.to_be_bytes());
}
NbtTag::ByteArray(byte_array) => {
unsafe {
unchecked_extend(data, &(byte_array.len() as u32).to_be_bytes());
extend_unchecked(data, &(byte_array.len() as u32).to_be_bytes());
}
data.extend_from_slice(byte_array);
}
@ -362,13 +362,13 @@ impl NbtTag {
}
NbtTag::IntArray(int_array) => {
unsafe {
unchecked_extend(data, &(int_array.len() as u32).to_be_bytes());
extend_unchecked(data, &(int_array.len() as u32).to_be_bytes());
}
data.extend_from_slice(&slice_into_u8_big_endian(int_array));
}
NbtTag::LongArray(long_array) => {
unsafe {
unchecked_extend(data, &(long_array.len() as u32).to_be_bytes());
extend_unchecked(data, &(long_array.len() as u32).to_be_bytes());
}
data.extend_from_slice(&slice_into_u8_big_endian(long_array));
}
@ -379,8 +379,8 @@ impl NbtTag {
data.reserve(1 + 4);
// SAFETY: We just reserved enough space for the tag ID and 4 bytes of tag data.
unsafe {
unchecked_push(data, self.id());
self.unchecked_write_without_tag_type(data);
push_unchecked(data, self.id());
self.write_without_tag_type_unchecked(data);
}
}

View file

@ -24,6 +24,15 @@ impl<'a> Reader<'a> {
}
}
/// Returns the reader's current position (`self.cur`) as an integer address.
///
/// Intended for address arithmetic in bounds checks together with [`Self::end_addr`].
#[inline]
pub fn cur_addr(&self) -> usize {
self.cur as usize
}
/// Returns the reader's end pointer (`self.end`) as an integer address.
///
/// Bounds checks treat this as an exclusive upper limit: a read of `n` bytes is allowed
/// when `cur_addr() + n` does not exceed it.
#[inline]
pub fn end_addr(&self) -> usize {
self.end as usize
}
pub fn ensure_can_read(&self, size: usize) -> Result<(), UnexpectedEofError> {
let data_addr = self.cur as usize;
let end_addr = self.end as usize;
@ -35,7 +44,7 @@ impl<'a> Reader<'a> {
}
#[inline]
pub unsafe fn unchecked_read_type<T>(&mut self) -> T {
pub unsafe fn read_type_unchecked<T>(&mut self) -> T {
let value = unsafe { self.cur.cast::<T>().read_unaligned() };
self.cur = unsafe { self.cur.add(mem::size_of::<T>()) };
value
@ -43,7 +52,7 @@ impl<'a> Reader<'a> {
pub fn read_type<T: Copy>(&mut self) -> Result<T, UnexpectedEofError> {
self.ensure_can_read(mem::size_of::<T>())?;
Ok(unsafe { self.unchecked_read_type() })
Ok(unsafe { self.read_type_unchecked() })
}
#[inline]
@ -57,10 +66,7 @@ impl<'a> Reader<'a> {
#[inline]
pub fn read_u16(&mut self) -> Result<u16, UnexpectedEofError> {
let value = self.read_type::<u16>();
#[cfg(target_endian = "little")]
let value = value.map(u16::swap_bytes);
value
self.read_type::<u16>().map(u16::to_be)
}
#[inline]
pub fn read_i16(&mut self) -> Result<i16, UnexpectedEofError> {
@ -69,10 +75,7 @@ impl<'a> Reader<'a> {
#[inline]
pub fn read_u32(&mut self) -> Result<u32, UnexpectedEofError> {
let value = self.read_type::<u32>();
#[cfg(target_endian = "little")]
let value = value.map(u32::swap_bytes);
value
self.read_type::<u32>().map(u32::to_be)
}
#[inline]
pub fn read_i32(&mut self) -> Result<i32, UnexpectedEofError> {
@ -81,10 +84,7 @@ impl<'a> Reader<'a> {
#[inline]
pub fn read_u64(&mut self) -> Result<u64, UnexpectedEofError> {
let value = self.read_type::<u64>();
#[cfg(target_endian = "little")]
let value = value.map(u64::swap_bytes);
value
self.read_type::<u64>().map(u64::to_be)
}
#[inline]
pub fn read_i64(&mut self) -> Result<i64, UnexpectedEofError> {
@ -108,11 +108,16 @@ impl<'a> Reader<'a> {
Ok(())
}
/// Move the cursor forward by `size` bytes without performing any bounds check.
///
/// # Safety
///
/// The caller must ensure that at least `size` bytes remain between the current position
/// and the end of the underlying buffer, since the cursor is advanced with raw pointer
/// arithmetic.
#[inline]
pub unsafe fn skip_unchecked(&mut self, size: usize) {
    // SAFETY: the caller guarantees that `size` bytes are available past the cursor.
    unsafe {
        self.cur = self.cur.add(size);
    }
}
#[inline]
pub fn read_slice(&mut self, size: usize) -> Result<&'a [u8], UnexpectedEofError> {
self.ensure_can_read(size)?;
let slice = unsafe { std::slice::from_raw_parts(self.cur, size) };
self.cur = unsafe { self.cur.add(size) };
unsafe { self.skip_unchecked(size) };
Ok(slice)
}
}