From d4f50f2420c8170da4934ca3cc0f11fc0ffcac5c Mon Sep 17 00:00:00 2001 From: mat Date: Sat, 18 Jan 2025 08:06:58 +0000 Subject: [PATCH] more optimal string skipping in read_tag_in_compound --- simdnbt/src/borrow/compound.rs | 28 ++++++++++++------------- simdnbt/src/common.rs | 37 +++++++++++++++++++++++++--------- simdnbt/src/owned/compound.rs | 8 ++++---- simdnbt/src/owned/list.rs | 8 ++++---- simdnbt/src/owned/mod.rs | 28 ++++++++++++------------- simdnbt/src/reader.rs | 35 ++++++++++++++++++-------------- 6 files changed, 84 insertions(+), 60 deletions(-) diff --git a/simdnbt/src/borrow/compound.rs b/simdnbt/src/borrow/compound.rs index f8e0001..8f42ed9 100644 --- a/simdnbt/src/borrow/compound.rs +++ b/simdnbt/src/borrow/compound.rs @@ -2,10 +2,10 @@ use std::{hint::unreachable_unchecked, mem::MaybeUninit}; use crate::{ common::{ - read_int_array, read_long_array, read_string, read_with_u32_length, unchecked_extend, - unchecked_push, unchecked_write_string, write_string, BYTE_ARRAY_ID, BYTE_ID, COMPOUND_ID, - DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID, LONG_ID, - MAX_DEPTH, SHORT_ID, STRING_ID, + extend_unchecked, push_unchecked, read_int_array, read_long_array, read_string, + read_with_u32_length, skip_string, write_string, write_string_unchecked, BYTE_ARRAY_ID, + BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, + LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID, }, error::NonRootError, reader::Reader, @@ -54,8 +54,8 @@ impl<'a: 'tape, 'tape> NbtCompound<'a, 'tape> { // SAFETY: We just reserved enough space for the tag ID, the name length, the name, and // 4 bytes of tag data. unsafe { - unchecked_push(data, tag.id()); - unchecked_write_string(data, name); + push_unchecked(data, tag.id()); + write_string_unchecked(data, name); } write_tag(tag, data); @@ -403,7 +403,7 @@ pub(crate) fn read_tag_in_compound<'a>( let tag_name_ptr = data.cur; debug_assert_eq!(tag_name_ptr as u64 >> 56, 0); - read_string(data)?; + skip_string(data)?; tapes.main.push(TapeElement::new(tag_name_ptr as u64)); read_tag(data, tapes, stack, tag_type) @@ -433,19 +433,19 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec) { let el = tag.element(); match el.kind() { TapeTagKind::Byte => unsafe { - unchecked_push(data, tag.byte().unwrap() as u8); + push_unchecked(data, tag.byte().unwrap() as u8); }, TapeTagKind::Short => unsafe { - unchecked_extend(data, &tag.short().unwrap().to_be_bytes()); + extend_unchecked(data, &tag.short().unwrap().to_be_bytes()); }, TapeTagKind::Int => unsafe { - unchecked_extend(data, &tag.int().unwrap().to_be_bytes()); + extend_unchecked(data, &tag.int().unwrap().to_be_bytes()); }, TapeTagKind::Long => { data.extend_from_slice(&tag.long().unwrap().to_be_bytes()); } TapeTagKind::Float => unsafe { - unchecked_extend(data, &tag.float().unwrap().to_be_bytes()); + extend_unchecked(data, &tag.float().unwrap().to_be_bytes()); }, TapeTagKind::Double => { data.extend_from_slice(&tag.double().unwrap().to_be_bytes()); @@ -453,7 +453,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec) { TapeTagKind::ByteArray => { let byte_array = tag.byte_array().unwrap(); unsafe { - unchecked_extend(data, &(byte_array.len() as u32).to_be_bytes()); + extend_unchecked(data, &(byte_array.len() as u32).to_be_bytes()); } data.extend_from_slice(byte_array); } @@ -471,7 +471,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec) { let int_array = unsafe { list::u32_prefixed_list_to_rawlist_unchecked::(el.ptr()).unwrap() }; unsafe { - unchecked_extend(data, &(int_array.len() as u32).to_be_bytes()); + extend_unchecked(data, &(int_array.len() as u32).to_be_bytes()); } data.extend_from_slice(int_array.as_big_endian()); } @@ -479,7 +479,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec) { let long_array = unsafe { list::u32_prefixed_list_to_rawlist_unchecked::(el.ptr()).unwrap() }; unsafe { - unchecked_extend(data, &(long_array.len() as u32).to_be_bytes()); + extend_unchecked(data, &(long_array.len() as u32).to_be_bytes()); } data.extend_from_slice(long_array.as_big_endian()); } diff --git a/simdnbt/src/common.rs b/simdnbt/src/common.rs index 6801976..c4986f1 100644 --- a/simdnbt/src/common.rs +++ b/simdnbt/src/common.rs @@ -30,7 +30,7 @@ pub fn read_with_u16_length<'a>( width: usize, ) -> Result<&'a [u8], UnexpectedEofError> { let length = data.read_u16()?; - let length_in_bytes = length as usize * width; + let length_in_bytes: usize = length as usize * width; data.read_slice(length_in_bytes) } @@ -49,6 +49,25 @@ pub fn read_string<'a>(data: &mut Reader<'a>) -> Result<&'a Mutf8Str, Unexpected Ok(Mutf8Str::from_slice(data)) } +pub fn skip_string(data: &mut Reader<'_>) -> Result<(), UnexpectedEofError> { + let cur_addr = data.cur_addr(); + let end_addr = data.end_addr(); + + if cur_addr + 2 > end_addr { + return Err(UnexpectedEofError); + } + + let length = unsafe { data.read_type_unchecked::() }.to_be(); + let length_in_bytes: usize = length as usize; + if cur_addr + 2 + length_in_bytes > end_addr { + return Err(UnexpectedEofError); + } + + unsafe { data.skip_unchecked(length_in_bytes) }; + + Ok(()) +} + pub fn read_u8_array<'a>(data: &mut Reader<'a>) -> Result<&'a [u8], UnexpectedEofError> { read_with_u32_length(data, 1) } @@ -79,8 +98,8 @@ pub fn write_with_u32_length(data: &mut Vec, width: usize, value: &[u8]) { let length = value.len() / width; data.reserve(4 + value.len()); unsafe { - unchecked_extend(data, &(length as u32).to_be_bytes()); - unchecked_extend(data, value); + extend_unchecked(data, &(length as u32).to_be_bytes()); + extend_unchecked(data, value); } } @@ -91,7 +110,7 @@ pub fn write_string(data: &mut Vec, value: &Mutf8Str) { data.reserve(2 + value.len()); // SAFETY: We reserved enough capacity unsafe { - unchecked_write_string(data, value); + write_string_unchecked(data, value); } } /// Write a string to a Vec without checking if the Vec has enough capacity. @@ -101,9 +120,9 @@ pub fn write_string(data: &mut Vec, value: &Mutf8Str) { /// /// You must reserve enough capacity (2 + value.len()) in the Vec before calling this function. #[inline] -pub unsafe fn unchecked_write_string(data: &mut Vec, value: &Mutf8Str) { - unchecked_extend(data, &(value.len() as u16).to_be_bytes()); - unchecked_extend(data, value.as_bytes()); +pub unsafe fn write_string_unchecked(data: &mut Vec, value: &Mutf8Str) { + extend_unchecked(data, &(value.len() as u16).to_be_bytes()); + extend_unchecked(data, value.as_bytes()); } /// Extend a Vec with a slice of u8 without checking if the Vec has enough capacity. @@ -114,7 +133,7 @@ pub unsafe fn unchecked_write_string(data: &mut Vec, value: &Mutf8Str) { /// /// You must reserve enough capacity in the Vec before calling this function. #[inline] -pub unsafe fn unchecked_extend(data: &mut Vec, value: &[u8]) { +pub unsafe fn extend_unchecked(data: &mut Vec, value: &[u8]) { let ptr = data.as_mut_ptr(); let len = data.len(); std::ptr::copy_nonoverlapping(value.as_ptr(), ptr.add(len), value.len()); @@ -122,7 +141,7 @@ pub unsafe fn unchecked_extend(data: &mut Vec, value: &[u8]) { } #[inline] -pub unsafe fn unchecked_push(data: &mut Vec, value: u8) { +pub unsafe fn push_unchecked(data: &mut Vec, value: u8) { let ptr = data.as_mut_ptr(); let len = data.len(); std::ptr::write(ptr.add(len), value); diff --git a/simdnbt/src/owned/compound.rs b/simdnbt/src/owned/compound.rs index 0d96f4d..a613c4f 100644 --- a/simdnbt/src/owned/compound.rs +++ b/simdnbt/src/owned/compound.rs @@ -1,7 +1,7 @@ use std::mem::{self, MaybeUninit}; use crate::{ - common::{read_string, unchecked_push, unchecked_write_string, END_ID, MAX_DEPTH}, + common::{push_unchecked, read_string, write_string_unchecked, END_ID, MAX_DEPTH}, error::NonRootError, mutf8::Mutf8String, reader::Reader, @@ -84,9 +84,9 @@ impl NbtCompound { // SAFETY: We just reserved enough space for the tag ID, the name length, the name, and // 4 bytes of tag data. unsafe { - unchecked_push(data, tag.id()); - unchecked_write_string(data, name); - tag.unchecked_write_without_tag_type(data); + push_unchecked(data, tag.id()); + write_string_unchecked(data, name); + tag.write_without_tag_type_unchecked(data); } } data.push(END_ID); diff --git a/simdnbt/src/owned/list.rs b/simdnbt/src/owned/list.rs index b9b2427..117f6b3 100644 --- a/simdnbt/src/owned/list.rs +++ b/simdnbt/src/owned/list.rs @@ -1,8 +1,8 @@ use crate::{ common::{ read_i8_array, read_int_array, read_long_array, read_string, read_u8_array, - read_with_u32_length, slice_i8_into_u8, slice_into_u8_big_endian, unchecked_extend, - unchecked_push, write_string, write_u32, write_with_u32_length, BYTE_ARRAY_ID, BYTE_ID, + read_with_u32_length, slice_i8_into_u8, slice_into_u8_big_endian, extend_unchecked, + push_unchecked, write_string, write_u32, write_with_u32_length, BYTE_ARRAY_ID, BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID, LONG_ID, SHORT_ID, STRING_ID, }, @@ -117,8 +117,8 @@ impl NbtList { data.reserve(5); // SAFETY: we just reserved 5 bytes unsafe { - unchecked_push(data, COMPOUND_ID); - unchecked_extend(data, &(compounds.len() as u32).to_be_bytes()); + push_unchecked(data, COMPOUND_ID); + extend_unchecked(data, &(compounds.len() as u32).to_be_bytes()); } for compound in compounds { compound.write(data); diff --git a/simdnbt/src/owned/mod.rs b/simdnbt/src/owned/mod.rs index 6365ce9..83e1e2d 100644 --- a/simdnbt/src/owned/mod.rs +++ b/simdnbt/src/owned/mod.rs @@ -8,10 +8,10 @@ use std::{io::Cursor, ops::Deref}; use crate::{ common::{ - read_int_array, read_long_array, read_string, read_with_u32_length, - slice_into_u8_big_endian, unchecked_extend, unchecked_push, write_string, BYTE_ARRAY_ID, - BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, - LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID, + extend_unchecked, push_unchecked, read_int_array, read_long_array, read_string, + read_with_u32_length, slice_into_u8_big_endian, write_string, BYTE_ARRAY_ID, BYTE_ID, + COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID, + LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID, }, error::NonRootError, mutf8::Mutf8String, @@ -325,29 +325,29 @@ impl NbtTag { /// This function is unsafe because it doesn't check that there's enough space in the data. /// 4 bytes MUST be reserved before calling this function. #[inline] - unsafe fn unchecked_write_without_tag_type(&self, data: &mut Vec) { + unsafe fn write_without_tag_type_unchecked(&self, data: &mut Vec) { match self { NbtTag::Byte(byte) => unsafe { - unchecked_push(data, *byte as u8); + push_unchecked(data, *byte as u8); }, NbtTag::Short(short) => unsafe { - unchecked_extend(data, &short.to_be_bytes()); + extend_unchecked(data, &short.to_be_bytes()); }, NbtTag::Int(int) => unsafe { - unchecked_extend(data, &int.to_be_bytes()); + extend_unchecked(data, &int.to_be_bytes()); }, NbtTag::Long(long) => { data.extend_from_slice(&long.to_be_bytes()); } NbtTag::Float(float) => unsafe { - unchecked_extend(data, &float.to_be_bytes()); + extend_unchecked(data, &float.to_be_bytes()); }, NbtTag::Double(double) => { data.extend_from_slice(&double.to_be_bytes()); } NbtTag::ByteArray(byte_array) => { unsafe { - unchecked_extend(data, &(byte_array.len() as u32).to_be_bytes()); + extend_unchecked(data, &(byte_array.len() as u32).to_be_bytes()); } data.extend_from_slice(byte_array); } @@ -362,13 +362,13 @@ impl NbtTag { } NbtTag::IntArray(int_array) => { unsafe { - unchecked_extend(data, &(int_array.len() as u32).to_be_bytes()); + extend_unchecked(data, &(int_array.len() as u32).to_be_bytes()); } data.extend_from_slice(&slice_into_u8_big_endian(int_array)); } NbtTag::LongArray(long_array) => { unsafe { - unchecked_extend(data, &(long_array.len() as u32).to_be_bytes()); + extend_unchecked(data, &(long_array.len() as u32).to_be_bytes()); } data.extend_from_slice(&slice_into_u8_big_endian(long_array)); } @@ -379,8 +379,8 @@ impl NbtTag { data.reserve(1 + 4); // SAFETY: We just reserved enough space for the tag ID and 4 bytes of tag data. unsafe { - unchecked_push(data, self.id()); - self.unchecked_write_without_tag_type(data); + push_unchecked(data, self.id()); + self.write_without_tag_type_unchecked(data); } } diff --git a/simdnbt/src/reader.rs b/simdnbt/src/reader.rs index e4792ed..fc16f49 100644 --- a/simdnbt/src/reader.rs +++ b/simdnbt/src/reader.rs @@ -24,6 +24,15 @@ impl<'a> Reader<'a> { } } + #[inline] + pub fn cur_addr(&self) -> usize { + self.cur as usize + } + #[inline] + pub fn end_addr(&self) -> usize { + self.end as usize + } + pub fn ensure_can_read(&self, size: usize) -> Result<(), UnexpectedEofError> { let data_addr = self.cur as usize; let end_addr = self.end as usize; @@ -35,7 +44,7 @@ impl<'a> Reader<'a> { } #[inline] - pub unsafe fn unchecked_read_type(&mut self) -> T { + pub unsafe fn read_type_unchecked(&mut self) -> T { let value = unsafe { self.cur.cast::().read_unaligned() }; self.cur = unsafe { self.cur.add(mem::size_of::()) }; value @@ -43,7 +52,7 @@ impl<'a> Reader<'a> { pub fn read_type(&mut self) -> Result { self.ensure_can_read(mem::size_of::())?; - Ok(unsafe { self.unchecked_read_type() }) + Ok(unsafe { self.read_type_unchecked() }) } #[inline] @@ -57,10 +66,7 @@ impl<'a> Reader<'a> { #[inline] pub fn read_u16(&mut self) -> Result { - let value = self.read_type::(); - #[cfg(target_endian = "little")] - let value = value.map(u16::swap_bytes); - value + self.read_type::().map(u16::to_be) } #[inline] pub fn read_i16(&mut self) -> Result { @@ -69,10 +75,7 @@ impl<'a> Reader<'a> { #[inline] pub fn read_u32(&mut self) -> Result { - let value = self.read_type::(); - #[cfg(target_endian = "little")] - let value = value.map(u32::swap_bytes); - value + self.read_type::().map(u32::to_be) } #[inline] pub fn read_i32(&mut self) -> Result { @@ -81,10 +84,7 @@ impl<'a> Reader<'a> { #[inline] pub fn read_u64(&mut self) -> Result { - let value = self.read_type::(); - #[cfg(target_endian = "little")] - let value = value.map(u64::swap_bytes); - value + self.read_type::().map(u64::to_be) } #[inline] pub fn read_i64(&mut self) -> Result { @@ -108,11 +108,16 @@ impl<'a> Reader<'a> { Ok(()) } + #[inline] + pub unsafe fn skip_unchecked(&mut self, size: usize) { + self.cur = unsafe { self.cur.add(size) }; + } + #[inline] pub fn read_slice(&mut self, size: usize) -> Result<&'a [u8], UnexpectedEofError> { self.ensure_can_read(size)?; let slice = unsafe { std::slice::from_raw_parts(self.cur, size) }; - self.cur = unsafe { self.cur.add(size) }; + unsafe { self.skip_unchecked(size) }; Ok(slice) } }