more optimal string skipping in read_tag_in_compound

2025-08-02 07:26:04 +00:00 · 2025-01-18 08:06:58 +00:00 · 2025-01-18 08:06:58 +00:00 · d4f50f2420
commit d4f50f2420
parent bdf55aab6e
6 changed files with 84 additions and 60 deletions
--- a/simdnbt/src/borrow/compound.rs
+++ b/simdnbt/src/borrow/compound.rs
@ -2,10 +2,10 @@ use std::{hint::unreachable_unchecked, mem::MaybeUninit};

 use crate::{
    common::{
-        read_int_array, read_long_array, read_string, read_with_u32_length, unchecked_extend,
-        unchecked_push, unchecked_write_string, write_string, BYTE_ARRAY_ID, BYTE_ID, COMPOUND_ID,
-        DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID, LONG_ID,
-        MAX_DEPTH, SHORT_ID, STRING_ID,
+        extend_unchecked, push_unchecked, read_int_array, read_long_array, read_string,
+        read_with_u32_length, skip_string, write_string, write_string_unchecked, BYTE_ARRAY_ID,
+        BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID,
+        LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
    },
    error::NonRootError,
    reader::Reader,
@ -54,8 +54,8 @@ impl<'a: 'tape, 'tape> NbtCompound<'a, 'tape> {
            // SAFETY: We just reserved enough space for the tag ID, the name length, the name, and
            // 4 bytes of tag data.
            unsafe {
-                unchecked_push(data, tag.id());
-                unchecked_write_string(data, name);
+                push_unchecked(data, tag.id());
+                write_string_unchecked(data, name);
            }

            write_tag(tag, data);
@ -403,7 +403,7 @@ pub(crate) fn read_tag_in_compound<'a>(

    let tag_name_ptr = data.cur;
    debug_assert_eq!(tag_name_ptr as u64 >> 56, 0);
-    read_string(data)?;
+    skip_string(data)?;
    tapes.main.push(TapeElement::new(tag_name_ptr as u64));

    read_tag(data, tapes, stack, tag_type)
@ -433,19 +433,19 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
    let el = tag.element();
    match el.kind() {
        TapeTagKind::Byte => unsafe {
-            unchecked_push(data, tag.byte().unwrap() as u8);
+            push_unchecked(data, tag.byte().unwrap() as u8);
        },
        TapeTagKind::Short => unsafe {
-            unchecked_extend(data, &tag.short().unwrap().to_be_bytes());
+            extend_unchecked(data, &tag.short().unwrap().to_be_bytes());
        },
        TapeTagKind::Int => unsafe {
-            unchecked_extend(data, &tag.int().unwrap().to_be_bytes());
+            extend_unchecked(data, &tag.int().unwrap().to_be_bytes());
        },
        TapeTagKind::Long => {
            data.extend_from_slice(&tag.long().unwrap().to_be_bytes());
        }
        TapeTagKind::Float => unsafe {
-            unchecked_extend(data, &tag.float().unwrap().to_be_bytes());
+            extend_unchecked(data, &tag.float().unwrap().to_be_bytes());
        },
        TapeTagKind::Double => {
            data.extend_from_slice(&tag.double().unwrap().to_be_bytes());
@ -453,7 +453,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
        TapeTagKind::ByteArray => {
            let byte_array = tag.byte_array().unwrap();
            unsafe {
-                unchecked_extend(data, &(byte_array.len() as u32).to_be_bytes());
+                extend_unchecked(data, &(byte_array.len() as u32).to_be_bytes());
            }
            data.extend_from_slice(byte_array);
        }
@ -471,7 +471,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
            let int_array =
                unsafe { list::u32_prefixed_list_to_rawlist_unchecked::<i32>(el.ptr()).unwrap() };
            unsafe {
-                unchecked_extend(data, &(int_array.len() as u32).to_be_bytes());
+                extend_unchecked(data, &(int_array.len() as u32).to_be_bytes());
            }
            data.extend_from_slice(int_array.as_big_endian());
        }
@ -479,7 +479,7 @@ pub(crate) fn write_tag(tag: NbtTag, data: &mut Vec<u8>) {
            let long_array =
                unsafe { list::u32_prefixed_list_to_rawlist_unchecked::<i64>(el.ptr()).unwrap() };
            unsafe {
-                unchecked_extend(data, &(long_array.len() as u32).to_be_bytes());
+                extend_unchecked(data, &(long_array.len() as u32).to_be_bytes());
            }
            data.extend_from_slice(long_array.as_big_endian());
        }
--- a/simdnbt/src/common.rs
+++ b/simdnbt/src/common.rs
@ -30,7 +30,7 @@ pub fn read_with_u16_length<'a>(
    width: usize,
 ) -> Result<&'a [u8], UnexpectedEofError> {
    let length = data.read_u16()?;
-    let length_in_bytes = length as usize * width;
+    let length_in_bytes: usize = length as usize * width;
    data.read_slice(length_in_bytes)
 }

@ -49,6 +49,25 @@ pub fn read_string<'a>(data: &mut Reader<'a>) -> Result<&'a Mutf8Str, Unexpected
    Ok(Mutf8Str::from_slice(data))
 }

+pub fn skip_string(data: &mut Reader<'_>) -> Result<(), UnexpectedEofError> {
+    let cur_addr = data.cur_addr();
+    let end_addr = data.end_addr();
+
+    if cur_addr + 2 > end_addr {
+        return Err(UnexpectedEofError);
+    }
+
+    let length = unsafe { data.read_type_unchecked::<u16>() }.to_be();
+    let length_in_bytes: usize = length as usize;
+    if cur_addr + 2 + length_in_bytes > end_addr {
+        return Err(UnexpectedEofError);
+    }
+
+    unsafe { data.skip_unchecked(length_in_bytes) };
+
+    Ok(())
+}
+
 pub fn read_u8_array<'a>(data: &mut Reader<'a>) -> Result<&'a [u8], UnexpectedEofError> {
    read_with_u32_length(data, 1)
 }
@ -79,8 +98,8 @@ pub fn write_with_u32_length(data: &mut Vec<u8>, width: usize, value: &[u8]) {
    let length = value.len() / width;
    data.reserve(4 + value.len());
    unsafe {
-        unchecked_extend(data, &(length as u32).to_be_bytes());
-        unchecked_extend(data, value);
+        extend_unchecked(data, &(length as u32).to_be_bytes());
+        extend_unchecked(data, value);
    }
 }

@ -91,7 +110,7 @@ pub fn write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
    data.reserve(2 + value.len());
    // SAFETY: We reserved enough capacity
    unsafe {
-        unchecked_write_string(data, value);
+        write_string_unchecked(data, value);
    }
 }
 /// Write a string to a Vec<u8> without checking if the Vec has enough capacity.
@ -101,9 +120,9 @@ pub fn write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
 ///
 /// You must reserve enough capacity (2 + value.len()) in the Vec before calling this function.
 #[inline]
-pub unsafe fn unchecked_write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
-    unchecked_extend(data, &(value.len() as u16).to_be_bytes());
-    unchecked_extend(data, value.as_bytes());
+pub unsafe fn write_string_unchecked(data: &mut Vec<u8>, value: &Mutf8Str) {
+    extend_unchecked(data, &(value.len() as u16).to_be_bytes());
+    extend_unchecked(data, value.as_bytes());
 }

 /// Extend a Vec<u8> with a slice of u8 without checking if the Vec has enough capacity.
@ -114,7 +133,7 @@ pub unsafe fn unchecked_write_string(data: &mut Vec<u8>, value: &Mutf8Str) {
 ///
 /// You must reserve enough capacity in the Vec before calling this function.
 #[inline]
-pub unsafe fn unchecked_extend(data: &mut Vec<u8>, value: &[u8]) {
+pub unsafe fn extend_unchecked(data: &mut Vec<u8>, value: &[u8]) {
    let ptr = data.as_mut_ptr();
    let len = data.len();
    std::ptr::copy_nonoverlapping(value.as_ptr(), ptr.add(len), value.len());
@ -122,7 +141,7 @@ pub unsafe fn unchecked_extend(data: &mut Vec<u8>, value: &[u8]) {
 }

 #[inline]
-pub unsafe fn unchecked_push(data: &mut Vec<u8>, value: u8) {
+pub unsafe fn push_unchecked(data: &mut Vec<u8>, value: u8) {
    let ptr = data.as_mut_ptr();
    let len = data.len();
    std::ptr::write(ptr.add(len), value);
--- a/simdnbt/src/owned/compound.rs
+++ b/simdnbt/src/owned/compound.rs
@ -1,7 +1,7 @@
 use std::mem::{self, MaybeUninit};

 use crate::{
-    common::{read_string, unchecked_push, unchecked_write_string, END_ID, MAX_DEPTH},
+    common::{push_unchecked, read_string, write_string_unchecked, END_ID, MAX_DEPTH},
    error::NonRootError,
    mutf8::Mutf8String,
    reader::Reader,
@ -84,9 +84,9 @@ impl NbtCompound {
            // SAFETY: We just reserved enough space for the tag ID, the name length, the name, and
            // 4 bytes of tag data.
            unsafe {
-                unchecked_push(data, tag.id());
-                unchecked_write_string(data, name);
-                tag.unchecked_write_without_tag_type(data);
+                push_unchecked(data, tag.id());
+                write_string_unchecked(data, name);
+                tag.write_without_tag_type_unchecked(data);
            }
        }
        data.push(END_ID);
--- a/simdnbt/src/owned/list.rs
+++ b/simdnbt/src/owned/list.rs
@ -1,8 +1,8 @@
 use crate::{
    common::{
        read_i8_array, read_int_array, read_long_array, read_string, read_u8_array,
-        read_with_u32_length, slice_i8_into_u8, slice_into_u8_big_endian, unchecked_extend,
-        unchecked_push, write_string, write_u32, write_with_u32_length, BYTE_ARRAY_ID, BYTE_ID,
+        read_with_u32_length, slice_i8_into_u8, slice_into_u8_big_endian, extend_unchecked,
+        push_unchecked, write_string, write_u32, write_with_u32_length, BYTE_ARRAY_ID, BYTE_ID,
        COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID,
        LONG_ID, SHORT_ID, STRING_ID,
    },
@ -117,8 +117,8 @@ impl NbtList {
            data.reserve(5);
            // SAFETY: we just reserved 5 bytes
            unsafe {
-                unchecked_push(data, COMPOUND_ID);
-                unchecked_extend(data, &(compounds.len() as u32).to_be_bytes());
+                push_unchecked(data, COMPOUND_ID);
+                extend_unchecked(data, &(compounds.len() as u32).to_be_bytes());
            }
            for compound in compounds {
                compound.write(data);
--- a/simdnbt/src/owned/mod.rs
+++ b/simdnbt/src/owned/mod.rs
@ -8,10 +8,10 @@ use std::{io::Cursor, ops::Deref};

 use crate::{
    common::{
-        read_int_array, read_long_array, read_string, read_with_u32_length,
-        slice_into_u8_big_endian, unchecked_extend, unchecked_push, write_string, BYTE_ARRAY_ID,
-        BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID,
-        LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
+        extend_unchecked, push_unchecked, read_int_array, read_long_array, read_string,
+        read_with_u32_length, slice_into_u8_big_endian, write_string, BYTE_ARRAY_ID, BYTE_ID,
+        COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID, LONG_ARRAY_ID,
+        LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
    },
    error::NonRootError,
    mutf8::Mutf8String,
@ -325,29 +325,29 @@ impl NbtTag {
    /// This function is unsafe because it doesn't check that there's enough space in the data.
    /// 4 bytes MUST be reserved before calling this function.
    #[inline]
-    unsafe fn unchecked_write_without_tag_type(&self, data: &mut Vec<u8>) {
+    unsafe fn write_without_tag_type_unchecked(&self, data: &mut Vec<u8>) {
        match self {
            NbtTag::Byte(byte) => unsafe {
-                unchecked_push(data, *byte as u8);
+                push_unchecked(data, *byte as u8);
            },
            NbtTag::Short(short) => unsafe {
-                unchecked_extend(data, &short.to_be_bytes());
+                extend_unchecked(data, &short.to_be_bytes());
            },
            NbtTag::Int(int) => unsafe {
-                unchecked_extend(data, &int.to_be_bytes());
+                extend_unchecked(data, &int.to_be_bytes());
            },
            NbtTag::Long(long) => {
                data.extend_from_slice(&long.to_be_bytes());
            }
            NbtTag::Float(float) => unsafe {
-                unchecked_extend(data, &float.to_be_bytes());
+                extend_unchecked(data, &float.to_be_bytes());
            },
            NbtTag::Double(double) => {
                data.extend_from_slice(&double.to_be_bytes());
            }
            NbtTag::ByteArray(byte_array) => {
                unsafe {
-                    unchecked_extend(data, &(byte_array.len() as u32).to_be_bytes());
+                    extend_unchecked(data, &(byte_array.len() as u32).to_be_bytes());
                }
                data.extend_from_slice(byte_array);
            }
@ -362,13 +362,13 @@ impl NbtTag {
            }
            NbtTag::IntArray(int_array) => {
                unsafe {
-                    unchecked_extend(data, &(int_array.len() as u32).to_be_bytes());
+                    extend_unchecked(data, &(int_array.len() as u32).to_be_bytes());
                }
                data.extend_from_slice(&slice_into_u8_big_endian(int_array));
            }
            NbtTag::LongArray(long_array) => {
                unsafe {
-                    unchecked_extend(data, &(long_array.len() as u32).to_be_bytes());
+                    extend_unchecked(data, &(long_array.len() as u32).to_be_bytes());
                }
                data.extend_from_slice(&slice_into_u8_big_endian(long_array));
            }
@ -379,8 +379,8 @@ impl NbtTag {
        data.reserve(1 + 4);
        // SAFETY: We just reserved enough space for the tag ID and 4 bytes of tag data.
        unsafe {
-            unchecked_push(data, self.id());
-            self.unchecked_write_without_tag_type(data);
+            push_unchecked(data, self.id());
+            self.write_without_tag_type_unchecked(data);
        }
    }

--- a/simdnbt/src/reader.rs
+++ b/simdnbt/src/reader.rs
@ -24,6 +24,15 @@ impl<'a> Reader<'a> {
        }
    }

+    #[inline]
+    pub fn cur_addr(&self) -> usize {
+        self.cur as usize
+    }
+    #[inline]
+    pub fn end_addr(&self) -> usize {
+        self.end as usize
+    }
+
    pub fn ensure_can_read(&self, size: usize) -> Result<(), UnexpectedEofError> {
        let data_addr = self.cur as usize;
        let end_addr = self.end as usize;
@ -35,7 +44,7 @@ impl<'a> Reader<'a> {
    }

    #[inline]
-    pub unsafe fn unchecked_read_type<T>(&mut self) -> T {
+    pub unsafe fn read_type_unchecked<T>(&mut self) -> T {
        let value = unsafe { self.cur.cast::<T>().read_unaligned() };
        self.cur = unsafe { self.cur.add(mem::size_of::<T>()) };
        value
@ -43,7 +52,7 @@ impl<'a> Reader<'a> {

    pub fn read_type<T: Copy>(&mut self) -> Result<T, UnexpectedEofError> {
        self.ensure_can_read(mem::size_of::<T>())?;
-        Ok(unsafe { self.unchecked_read_type() })
+        Ok(unsafe { self.read_type_unchecked() })
    }

    #[inline]
@ -57,10 +66,7 @@ impl<'a> Reader<'a> {

    #[inline]
    pub fn read_u16(&mut self) -> Result<u16, UnexpectedEofError> {
-        let value = self.read_type::<u16>();
-        #[cfg(target_endian = "little")]
-        let value = value.map(u16::swap_bytes);
-        value
+        self.read_type::<u16>().map(u16::to_be)
    }
    #[inline]
    pub fn read_i16(&mut self) -> Result<i16, UnexpectedEofError> {
@ -69,10 +75,7 @@ impl<'a> Reader<'a> {

    #[inline]
    pub fn read_u32(&mut self) -> Result<u32, UnexpectedEofError> {
-        let value = self.read_type::<u32>();
-        #[cfg(target_endian = "little")]
-        let value = value.map(u32::swap_bytes);
-        value
+        self.read_type::<u32>().map(u32::to_be)
    }
    #[inline]
    pub fn read_i32(&mut self) -> Result<i32, UnexpectedEofError> {
@ -81,10 +84,7 @@ impl<'a> Reader<'a> {

    #[inline]
    pub fn read_u64(&mut self) -> Result<u64, UnexpectedEofError> {
-        let value = self.read_type::<u64>();
-        #[cfg(target_endian = "little")]
-        let value = value.map(u64::swap_bytes);
-        value
+        self.read_type::<u64>().map(u64::to_be)
    }
    #[inline]
    pub fn read_i64(&mut self) -> Result<i64, UnexpectedEofError> {
@ -108,11 +108,16 @@ impl<'a> Reader<'a> {
        Ok(())
    }

+    #[inline]
+    pub unsafe fn skip_unchecked(&mut self, size: usize) {
+        self.cur = unsafe { self.cur.add(size) };
+    }
+
    #[inline]
    pub fn read_slice(&mut self, size: usize) -> Result<&'a [u8], UnexpectedEofError> {
        self.ensure_can_read(size)?;
        let slice = unsafe { std::slice::from_raw_parts(self.cur, size) };
-        self.cur = unsafe { self.cur.add(size) };
+        unsafe { self.skip_unchecked(size) };
        Ok(slice)
    }
 }