mirror of
https://github.com/azalea-rs/simdnbt.git
synced 2025-08-02 15:36:03 +00:00
make deserializing int arrays lazy
This commit is contained in:
parent
b2e155f19c
commit
9c73382043
5 changed files with 428 additions and 421 deletions
|
@ -29,18 +29,6 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
|||
let input = black_box(input);
|
||||
let nbt = azalea_nbt::Nbt::read(&mut Cursor::new(input)).unwrap();
|
||||
black_box(nbt);
|
||||
// black_box(
|
||||
// nbt.as_compound()
|
||||
// .unwrap()
|
||||
// .get("")
|
||||
// .unwrap()
|
||||
// .as_compound()
|
||||
// .unwrap()
|
||||
// .get("PersistentId")
|
||||
// .unwrap()
|
||||
// .as_int()
|
||||
// .unwrap(),
|
||||
// );
|
||||
})
|
||||
});
|
||||
|
||||
|
@ -49,7 +37,6 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
|||
let input = black_box(input);
|
||||
let nbt = graphite_binary::nbt::decode::read(&mut &input[..]).unwrap();
|
||||
black_box(nbt);
|
||||
// black_box(nbt.find_root("PersistentId").unwrap().as_int());
|
||||
})
|
||||
});
|
||||
|
||||
|
@ -57,34 +44,34 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
|||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = simdnbt::Nbt::new(&mut Cursor::new(input)).unwrap().unwrap();
|
||||
black_box(nbt);
|
||||
// black_box(nbt.int("PersistentId").unwrap());
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("valence_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = valence_nbt::Compound::from_binary(&mut &input[..]).unwrap();
|
||||
let _ = black_box(nbt.list("").unwrap().ints());
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("fastnbt_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt: fastnbt::Value = fastnbt::from_bytes(input).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
// group.bench_function("valence_parse", |b| {
|
||||
// b.iter(|| {
|
||||
// let input = black_box(input);
|
||||
// let nbt = valence_nbt::Compound::from_binary(&mut &input[..]).unwrap();
|
||||
// black_box(nbt);
|
||||
// })
|
||||
// });
|
||||
|
||||
group.bench_function("hematite_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = nbt::Blob::from_reader(&mut Cursor::new(input)).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
// group.bench_function("fastnbt_parse", |b| {
|
||||
// b.iter(|| {
|
||||
// let input = black_box(input);
|
||||
// let nbt: fastnbt::Value = fastnbt::from_bytes(input).unwrap();
|
||||
// black_box(nbt);
|
||||
// })
|
||||
// });
|
||||
|
||||
// group.bench_function("hematite_parse", |b| {
|
||||
// b.iter(|| {
|
||||
// let input = black_box(input);
|
||||
// let nbt = nbt::Blob::from_reader(&mut Cursor::new(input)).unwrap();
|
||||
// black_box(nbt);
|
||||
// })
|
||||
// });
|
||||
}
|
||||
|
||||
fn bench(c: &mut Criterion) {
|
||||
|
@ -92,9 +79,9 @@ fn bench(c: &mut Criterion) {
|
|||
// bench_read_file("bigtest.nbt", c);
|
||||
// bench_read_file("simple_player.dat", c);
|
||||
bench_read_file("complex_player.dat", c);
|
||||
// bench_read_file("level.dat", c);
|
||||
bench_read_file("level.dat", c);
|
||||
// bench_read_file("stringtest.nbt", c);
|
||||
// bench_read_file("inttest.nbt", c);
|
||||
bench_read_file("inttest1023.nbt", c);
|
||||
}
|
||||
|
||||
criterion_group!(compare, bench);
|
||||
|
|
|
@ -12,12 +12,18 @@ fn bench(c: &mut Criterion) {
|
|||
black_box(input.to_str());
|
||||
})
|
||||
});
|
||||
group.bench_function("to_string", |b| {
|
||||
group.bench_function("to_string long", |b| {
|
||||
let input = black_box(Mutf8Str::from_slice(b"asgwjiebrtiowuubrtiowerthb8qwertyuwerpotihnqwiortuhbweinoqwner9opiquwehuiowrtjbwerioltubnwrioutunqweol;rkinqweuiorbqweruiqvbwefP;WOEJ Q0OEPWRIYGYUIEDRYASZTFHGC Ijkbuiljfn qwilrtb qsokjladfnqiowugrtbquiowerbq we;roiqwerghqwioerhd5rtea456etrsdyutrioutyopuipjklhkjfgghjdffghasdgxvncm,bn,.bnioug78yufvukyhfyutdyf"));
|
||||
b.iter(|| {
|
||||
black_box(input.to_string());
|
||||
})
|
||||
});
|
||||
group.bench_function("to_string short", |b| {
|
||||
let input = black_box(Mutf8Str::from_slice(b"hello world"));
|
||||
b.iter(|| {
|
||||
black_box(input.to_string());
|
||||
})
|
||||
});
|
||||
group.bench_function("to_owned into_string", |b| {
|
||||
let input = black_box(Mutf8Str::from_slice(b"asgwjiebrtiowuubrtiowerthb8qwertyuwerpotihnqwiortuhbweinoqwner9opiquwehuiowrtjbwerioltubnwrioutunqweol;rkinqweuiorbqweruiqvbwefP;WOEJ Q0OEPWRIYGYUIEDRYASZTFHGC Ijkbuiljfn qwilrtb qsokjladfnqiowugrtbquiowerbq we;roiqwerghqwioerhd5rtea456etrsdyutrioutyopuipjklhkjfgghjdffghasdgxvncm,bn,.bnioug78yufvukyhfyutdyf"));
|
||||
b.iter(|| {
|
||||
|
|
386
src/lib.rs
386
src/lib.rs
|
@ -16,12 +16,15 @@
|
|||
#![feature(split_array)]
|
||||
|
||||
mod error;
|
||||
mod list;
|
||||
mod mutf8;
|
||||
|
||||
use std::{io::Cursor, ops::Deref, simd::prelude::*, slice};
|
||||
use std::{io::Cursor, ops::Deref};
|
||||
|
||||
use byteorder::{ReadBytesExt, BE};
|
||||
pub use error::Error;
|
||||
pub use list::ListTag;
|
||||
use list::{read_int_array, read_long_array};
|
||||
pub use mutf8::Mutf8Str;
|
||||
|
||||
/// A complete NBT container. This contains a name and a compound tag.
|
||||
|
@ -298,228 +301,6 @@ impl<'a> CompoundTag<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn read_u8_array<'a>(data: &mut Cursor<&'a [u8]>) -> Result<&'a [u8], Error> {
|
||||
read_with_u32_length(data, 1)
|
||||
}
|
||||
fn read_i8_array<'a>(data: &mut Cursor<&'a [u8]>) -> Result<&'a [i8], Error> {
|
||||
Ok(slice_u8_into_i8(read_u8_array(data)?))
|
||||
}
|
||||
fn read_short_array(data: &mut Cursor<&[u8]>) -> Result<Vec<i16>, Error> {
|
||||
let array_bytes = read_with_u32_length(data, 2)?;
|
||||
let mut array_bytes_cursor = Cursor::new(array_bytes);
|
||||
let length = array_bytes.len() / 2;
|
||||
let mut shorts = Vec::with_capacity(length);
|
||||
for _ in 0..length {
|
||||
shorts.push(
|
||||
array_bytes_cursor
|
||||
.read_i16::<BE>()
|
||||
.map_err(|_| Error::UnexpectedEof)?,
|
||||
);
|
||||
}
|
||||
Ok(shorts)
|
||||
}
|
||||
fn read_int_array(data: &mut Cursor<&[u8]>) -> Result<Vec<i32>, Error> {
|
||||
let array_bytes = read_with_u32_length(data, 4)?;
|
||||
let length = array_bytes.len() / 4;
|
||||
let mut ints = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_32bit(&mut ints, length);
|
||||
}
|
||||
|
||||
let ints = {
|
||||
let ptr = ints.as_ptr() as *const i32;
|
||||
std::mem::forget(ints);
|
||||
// SAFETY: the width provided to read_with_u32_length guarantees that it'll be a multiple of 4
|
||||
unsafe { Vec::from_raw_parts(ptr as *mut i32, length, length) }
|
||||
};
|
||||
|
||||
Ok(ints)
|
||||
}
|
||||
|
||||
fn read_long_array(data: &mut Cursor<&[u8]>) -> Result<Vec<i64>, Error> {
|
||||
let array_bytes = read_with_u32_length(data, 8)?;
|
||||
let length = array_bytes.len() / 8;
|
||||
let mut ints = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_64bit(&mut ints, length);
|
||||
}
|
||||
|
||||
let ints = {
|
||||
let ptr = ints.as_ptr() as *const i64;
|
||||
std::mem::forget(ints);
|
||||
// SAFETY: the width provided to read_with_u32_length guarantees that it'll be a multiple of 8
|
||||
unsafe { Vec::from_raw_parts(ptr as *mut i64, length, length) }
|
||||
};
|
||||
|
||||
Ok(ints)
|
||||
}
|
||||
fn read_float_array(data: &mut Cursor<&[u8]>) -> Result<Vec<f32>, Error> {
|
||||
let array_bytes = read_with_u32_length(data, 4)?;
|
||||
let length = array_bytes.len() / 4;
|
||||
let mut floats = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_32bit(&mut floats, length);
|
||||
}
|
||||
|
||||
let floats = {
|
||||
let ptr = floats.as_ptr() as *const f32;
|
||||
std::mem::forget(floats);
|
||||
// SAFETY: the width provided to read_with_u32_length guarantees that it'll be a multiple of 4
|
||||
unsafe { Vec::from_raw_parts(ptr as *mut f32, length, length) }
|
||||
};
|
||||
|
||||
Ok(floats)
|
||||
}
|
||||
fn read_double_array(data: &mut Cursor<&[u8]>) -> Result<Vec<f64>, Error> {
|
||||
let array_bytes = read_with_u32_length(data, 8)?;
|
||||
let length = array_bytes.len() / 8;
|
||||
let mut doubles = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_64bit(&mut doubles, length);
|
||||
}
|
||||
|
||||
let doubles = {
|
||||
let ptr = doubles.as_ptr() as *const f64;
|
||||
std::mem::forget(doubles);
|
||||
// SAFETY: the width provided to read_with_u32_length guarantees that it'll be a multiple of 8
|
||||
unsafe { Vec::from_raw_parts(ptr as *mut f64, length, length) }
|
||||
};
|
||||
|
||||
Ok(doubles)
|
||||
}
|
||||
|
||||
fn swap_endianness_32bit(bytes: &mut [u8], num: usize) {
|
||||
for i in 0..num / 16 {
|
||||
let simd: u8x64 = Simd::from_slice(bytes[i * 16 * 4..(i + 1) * 16 * 4].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
11, 10, 9, 8,
|
||||
15, 14, 13, 12,
|
||||
19, 18, 17, 16,
|
||||
23, 22, 21, 20,
|
||||
27, 26, 25, 24,
|
||||
31, 30, 29, 28,
|
||||
35, 34, 33, 32,
|
||||
39, 38, 37, 36,
|
||||
43, 42, 41, 40,
|
||||
47, 46, 45, 44,
|
||||
51, 50, 49, 48,
|
||||
55, 54, 53, 52,
|
||||
59, 58, 57, 56,
|
||||
63, 62, 61, 60,
|
||||
]);
|
||||
bytes[i * 16 * 4..(i + 1) * 16 * 4].copy_from_slice(simd.as_array());
|
||||
}
|
||||
|
||||
let mut i = num / 16 * 16;
|
||||
if i + 8 <= num {
|
||||
let simd: u8x32 = Simd::from_slice(bytes[i * 4..i * 4 + 32].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
11, 10, 9, 8,
|
||||
15, 14, 13, 12,
|
||||
19, 18, 17, 16,
|
||||
23, 22, 21, 20,
|
||||
27, 26, 25, 24,
|
||||
31, 30, 29, 28,
|
||||
]);
|
||||
bytes[i * 4..i * 4 + 32].copy_from_slice(simd.as_array());
|
||||
i += 8;
|
||||
}
|
||||
if i + 4 <= num {
|
||||
let simd: u8x16 = Simd::from_slice(bytes[i * 4..i * 4 + 16].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
11, 10, 9, 8,
|
||||
15, 14, 13, 12,
|
||||
]);
|
||||
bytes[i * 4..i * 4 + 16].copy_from_slice(simd.as_array());
|
||||
i += 4;
|
||||
}
|
||||
if i + 2 <= num {
|
||||
let simd: u8x8 = Simd::from_slice(bytes[i * 4..i * 4 + 8].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
]);
|
||||
bytes[i * 4..i * 4 + 8].copy_from_slice(simd.as_array());
|
||||
i += 2;
|
||||
}
|
||||
if i < num {
|
||||
let simd: u8x4 = Simd::from_slice(bytes[i * 4..i * 4 + 4].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
]);
|
||||
bytes[i * 4..i * 4 + 4].copy_from_slice(simd.as_array());
|
||||
}
|
||||
}
|
||||
|
||||
fn swap_endianness_64bit(bytes: &mut [u8], num: usize) {
|
||||
for i in 0..num / 8 {
|
||||
let simd: u8x64 = Simd::from_slice(bytes[i * 64..i * 64 + 64].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
7, 6, 5, 4, 3, 2, 1, 0,
|
||||
15, 14, 13, 12, 11, 10, 9, 8,
|
||||
23, 22, 21, 20, 19, 18, 17, 16,
|
||||
31, 30, 29, 28, 27, 26, 25, 24,
|
||||
39, 38, 37, 36, 35, 34, 33, 32,
|
||||
47, 46, 45, 44, 43, 42, 41, 40,
|
||||
55, 54, 53, 52, 51, 50, 49, 48,
|
||||
63, 62, 61, 60, 59, 58, 57, 56,
|
||||
]);
|
||||
bytes[i * 64..i * 64 + 64].copy_from_slice(simd.as_array());
|
||||
}
|
||||
|
||||
let mut i = num / 8 * 8;
|
||||
if i + 4 <= num {
|
||||
let simd: u8x32 = Simd::from_slice(bytes[i * 8..i * 8 + 32].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
7, 6, 5, 4, 3, 2, 1, 0,
|
||||
15, 14, 13, 12, 11, 10, 9, 8,
|
||||
23, 22, 21, 20, 19, 18, 17, 16,
|
||||
31, 30, 29, 28, 27, 26, 25, 24,
|
||||
]);
|
||||
bytes[i * 8..i * 8 + 32].copy_from_slice(simd.as_array());
|
||||
i += 4;
|
||||
}
|
||||
if i + 2 <= num {
|
||||
let simd: u8x16 = Simd::from_slice(bytes[i * 8..i * 8 + 16].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
7, 6, 5, 4, 3, 2, 1, 0,
|
||||
15, 14, 13, 12, 11, 10, 9, 8,
|
||||
]);
|
||||
bytes[i * 8..i * 8 + 16].copy_from_slice(simd.as_array());
|
||||
i += 2;
|
||||
}
|
||||
if i < num {
|
||||
let simd: u8x8 = Simd::from_slice(bytes[i * 8..i * 8 + 8].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
7, 6, 5, 4, 3, 2, 1, 0,
|
||||
]);
|
||||
bytes[i * 8..i * 8 + 8].copy_from_slice(simd.as_array());
|
||||
}
|
||||
}
|
||||
|
||||
fn slice_u8_into_i8(s: &[u8]) -> &[i8] {
|
||||
unsafe { slice::from_raw_parts(s.as_ptr() as *const i8, s.len()) }
|
||||
}
|
||||
|
||||
/// A single NBT tag.
|
||||
#[derive(Debug)]
|
||||
pub enum Tag<'a> {
|
||||
|
@ -611,165 +392,6 @@ impl<'a> Tag<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// A list of NBT tags of a single type.
|
||||
#[derive(Debug, Default)]
|
||||
pub enum ListTag<'a> {
|
||||
#[default]
|
||||
Empty,
|
||||
Byte(&'a [i8]),
|
||||
Short(Vec<i16>),
|
||||
Int(Vec<i32>),
|
||||
Long(Vec<i64>),
|
||||
Float(Vec<f32>),
|
||||
Double(Vec<f64>),
|
||||
ByteArray(&'a [u8]),
|
||||
String(Vec<&'a Mutf8Str>),
|
||||
List(Vec<ListTag<'a>>),
|
||||
Compound(Vec<CompoundTag<'a>>),
|
||||
IntArray(Vec<Vec<i32>>),
|
||||
LongArray(Vec<Vec<i64>>),
|
||||
}
|
||||
impl<'a> ListTag<'a> {
|
||||
pub fn new(data: &mut Cursor<&'a [u8]>, depth: usize) -> Result<Self, Error> {
|
||||
if depth > MAX_DEPTH {
|
||||
return Err(Error::MaxDepthExceeded);
|
||||
}
|
||||
let tag_type = data.read_u8().map_err(|_| Error::UnexpectedEof)?;
|
||||
Ok(match tag_type {
|
||||
END_ID => {
|
||||
data.set_position(data.position() + 4);
|
||||
ListTag::Empty
|
||||
}
|
||||
BYTE_ID => ListTag::Byte(read_i8_array(data)?),
|
||||
SHORT_ID => ListTag::Short(read_short_array(data)?),
|
||||
INT_ID => ListTag::Int(read_int_array(data)?),
|
||||
LONG_ID => ListTag::Long(read_long_array(data)?),
|
||||
FLOAT_ID => ListTag::Float(read_float_array(data)?),
|
||||
DOUBLE_ID => ListTag::Double(read_double_array(data)?),
|
||||
BYTE_ARRAY_ID => ListTag::ByteArray(read_u8_array(data)?),
|
||||
STRING_ID => ListTag::String({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut strings = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
strings.push(read_string(data)?)
|
||||
}
|
||||
strings
|
||||
}),
|
||||
LIST_ID => ListTag::List({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut lists = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
lists.push(ListTag::new(data, depth + 1)?)
|
||||
}
|
||||
lists
|
||||
}),
|
||||
COMPOUND_ID => ListTag::Compound({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut compounds = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
compounds.push(CompoundTag::new(data, depth + 1)?)
|
||||
}
|
||||
compounds
|
||||
}),
|
||||
INT_ARRAY_ID => ListTag::IntArray({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut arrays = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
arrays.push(read_int_array(data)?)
|
||||
}
|
||||
arrays
|
||||
}),
|
||||
LONG_ARRAY_ID => ListTag::LongArray({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut arrays = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
arrays.push(read_long_array(data)?)
|
||||
}
|
||||
arrays
|
||||
}),
|
||||
_ => return Err(Error::UnknownTagId(tag_type)),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn bytes(&self) -> Option<&[i8]> {
|
||||
match self {
|
||||
ListTag::Byte(bytes) => Some(bytes),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn shorts(&self) -> Option<&[i16]> {
|
||||
match self {
|
||||
ListTag::Short(shorts) => Some(shorts),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn ints(&self) -> Option<&[i32]> {
|
||||
match self {
|
||||
ListTag::Int(ints) => Some(ints),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn longs(&self) -> Option<&[i64]> {
|
||||
match self {
|
||||
ListTag::Long(longs) => Some(longs),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn floats(&self) -> Option<&[f32]> {
|
||||
match self {
|
||||
ListTag::Float(floats) => Some(floats),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn doubles(&self) -> Option<&[f64]> {
|
||||
match self {
|
||||
ListTag::Double(doubles) => Some(doubles),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn byte_arrays(&self) -> Option<&[u8]> {
|
||||
match self {
|
||||
ListTag::ByteArray(byte_arrays) => Some(byte_arrays),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn strings(&self) -> Option<&[&Mutf8Str]> {
|
||||
match self {
|
||||
ListTag::String(strings) => Some(strings),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn lists(&self) -> Option<&[ListTag]> {
|
||||
match self {
|
||||
ListTag::List(lists) => Some(lists),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn compounds(&self) -> Option<&[CompoundTag]> {
|
||||
match self {
|
||||
ListTag::Compound(compounds) => Some(compounds),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn int_arrays(&self) -> Option<&[Vec<i32>]> {
|
||||
match self {
|
||||
ListTag::IntArray(int_arrays) => Some(int_arrays),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn long_arrays(&self) -> Option<&[Vec<i64>]> {
|
||||
match self {
|
||||
ListTag::LongArray(long_arrays) => Some(long_arrays),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io::Read;
|
||||
|
|
381
src/list.rs
Normal file
381
src/list.rs
Normal file
|
@ -0,0 +1,381 @@
|
|||
use std::{io::Cursor, marker::PhantomData, simd::prelude::*, slice};
|
||||
|
||||
use byteorder::ReadBytesExt;
|
||||
|
||||
use crate::{
|
||||
read_string, read_u32, read_with_u32_length, CompoundTag, Error, Mutf8Str, BYTE_ARRAY_ID,
|
||||
BYTE_ID, COMPOUND_ID, DOUBLE_ID, END_ID, FLOAT_ID, INT_ARRAY_ID, INT_ID, LIST_ID,
|
||||
LONG_ARRAY_ID, LONG_ID, MAX_DEPTH, SHORT_ID, STRING_ID,
|
||||
};
|
||||
|
||||
/// A list of NBT tags of a single type.
|
||||
#[derive(Debug, Default)]
|
||||
pub enum ListTag<'a> {
|
||||
#[default]
|
||||
Empty,
|
||||
Byte(&'a [i8]),
|
||||
Short(RawList<'a, i16>),
|
||||
Int(RawList<'a, i32>),
|
||||
Long(RawList<'a, i64>),
|
||||
Float(RawList<'a, f32>),
|
||||
Double(RawList<'a, f64>),
|
||||
ByteArray(&'a [u8]),
|
||||
String(Vec<&'a Mutf8Str>),
|
||||
List(Vec<ListTag<'a>>),
|
||||
Compound(Vec<CompoundTag<'a>>),
|
||||
IntArray(Vec<Vec<i32>>),
|
||||
LongArray(Vec<Vec<i64>>),
|
||||
}
|
||||
impl<'a> ListTag<'a> {
|
||||
pub fn new(data: &mut Cursor<&'a [u8]>, depth: usize) -> Result<Self, Error> {
|
||||
if depth > MAX_DEPTH {
|
||||
return Err(Error::MaxDepthExceeded);
|
||||
}
|
||||
let tag_type = data.read_u8().map_err(|_| Error::UnexpectedEof)?;
|
||||
Ok(match tag_type {
|
||||
END_ID => {
|
||||
data.set_position(data.position() + 4);
|
||||
ListTag::Empty
|
||||
}
|
||||
BYTE_ID => ListTag::Byte(read_i8_array(data)?),
|
||||
SHORT_ID => ListTag::Short(RawList::new(read_with_u32_length(data, 2)?)),
|
||||
INT_ID => ListTag::Int(RawList::new(read_with_u32_length(data, 4)?)),
|
||||
LONG_ID => ListTag::Long(RawList::new(read_with_u32_length(data, 8)?)),
|
||||
FLOAT_ID => ListTag::Float(RawList::new(read_with_u32_length(data, 4)?)),
|
||||
DOUBLE_ID => ListTag::Double(RawList::new(read_with_u32_length(data, 8)?)),
|
||||
BYTE_ARRAY_ID => ListTag::ByteArray(read_u8_array(data)?),
|
||||
STRING_ID => ListTag::String({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut strings = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
strings.push(read_string(data)?)
|
||||
}
|
||||
strings
|
||||
}),
|
||||
LIST_ID => ListTag::List({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut lists = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
lists.push(ListTag::new(data, depth + 1)?)
|
||||
}
|
||||
lists
|
||||
}),
|
||||
COMPOUND_ID => ListTag::Compound({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut compounds = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
compounds.push(CompoundTag::new(data, depth + 1)?)
|
||||
}
|
||||
compounds
|
||||
}),
|
||||
INT_ARRAY_ID => ListTag::IntArray({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut arrays = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
arrays.push(read_int_array(data)?)
|
||||
}
|
||||
arrays
|
||||
}),
|
||||
LONG_ARRAY_ID => ListTag::LongArray({
|
||||
let length = read_u32(data)?;
|
||||
// arbitrary number to prevent big allocations
|
||||
let mut arrays = Vec::with_capacity(length.min(128) as usize);
|
||||
for _ in 0..length {
|
||||
arrays.push(read_long_array(data)?)
|
||||
}
|
||||
arrays
|
||||
}),
|
||||
_ => return Err(Error::UnknownTagId(tag_type)),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn bytes(&self) -> Option<&[i8]> {
|
||||
match self {
|
||||
ListTag::Byte(bytes) => Some(bytes),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn shorts(&self) -> Option<Vec<i16>> {
|
||||
match self {
|
||||
ListTag::Short(shorts) => Some(shorts.to_vec()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn ints(&self) -> Option<Vec<i32>> {
|
||||
match self {
|
||||
ListTag::Int(ints) => Some(ints.to_vec()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn longs(&self) -> Option<Vec<i64>> {
|
||||
match self {
|
||||
ListTag::Long(longs) => Some(longs.to_vec()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn floats(&self) -> Option<Vec<f32>> {
|
||||
match self {
|
||||
ListTag::Float(floats) => Some(floats.to_vec()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn doubles(&self) -> Option<Vec<f64>> {
|
||||
match self {
|
||||
ListTag::Double(doubles) => Some(doubles.to_vec()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn byte_arrays(&self) -> Option<&[u8]> {
|
||||
match self {
|
||||
ListTag::ByteArray(byte_arrays) => Some(byte_arrays),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn strings(&self) -> Option<&[&Mutf8Str]> {
|
||||
match self {
|
||||
ListTag::String(strings) => Some(strings),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn lists(&self) -> Option<&[ListTag]> {
|
||||
match self {
|
||||
ListTag::List(lists) => Some(lists),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn compounds(&self) -> Option<&[CompoundTag]> {
|
||||
match self {
|
||||
ListTag::Compound(compounds) => Some(compounds),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn int_arrays(&self) -> Option<&[Vec<i32>]> {
|
||||
match self {
|
||||
ListTag::IntArray(int_arrays) => Some(int_arrays),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn long_arrays(&self) -> Option<&[Vec<i64>]> {
|
||||
match self {
|
||||
ListTag::LongArray(long_arrays) => Some(long_arrays),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RawList<'a, T> {
|
||||
data: &'a [u8],
|
||||
_marker: PhantomData<T>,
|
||||
}
|
||||
impl<'a, T> RawList<'a, T> {
|
||||
pub fn new(data: &'a [u8]) -> Self {
|
||||
Self {
|
||||
data,
|
||||
_marker: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> RawList<'_, T> {
|
||||
pub fn to_vec(&self) -> Vec<T>
|
||||
where
|
||||
T: Copy,
|
||||
{
|
||||
let data = self.data;
|
||||
let length = data.len() / std::mem::size_of::<T>();
|
||||
|
||||
let mut items = data.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
match std::mem::size_of::<T>() {
|
||||
4 => swap_endianness_32bit(&mut items, length),
|
||||
8 => swap_endianness_64bit(&mut items, length),
|
||||
_ => panic!("unsupported size of type"),
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let ptr = items.as_ptr() as *const T;
|
||||
std::mem::forget(items);
|
||||
// SAFETY: the width provided to read_with_u32_length guarantees that it'll be a multiple of 4
|
||||
unsafe { Vec::from_raw_parts(ptr as *mut T, length, length) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_u8_array<'a>(data: &mut Cursor<&'a [u8]>) -> Result<&'a [u8], Error> {
|
||||
read_with_u32_length(data, 1)
|
||||
}
|
||||
fn read_i8_array<'a>(data: &mut Cursor<&'a [u8]>) -> Result<&'a [i8], Error> {
|
||||
Ok(slice_u8_into_i8(read_u8_array(data)?))
|
||||
}
|
||||
pub fn read_int_array(data: &mut Cursor<&[u8]>) -> Result<Vec<i32>, Error> {
|
||||
let array_bytes = read_with_u32_length(data, 4)?;
|
||||
let length = array_bytes.len() / 4;
|
||||
let mut ints = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_32bit(&mut ints, length);
|
||||
}
|
||||
|
||||
let ints = {
|
||||
let ptr = ints.as_ptr() as *const i32;
|
||||
std::mem::forget(ints);
|
||||
// SAFETY: the width provided to read_with_u32_length guarantees that it'll be a multiple of 4
|
||||
unsafe { Vec::from_raw_parts(ptr as *mut i32, length, length) }
|
||||
};
|
||||
|
||||
Ok(ints)
|
||||
}
|
||||
|
||||
pub fn read_long_array(data: &mut Cursor<&[u8]>) -> Result<Vec<i64>, Error> {
|
||||
let array_bytes = read_with_u32_length(data, 8)?;
|
||||
let length = array_bytes.len() / 8;
|
||||
let mut ints = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_64bit(&mut ints, length);
|
||||
}
|
||||
|
||||
let ints = {
|
||||
let ptr = ints.as_ptr() as *const i64;
|
||||
std::mem::forget(ints);
|
||||
// SAFETY: the width provided to read_with_u32_length guarantees that it'll be a multiple of 8
|
||||
unsafe { Vec::from_raw_parts(ptr as *mut i64, length, length) }
|
||||
};
|
||||
|
||||
Ok(ints)
|
||||
}
|
||||
|
||||
fn swap_endianness_32bit(bytes: &mut [u8], num: usize) {
|
||||
for i in 0..num / 16 {
|
||||
let simd: u8x64 = Simd::from_slice(bytes[i * 16 * 4..(i + 1) * 16 * 4].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
11, 10, 9, 8,
|
||||
15, 14, 13, 12,
|
||||
19, 18, 17, 16,
|
||||
23, 22, 21, 20,
|
||||
27, 26, 25, 24,
|
||||
31, 30, 29, 28,
|
||||
35, 34, 33, 32,
|
||||
39, 38, 37, 36,
|
||||
43, 42, 41, 40,
|
||||
47, 46, 45, 44,
|
||||
51, 50, 49, 48,
|
||||
55, 54, 53, 52,
|
||||
59, 58, 57, 56,
|
||||
63, 62, 61, 60,
|
||||
]);
|
||||
bytes[i * 16 * 4..(i + 1) * 16 * 4].copy_from_slice(simd.as_array());
|
||||
}
|
||||
|
||||
let mut i = num / 16 * 16;
|
||||
if i + 8 <= num {
|
||||
let simd: u8x32 = Simd::from_slice(bytes[i * 4..i * 4 + 32].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
11, 10, 9, 8,
|
||||
15, 14, 13, 12,
|
||||
19, 18, 17, 16,
|
||||
23, 22, 21, 20,
|
||||
27, 26, 25, 24,
|
||||
31, 30, 29, 28,
|
||||
]);
|
||||
bytes[i * 4..i * 4 + 32].copy_from_slice(simd.as_array());
|
||||
i += 8;
|
||||
}
|
||||
if i + 4 <= num {
|
||||
let simd: u8x16 = Simd::from_slice(bytes[i * 4..i * 4 + 16].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
11, 10, 9, 8,
|
||||
15, 14, 13, 12,
|
||||
]);
|
||||
bytes[i * 4..i * 4 + 16].copy_from_slice(simd.as_array());
|
||||
i += 4;
|
||||
}
|
||||
if i + 2 <= num {
|
||||
let simd: u8x8 = Simd::from_slice(bytes[i * 4..i * 4 + 8].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
]);
|
||||
bytes[i * 4..i * 4 + 8].copy_from_slice(simd.as_array());
|
||||
i += 2;
|
||||
}
|
||||
if i < num {
|
||||
let simd: u8x4 = Simd::from_slice(bytes[i * 4..i * 4 + 4].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
3, 2, 1, 0,
|
||||
]);
|
||||
bytes[i * 4..i * 4 + 4].copy_from_slice(simd.as_array());
|
||||
}
|
||||
}
|
||||
|
||||
fn swap_endianness_64bit(bytes: &mut [u8], num: usize) {
|
||||
for i in 0..num / 8 {
|
||||
let simd: u8x64 = Simd::from_slice(bytes[i * 64..i * 64 + 64].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
7, 6, 5, 4, 3, 2, 1, 0,
|
||||
15, 14, 13, 12, 11, 10, 9, 8,
|
||||
23, 22, 21, 20, 19, 18, 17, 16,
|
||||
31, 30, 29, 28, 27, 26, 25, 24,
|
||||
39, 38, 37, 36, 35, 34, 33, 32,
|
||||
47, 46, 45, 44, 43, 42, 41, 40,
|
||||
55, 54, 53, 52, 51, 50, 49, 48,
|
||||
63, 62, 61, 60, 59, 58, 57, 56,
|
||||
]);
|
||||
bytes[i * 64..i * 64 + 64].copy_from_slice(simd.as_array());
|
||||
}
|
||||
|
||||
let mut i = num / 8 * 8;
|
||||
if i + 4 <= num {
|
||||
let simd: u8x32 = Simd::from_slice(bytes[i * 8..i * 8 + 32].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
7, 6, 5, 4, 3, 2, 1, 0,
|
||||
15, 14, 13, 12, 11, 10, 9, 8,
|
||||
23, 22, 21, 20, 19, 18, 17, 16,
|
||||
31, 30, 29, 28, 27, 26, 25, 24,
|
||||
]);
|
||||
bytes[i * 8..i * 8 + 32].copy_from_slice(simd.as_array());
|
||||
i += 4;
|
||||
}
|
||||
if i + 2 <= num {
|
||||
let simd: u8x16 = Simd::from_slice(bytes[i * 8..i * 8 + 16].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
7, 6, 5, 4, 3, 2, 1, 0,
|
||||
15, 14, 13, 12, 11, 10, 9, 8,
|
||||
]);
|
||||
bytes[i * 8..i * 8 + 16].copy_from_slice(simd.as_array());
|
||||
i += 2;
|
||||
}
|
||||
if i < num {
|
||||
let simd: u8x8 = Simd::from_slice(bytes[i * 8..i * 8 + 8].as_ref());
|
||||
#[rustfmt::skip]
|
||||
let simd = simd_swizzle!(simd, [
|
||||
7, 6, 5, 4, 3, 2, 1, 0,
|
||||
]);
|
||||
bytes[i * 8..i * 8 + 8].copy_from_slice(simd.as_array());
|
||||
}
|
||||
}
|
||||
|
||||
/// Reinterprets a slice of unsigned bytes as signed bytes without copying.
///
/// The returned slice covers exactly the same memory and has the same length
/// and lifetime as `s`.
fn slice_u8_into_i8(s: &[u8]) -> &[i8] {
    let (start, count) = (s.as_ptr() as *const i8, s.len());
    // SAFETY: `u8` and `i8` have the same size and alignment and every bit
    // pattern is valid for both, so the `count` bytes borrowed through `s`
    // are also `count` valid `i8`s for the same lifetime.
    unsafe { slice::from_raw_parts(start, count) }
}
|
11
src/mutf8.rs
11
src/mutf8.rs
|
@ -45,6 +45,17 @@ fn is_plain_ascii(slice: &[u8]) -> bool {
|
|||
is_plain_ascii = false;
|
||||
}
|
||||
}
|
||||
if remainder.len() > 4 {
|
||||
let chunk;
|
||||
(chunk, remainder) = remainder.split_array_ref::<4>();
|
||||
let mask = u8x4::splat(0b10000000);
|
||||
let zero = u8x4::splat(0);
|
||||
let simd = u8x4::from_array(*chunk);
|
||||
let xor = simd & mask;
|
||||
if xor != zero {
|
||||
is_plain_ascii = false;
|
||||
}
|
||||
}
|
||||
for &byte in remainder {
|
||||
if byte & 0b10000000 != 0 {
|
||||
is_plain_ascii = false;
|
||||
|
|
Loading…
Add table
Reference in a new issue