mirror of
https://github.com/azalea-rs/simdnbt.git
synced 2025-08-02 07:26:04 +00:00
Optimize owned (#5)
* inline read_with_type for 12% speedup
* optimize creating compounds
* fix incorrect comment in mutf8.rs
* update benchmarks in readme
This commit is contained in:
parent
2a9a1ca8d7
commit
7e29630c8b
9 changed files with 117 additions and 105 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -5,3 +5,7 @@
|
|||
flamegraph.svg
|
||||
perf.data
|
||||
perf.data.old
|
||||
|
||||
# sometimes i make this file when i pipe benchmark results to a file,
|
||||
# don't wanna accidentally commit it
|
||||
benchmark_result.txt
|
||||
|
|
|
@ -9,9 +9,9 @@ repository = "https://github.com/azalea-rs/simdnbt"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
proc-macro2 = "1.0.78"
|
||||
quote = "1.0.35"
|
||||
syn = "2.0.48"
|
||||
proc-macro2 = "1.0.82"
|
||||
quote = "1.0.36"
|
||||
syn = "2.0.63"
|
||||
|
||||
[lib]
|
||||
proc-macro = true
|
||||
|
|
|
@ -10,10 +10,10 @@ repository = "https://github.com/azalea-rs/simdnbt"
|
|||
|
||||
[dependencies]
|
||||
byteorder = "1.5.0"
|
||||
flate2 = "^1.0.28"
|
||||
flate2 = "^1.0.30"
|
||||
residua-mutf8 = "2.0.0"
|
||||
simdnbt-derive = { version = "0.4.0", path = "../simdnbt-derive", optional = true }
|
||||
thiserror = "1.0.56"
|
||||
thiserror = "1.0.60"
|
||||
valence_nbt = { version = "0.8.0", features = ["binary"] }
|
||||
|
||||
[dev-dependencies]
|
||||
|
@ -21,12 +21,12 @@ criterion = { version = "0.5.1", features = ["html_reports"] }
|
|||
|
||||
graphite_binary = "0.1.0"
|
||||
valence_nbt = { version = "0.8.0", features = ["binary"] }
|
||||
fastnbt = "2.4.4"
|
||||
fastnbt = "2.5.0"
|
||||
azalea-nbt = { git = "https://github.com/azalea-rs/azalea", rev = "84e036ce3752ecf57904b0f5aff1f33d43e95a32" }
|
||||
hematite-nbt = { version = "0.5.2", default-features = false }
|
||||
shen-nbt5 = "0.4.4"
|
||||
|
||||
mimalloc = "0.1.39"
|
||||
mimalloc = "0.1.41"
|
||||
|
||||
[features]
|
||||
default = ["derive"]
|
||||
|
|
|
@ -77,23 +77,23 @@ Here's a benchmark comparing Simdnbt against a few of the other fastest NBT crat
|
|||
|
||||
| Library | Throughput |
|
||||
| --------------------------------------------------------------------------- | ------------ |
|
||||
| [simdnbt::borrow](https://docs.rs/simdnbt/latest/simdnbt/borrow/index.html) | 1.7619 GiB/s |
|
||||
| [simdnbt::owned](https://docs.rs/simdnbt/latest/simdnbt/owned/index.html) | 329.10 MiB/s |
|
||||
| [shen_nbt5](https://docs.rs/shen-nbt5/latest/shen_nbt5/) | 306.58 MiB/s |
|
||||
| [azalea_nbt](https://docs.rs/azalea-nbt/latest/azalea_nbt/) | 297.28 MiB/s |
|
||||
| [valence_nbt](https://docs.rs/valence_nbt/latest/valence_nbt/) | 236.42 MiB/s |
|
||||
| [graphite_binary](https://docs.rs/graphite_binary/latest/graphite_binary/) | 210.51 MiB/s |
|
||||
| [fastnbt](https://docs.rs/fastnbt/latest/fastnbt/) | 115.54 MiB/s |
|
||||
| [hematite_nbt](https://docs.rs/hematite-nbt/latest/nbt/) | 108.91 MiB/s |
|
||||
| [simdnbt::borrow](https://docs.rs/simdnbt/latest/simdnbt/borrow/index.html) | 1.6795 GiB/s |
|
||||
| [simdnbt::owned](https://docs.rs/simdnbt/latest/simdnbt/owned/index.html) | 811.08 MiB/s |
|
||||
| [shen_nbt5](https://docs.rs/shen-nbt5/latest/shen_nbt5/) | 606.68 MiB/s |
|
||||
| [graphite_binary](https://docs.rs/graphite_binary/latest/graphite_binary/) | 363.94 MiB/s |
|
||||
| [azalea_nbt](https://docs.rs/azalea-nbt/latest/azalea_nbt/) | 330.46 MiB/s |
|
||||
| [valence_nbt](https://docs.rs/valence_nbt/latest/valence_nbt/) | 279.58 MiB/s |
|
||||
| [fastnbt](https://docs.rs/fastnbt/latest/fastnbt/) | 162.92 MiB/s |
|
||||
| [hematite_nbt](https://docs.rs/hematite-nbt/latest/nbt/) | 180.22 MiB/s |
|
||||
|
||||
And for writing `complex_player.dat`:
|
||||
|
||||
| Library | Throughput |
|
||||
| --------------- | ------------ |
|
||||
| simdnbt::borrow | 2.5914 GiB/s |
|
||||
| azalea_nbt | 2.1096 GiB/s |
|
||||
| simdnbt::owned | 1.9508 GiB/s |
|
||||
| graphite_binary | 1.7745 GiB/s |
|
||||
| simdnbt::borrow | 2.4670 GiB/s |
|
||||
| azalea_nbt | 2.4152 GiB/s |
|
||||
| simdnbt::owned | 1.9660 GiB/s |
|
||||
| graphite_binary | 1.8804 GiB/s |
|
||||
|
||||
The tables above were made from the [compare benchmark](https://github.com/azalea-rs/simdnbt/tree/master/simdnbt/benches) in this repo.
|
||||
Note that the benchmark is somewhat unfair, since `simdnbt::borrow` doesn't fully decode some things like strings and integer arrays until they're used.
|
||||
|
|
|
@ -6,56 +6,85 @@ use std::{
|
|||
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
||||
use flate2::read::GzDecoder;
|
||||
|
||||
pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
||||
fn bench_read_file(filename: &str, c: &mut Criterion) {
|
||||
let mut file = File::open(format!("tests/{filename}")).unwrap();
|
||||
let mut contents = Vec::new();
|
||||
file.read_to_end(&mut contents).unwrap();
|
||||
let mut src = &contents[..];
|
||||
|
||||
// decode the original src so most of the time isn't spent on unzipping
|
||||
let mut decoded_src_decoder = GzDecoder::new(&mut src);
|
||||
let mut src_decoder = GzDecoder::new(&mut src);
|
||||
let mut input = Vec::new();
|
||||
if decoded_src_decoder.read_to_end(&mut input).is_err() {
|
||||
if src_decoder.read_to_end(&mut input).is_err() {
|
||||
// oh probably wasn't gzipped then
|
||||
input = contents;
|
||||
}
|
||||
let input = input.as_slice();
|
||||
|
||||
let mut input_stream = Cursor::new(&input[..]);
|
||||
|
||||
let mut group = c.benchmark_group(format!("compare/{filename}"));
|
||||
group.throughput(Throughput::Bytes(input.len() as u64));
|
||||
|
||||
group.bench_function("simdnbt_borrow_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = simdnbt::borrow::Nbt::read(&mut Cursor::new(input))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
// let _ = black_box(nbt.list("").unwrap().ints());
|
||||
black_box(nbt);
|
||||
black_box(simdnbt::borrow::Nbt::read(&mut input_stream).unwrap());
|
||||
input_stream.set_position(0);
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("simdnbt_owned_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = simdnbt::owned::Nbt::read(&mut Cursor::new(input))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
// let _ = black_box(nbt.list("").unwrap().ints());
|
||||
black_box(nbt);
|
||||
black_box(simdnbt::owned::Nbt::read(&mut input_stream).unwrap());
|
||||
input_stream.set_position(0);
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("shen_parse", |b| {
|
||||
let mut input = black_box(input.to_vec());
|
||||
let mut input = input.to_vec();
|
||||
b.iter(|| {
|
||||
let nbt = shen_nbt5::NbtValue::from_binary::<shen_nbt5::nbt_version::Java>(&mut input)
|
||||
.unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
group.bench_function("azalea_parse", |b| {
|
||||
b.iter(|| {
|
||||
black_box(azalea_nbt::Nbt::read(&mut input_stream).unwrap());
|
||||
input_stream.set_position(0);
|
||||
})
|
||||
});
|
||||
group.bench_function("graphite_parse", |b| {
|
||||
b.iter(|| {
|
||||
black_box(graphite_binary::nbt::decode::read(&mut &input[..]).unwrap());
|
||||
})
|
||||
});
|
||||
group.bench_function("valence_parse", |b| {
|
||||
b.iter(|| {
|
||||
let nbt = valence_nbt::from_binary::<String>(&mut &input[..]).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
group.bench_function("fastnbt_parse", |b| {
|
||||
b.iter(|| {
|
||||
let nbt: fastnbt::Value = fastnbt::from_bytes(&input).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
group.bench_function("hematite_parse", |b| {
|
||||
b.iter(|| {
|
||||
black_box(nbt::Blob::from_reader(&mut input_stream).unwrap());
|
||||
input_stream.set_position(0);
|
||||
})
|
||||
});
|
||||
|
||||
let nbt = simdnbt::borrow::Nbt::read(&mut Cursor::new(input))
|
||||
let nbt = azalea_nbt::Nbt::read(&mut Cursor::new(&input)).unwrap();
|
||||
group.bench_function("azalea_write", |b| {
|
||||
b.iter(|| {
|
||||
let mut out = Vec::new();
|
||||
nbt.write(&mut out);
|
||||
black_box(out);
|
||||
})
|
||||
});
|
||||
|
||||
let nbt = simdnbt::borrow::Nbt::read(&mut Cursor::new(&input))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
group.bench_function("simdnbt_borrow_write", |b| {
|
||||
|
@ -66,7 +95,7 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
|||
})
|
||||
});
|
||||
|
||||
let nbt = simdnbt::owned::Nbt::read(&mut Cursor::new(input))
|
||||
let nbt = simdnbt::owned::Nbt::read(&mut Cursor::new(&input))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
group.bench_function("simdnbt_owned_write", |b| {
|
||||
|
@ -77,30 +106,6 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
|||
})
|
||||
});
|
||||
|
||||
group.bench_function("azalea_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = azalea_nbt::Nbt::read(&mut Cursor::new(input)).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
|
||||
let nbt = azalea_nbt::Nbt::read(&mut Cursor::new(input)).unwrap();
|
||||
group.bench_function("azalea_write", |b| {
|
||||
b.iter(|| {
|
||||
let mut out = Vec::new();
|
||||
nbt.write(&mut out);
|
||||
black_box(out);
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("graphite_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = graphite_binary::nbt::decode::read(&mut &input[..]).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
let nbt = graphite_binary::nbt::decode::read(&mut &input[..]).unwrap();
|
||||
group.bench_function("graphite_write", |b| {
|
||||
b.iter(|| {
|
||||
|
@ -108,32 +113,11 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
|||
black_box(out);
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("valence_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = valence_nbt::from_binary::<String>(&mut &input[..]).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("fastnbt_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt: fastnbt::Value = fastnbt::from_bytes(input).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("hematite_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = nbt::Blob::from_reader(&mut Cursor::new(input)).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[global_allocator]
|
||||
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
fn bench(c: &mut Criterion) {
|
||||
// bench_read_file("hello_world.nbt", c);
|
||||
// bench_read_file("bigtest.nbt", c);
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
||||
use flate2::read::GzDecoder;
|
||||
use std::{
|
||||
fs::File,
|
||||
io::{Cursor, Read},
|
||||
};
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
||||
use flate2::read::GzDecoder;
|
||||
|
||||
fn bench_file(filename: &str, c: &mut Criterion) {
|
||||
let mut file = File::open(format!("tests/{filename}")).unwrap();
|
||||
let mut contents = Vec::new();
|
||||
|
@ -12,27 +13,26 @@ fn bench_file(filename: &str, c: &mut Criterion) {
|
|||
let mut src = &contents[..];
|
||||
|
||||
// decode the original src so most of the time isn't spent on unzipping
|
||||
let mut decoded_src_decoder = GzDecoder::new(&mut src);
|
||||
let mut decoded_src = Vec::new();
|
||||
if decoded_src_decoder.read_to_end(&mut decoded_src).is_err() {
|
||||
let mut src_decoder = GzDecoder::new(&mut src);
|
||||
let mut input = Vec::new();
|
||||
if src_decoder.read_to_end(&mut input).is_err() {
|
||||
// oh probably wasn't gzipped then
|
||||
decoded_src = contents;
|
||||
input = contents;
|
||||
}
|
||||
|
||||
let mut decoded_src_stream = Cursor::new(&decoded_src[..]);
|
||||
let mut input_stream = Cursor::new(&input[..]);
|
||||
|
||||
let mut group = c.benchmark_group(format!("nbt_borrow/{filename}"));
|
||||
|
||||
group.throughput(Throughput::Bytes(decoded_src.len() as u64));
|
||||
group.throughput(Throughput::Bytes(input.len() as u64));
|
||||
|
||||
group.bench_function("Decode", |b| {
|
||||
b.iter(|| {
|
||||
black_box(simdnbt::borrow::Nbt::read(&mut decoded_src_stream).unwrap());
|
||||
decoded_src_stream.set_position(0);
|
||||
black_box(simdnbt::borrow::Nbt::read(&mut input_stream).unwrap());
|
||||
input_stream.set_position(0);
|
||||
})
|
||||
});
|
||||
|
||||
let nbt = simdnbt::borrow::Nbt::read(&mut decoded_src_stream)
|
||||
let nbt = simdnbt::borrow::Nbt::read(&mut input_stream)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
group.bench_function("Get", |b| {
|
||||
|
|
|
@ -92,14 +92,14 @@ impl Mutf8Str {
|
|||
#[inline]
|
||||
pub fn from_str(s: &str) -> Cow<Mutf8Str> {
|
||||
match mutf8::encode(s) {
|
||||
Cow::Borrowed(b) => Cow::Borrowed(Mutf8Str::from_slice(b)),
|
||||
Cow::Owned(o) => Cow::Owned(Mutf8String { vec: o }),
|
||||
Cow::Borrowed(slice) => Cow::Borrowed(Mutf8Str::from_slice(slice)),
|
||||
Cow::Owned(vec) => Cow::Owned(Mutf8String { vec }),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn to_str(&self) -> Cow<str> {
|
||||
// fast check to skip if none of the bytes have the top bit set or are null
|
||||
// fast check to skip if none of the bytes have the top bit set
|
||||
if is_plain_ascii(&self.slice) {
|
||||
// SAFETY: &[u8] and &str are the same layout.
|
||||
unsafe { Cow::Borrowed(std::str::from_utf8_unchecked(&self.slice)) }
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
use std::{io::Cursor, mem};
|
||||
use std::{
|
||||
io::Cursor,
|
||||
mem::{self, MaybeUninit},
|
||||
};
|
||||
|
||||
use byteorder::ReadBytesExt;
|
||||
|
||||
|
@ -33,6 +36,12 @@ impl NbtCompound {
|
|||
if depth > MAX_DEPTH {
|
||||
return Err(Error::MaxDepthExceeded);
|
||||
}
|
||||
|
||||
let mut tags_buffer = unsafe {
|
||||
MaybeUninit::<[MaybeUninit<(Mutf8String, NbtTag)>; 8]>::uninit().assume_init()
|
||||
};
|
||||
let mut tags_buffer_len: usize = 0;
|
||||
|
||||
let mut values = Vec::with_capacity(8);
|
||||
loop {
|
||||
let tag_type = data.read_u8().map_err(|_| Error::UnexpectedEof)?;
|
||||
|
@ -40,9 +49,23 @@ impl NbtCompound {
|
|||
break;
|
||||
}
|
||||
let tag_name = read_string(data)?.to_owned();
|
||||
let tag = NbtTag::read_with_type(data, tag_type, depth)?;
|
||||
|
||||
values.push((tag_name, NbtTag::read_with_type(data, tag_type, depth)?));
|
||||
tags_buffer[tags_buffer_len] = MaybeUninit::new((tag_name, tag));
|
||||
tags_buffer_len += 1;
|
||||
if tags_buffer_len == tags_buffer.len() {
|
||||
// writing the tags in groups like this is slightly faster
|
||||
for i in 0..tags_buffer_len {
|
||||
values.push(unsafe { tags_buffer.get_unchecked(i).assume_init_read() });
|
||||
}
|
||||
tags_buffer_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for i in 0..tags_buffer_len {
|
||||
values.push(unsafe { tags_buffer.get_unchecked(i).assume_init_read() });
|
||||
}
|
||||
|
||||
Ok(Self { values })
|
||||
}
|
||||
|
||||
|
|
|
@ -226,6 +226,7 @@ impl NbtTag {
|
|||
unsafe { *<*const _>::from(self).cast::<u8>() }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn read_with_type(data: &mut Cursor<&[u8]>, tag_type: u8, depth: usize) -> Result<Self, Error> {
|
||||
match tag_type {
|
||||
BYTE_ID => Ok(NbtTag::Byte(
|
||||
|
|
Loading…
Add table
Reference in a new issue