1
0
Fork 0
mirror of https://github.com/azalea-rs/simdnbt.git synced 2025-08-02 07:26:04 +00:00

Optimize owned (#5)

* inline read_with_type for 12% speedup

* optimize creating compounds

* fix incorrect comment in mutf8.rs

* update benchmarks in readme
This commit is contained in:
mat 2024-05-13 06:21:14 -05:00 committed by GitHub
parent 2a9a1ca8d7
commit 7e29630c8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 117 additions and 105 deletions

4
.gitignore vendored
View file

@ -5,3 +5,7 @@
flamegraph.svg
perf.data
perf.data.old
# Benchmark results are sometimes piped into this file;
# ignore it so it doesn't get committed by accident.
benchmark_result.txt

View file

@ -9,9 +9,9 @@ repository = "https://github.com/azalea-rs/simdnbt"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
proc-macro2 = "1.0.78"
quote = "1.0.35"
syn = "2.0.48"
proc-macro2 = "1.0.82"
quote = "1.0.36"
syn = "2.0.63"
[lib]
proc-macro = true

View file

@ -10,10 +10,10 @@ repository = "https://github.com/azalea-rs/simdnbt"
[dependencies]
byteorder = "1.5.0"
flate2 = "^1.0.28"
flate2 = "^1.0.30"
residua-mutf8 = "2.0.0"
simdnbt-derive = { version = "0.4.0", path = "../simdnbt-derive", optional = true }
thiserror = "1.0.56"
thiserror = "1.0.60"
valence_nbt = { version = "0.8.0", features = ["binary"] }
[dev-dependencies]
@ -21,12 +21,12 @@ criterion = { version = "0.5.1", features = ["html_reports"] }
graphite_binary = "0.1.0"
valence_nbt = { version = "0.8.0", features = ["binary"] }
fastnbt = "2.4.4"
fastnbt = "2.5.0"
azalea-nbt = { git = "https://github.com/azalea-rs/azalea", rev = "84e036ce3752ecf57904b0f5aff1f33d43e95a32" }
hematite-nbt = { version = "0.5.2", default-features = false }
shen-nbt5 = "0.4.4"
mimalloc = "0.1.39"
mimalloc = "0.1.41"
[features]
default = ["derive"]

View file

@ -77,23 +77,23 @@ Here's a benchmark comparing Simdnbt against a few of the other fastest NBT crat
| Library | Throughput |
| --------------------------------------------------------------------------- | ------------ |
| [simdnbt::borrow](https://docs.rs/simdnbt/latest/simdnbt/borrow/index.html) | 1.7619 GiB/s |
| [simdnbt::owned](https://docs.rs/simdnbt/latest/simdnbt/owned/index.html) | 329.10 MiB/s |
| [shen_nbt5](https://docs.rs/shen-nbt5/latest/shen_nbt5/) | 306.58 MiB/s |
| [azalea_nbt](https://docs.rs/azalea-nbt/latest/azalea_nbt/) | 297.28 MiB/s |
| [valence_nbt](https://docs.rs/valence_nbt/latest/valence_nbt/) | 236.42 MiB/s |
| [graphite_binary](https://docs.rs/graphite_binary/latest/graphite_binary/) | 210.51 MiB/s |
| [fastnbt](https://docs.rs/fastnbt/latest/fastnbt/) | 115.54 MiB/s |
| [hematite_nbt](https://docs.rs/hematite-nbt/latest/nbt/) | 108.91 MiB/s |
| [simdnbt::borrow](https://docs.rs/simdnbt/latest/simdnbt/borrow/index.html) | 1.6795 GiB/s |
| [simdnbt::owned](https://docs.rs/simdnbt/latest/simdnbt/owned/index.html) | 811.08 MiB/s |
| [shen_nbt5](https://docs.rs/shen-nbt5/latest/shen_nbt5/) | 606.68 MiB/s |
| [graphite_binary](https://docs.rs/graphite_binary/latest/graphite_binary/) | 363.94 MiB/s |
| [azalea_nbt](https://docs.rs/azalea-nbt/latest/azalea_nbt/) | 330.46 MiB/s |
| [valence_nbt](https://docs.rs/valence_nbt/latest/valence_nbt/) | 279.58 MiB/s |
| [fastnbt](https://docs.rs/fastnbt/latest/fastnbt/) | 162.92 MiB/s |
| [hematite_nbt](https://docs.rs/hematite-nbt/latest/nbt/) | 180.22 MiB/s |
And for writing `complex_player.dat`:
| Library | Throughput |
| --------------- | ------------ |
| simdnbt::borrow | 2.5914 GiB/s |
| azalea_nbt | 2.1096 GiB/s |
| simdnbt::owned | 1.9508 GiB/s |
| graphite_binary | 1.7745 GiB/s |
| simdnbt::borrow | 2.4670 GiB/s |
| azalea_nbt | 2.4152 GiB/s |
| simdnbt::owned | 1.9660 GiB/s |
| graphite_binary | 1.8804 GiB/s |
The tables above were made from the [compare benchmark](https://github.com/azalea-rs/simdnbt/tree/master/simdnbt/benches) in this repo.
Note that the benchmark is somewhat unfair in `simdnbt::borrow`'s favor, since it doesn't fully decode some things, like strings and integer arrays, until they're used.

View file

@ -6,56 +6,85 @@ use std::{
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use flate2::read::GzDecoder;
pub fn bench_read_file(filename: &str, c: &mut Criterion) {
fn bench_read_file(filename: &str, c: &mut Criterion) {
let mut file = File::open(format!("tests/{filename}")).unwrap();
let mut contents = Vec::new();
file.read_to_end(&mut contents).unwrap();
let mut src = &contents[..];
// decode the original src so most of the time isn't spent on unzipping
let mut decoded_src_decoder = GzDecoder::new(&mut src);
let mut src_decoder = GzDecoder::new(&mut src);
let mut input = Vec::new();
if decoded_src_decoder.read_to_end(&mut input).is_err() {
if src_decoder.read_to_end(&mut input).is_err() {
// oh probably wasn't gzipped then
input = contents;
}
let input = input.as_slice();
let mut input_stream = Cursor::new(&input[..]);
let mut group = c.benchmark_group(format!("compare/{filename}"));
group.throughput(Throughput::Bytes(input.len() as u64));
group.bench_function("simdnbt_borrow_parse", |b| {
b.iter(|| {
let input = black_box(input);
let nbt = simdnbt::borrow::Nbt::read(&mut Cursor::new(input))
.unwrap()
.unwrap();
// let _ = black_box(nbt.list("").unwrap().ints());
black_box(nbt);
black_box(simdnbt::borrow::Nbt::read(&mut input_stream).unwrap());
input_stream.set_position(0);
})
});
group.bench_function("simdnbt_owned_parse", |b| {
b.iter(|| {
let input = black_box(input);
let nbt = simdnbt::owned::Nbt::read(&mut Cursor::new(input))
.unwrap()
.unwrap();
// let _ = black_box(nbt.list("").unwrap().ints());
black_box(nbt);
black_box(simdnbt::owned::Nbt::read(&mut input_stream).unwrap());
input_stream.set_position(0);
})
});
group.bench_function("shen_parse", |b| {
let mut input = black_box(input.to_vec());
let mut input = input.to_vec();
b.iter(|| {
let nbt = shen_nbt5::NbtValue::from_binary::<shen_nbt5::nbt_version::Java>(&mut input)
.unwrap();
black_box(nbt);
})
});
group.bench_function("azalea_parse", |b| {
b.iter(|| {
black_box(azalea_nbt::Nbt::read(&mut input_stream).unwrap());
input_stream.set_position(0);
})
});
group.bench_function("graphite_parse", |b| {
b.iter(|| {
black_box(graphite_binary::nbt::decode::read(&mut &input[..]).unwrap());
})
});
group.bench_function("valence_parse", |b| {
b.iter(|| {
let nbt = valence_nbt::from_binary::<String>(&mut &input[..]).unwrap();
black_box(nbt);
})
});
group.bench_function("fastnbt_parse", |b| {
b.iter(|| {
let nbt: fastnbt::Value = fastnbt::from_bytes(&input).unwrap();
black_box(nbt);
})
});
group.bench_function("hematite_parse", |b| {
b.iter(|| {
black_box(nbt::Blob::from_reader(&mut input_stream).unwrap());
input_stream.set_position(0);
})
});
let nbt = simdnbt::borrow::Nbt::read(&mut Cursor::new(input))
let nbt = azalea_nbt::Nbt::read(&mut Cursor::new(&input)).unwrap();
group.bench_function("azalea_write", |b| {
b.iter(|| {
let mut out = Vec::new();
nbt.write(&mut out);
black_box(out);
})
});
let nbt = simdnbt::borrow::Nbt::read(&mut Cursor::new(&input))
.unwrap()
.unwrap();
group.bench_function("simdnbt_borrow_write", |b| {
@ -66,7 +95,7 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
})
});
let nbt = simdnbt::owned::Nbt::read(&mut Cursor::new(input))
let nbt = simdnbt::owned::Nbt::read(&mut Cursor::new(&input))
.unwrap()
.unwrap();
group.bench_function("simdnbt_owned_write", |b| {
@ -77,30 +106,6 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
})
});
group.bench_function("azalea_parse", |b| {
b.iter(|| {
let input = black_box(input);
let nbt = azalea_nbt::Nbt::read(&mut Cursor::new(input)).unwrap();
black_box(nbt);
})
});
let nbt = azalea_nbt::Nbt::read(&mut Cursor::new(input)).unwrap();
group.bench_function("azalea_write", |b| {
b.iter(|| {
let mut out = Vec::new();
nbt.write(&mut out);
black_box(out);
})
});
group.bench_function("graphite_parse", |b| {
b.iter(|| {
let input = black_box(input);
let nbt = graphite_binary::nbt::decode::read(&mut &input[..]).unwrap();
black_box(nbt);
})
});
let nbt = graphite_binary::nbt::decode::read(&mut &input[..]).unwrap();
group.bench_function("graphite_write", |b| {
b.iter(|| {
@ -108,32 +113,11 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
black_box(out);
})
});
group.bench_function("valence_parse", |b| {
b.iter(|| {
let input = black_box(input);
let nbt = valence_nbt::from_binary::<String>(&mut &input[..]).unwrap();
black_box(nbt);
})
});
group.bench_function("fastnbt_parse", |b| {
b.iter(|| {
let input = black_box(input);
let nbt: fastnbt::Value = fastnbt::from_bytes(input).unwrap();
black_box(nbt);
})
});
group.bench_function("hematite_parse", |b| {
b.iter(|| {
let input = black_box(input);
let nbt = nbt::Blob::from_reader(&mut Cursor::new(input)).unwrap();
black_box(nbt);
})
});
}
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn bench(c: &mut Criterion) {
// bench_read_file("hello_world.nbt", c);
// bench_read_file("bigtest.nbt", c);

View file

@ -1,10 +1,11 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use flate2::read::GzDecoder;
use std::{
fs::File,
io::{Cursor, Read},
};
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use flate2::read::GzDecoder;
fn bench_file(filename: &str, c: &mut Criterion) {
let mut file = File::open(format!("tests/{filename}")).unwrap();
let mut contents = Vec::new();
@ -12,27 +13,26 @@ fn bench_file(filename: &str, c: &mut Criterion) {
let mut src = &contents[..];
// decode the original src so most of the time isn't spent on unzipping
let mut decoded_src_decoder = GzDecoder::new(&mut src);
let mut decoded_src = Vec::new();
if decoded_src_decoder.read_to_end(&mut decoded_src).is_err() {
let mut src_decoder = GzDecoder::new(&mut src);
let mut input = Vec::new();
if src_decoder.read_to_end(&mut input).is_err() {
// oh probably wasn't gzipped then
decoded_src = contents;
input = contents;
}
let mut decoded_src_stream = Cursor::new(&decoded_src[..]);
let mut input_stream = Cursor::new(&input[..]);
let mut group = c.benchmark_group(format!("nbt_borrow/{filename}"));
group.throughput(Throughput::Bytes(decoded_src.len() as u64));
group.throughput(Throughput::Bytes(input.len() as u64));
group.bench_function("Decode", |b| {
b.iter(|| {
black_box(simdnbt::borrow::Nbt::read(&mut decoded_src_stream).unwrap());
decoded_src_stream.set_position(0);
black_box(simdnbt::borrow::Nbt::read(&mut input_stream).unwrap());
input_stream.set_position(0);
})
});
let nbt = simdnbt::borrow::Nbt::read(&mut decoded_src_stream)
let nbt = simdnbt::borrow::Nbt::read(&mut input_stream)
.unwrap()
.unwrap();
group.bench_function("Get", |b| {

View file

@ -92,14 +92,14 @@ impl Mutf8Str {
#[inline]
pub fn from_str(s: &str) -> Cow<Mutf8Str> {
match mutf8::encode(s) {
Cow::Borrowed(b) => Cow::Borrowed(Mutf8Str::from_slice(b)),
Cow::Owned(o) => Cow::Owned(Mutf8String { vec: o }),
Cow::Borrowed(slice) => Cow::Borrowed(Mutf8Str::from_slice(slice)),
Cow::Owned(vec) => Cow::Owned(Mutf8String { vec }),
}
}
#[inline]
pub fn to_str(&self) -> Cow<str> {
// fast check to skip if none of the bytes have the top bit set or are null
// fast check to skip if none of the bytes have the top bit set
if is_plain_ascii(&self.slice) {
// SAFETY: the check above guarantees every byte is plain ASCII, and ASCII is always valid UTF-8.
unsafe { Cow::Borrowed(std::str::from_utf8_unchecked(&self.slice)) }

View file

@ -1,4 +1,7 @@
use std::{io::Cursor, mem};
use std::{
io::Cursor,
mem::{self, MaybeUninit},
};
use byteorder::ReadBytesExt;
@ -33,6 +36,12 @@ impl NbtCompound {
if depth > MAX_DEPTH {
return Err(Error::MaxDepthExceeded);
}
let mut tags_buffer = unsafe {
MaybeUninit::<[MaybeUninit<(Mutf8String, NbtTag)>; 8]>::uninit().assume_init()
};
let mut tags_buffer_len: usize = 0;
let mut values = Vec::with_capacity(8);
loop {
let tag_type = data.read_u8().map_err(|_| Error::UnexpectedEof)?;
@ -40,9 +49,23 @@ impl NbtCompound {
break;
}
let tag_name = read_string(data)?.to_owned();
let tag = NbtTag::read_with_type(data, tag_type, depth)?;
values.push((tag_name, NbtTag::read_with_type(data, tag_type, depth)?));
tags_buffer[tags_buffer_len] = MaybeUninit::new((tag_name, tag));
tags_buffer_len += 1;
if tags_buffer_len == tags_buffer.len() {
// writing the tags in groups like this is slightly faster
for i in 0..tags_buffer_len {
values.push(unsafe { tags_buffer.get_unchecked(i).assume_init_read() });
}
tags_buffer_len = 0;
}
}
for i in 0..tags_buffer_len {
values.push(unsafe { tags_buffer.get_unchecked(i).assume_init_read() });
}
Ok(Self { values })
}

View file

@ -226,6 +226,7 @@ impl NbtTag {
unsafe { *<*const _>::from(self).cast::<u8>() }
}
#[inline(always)]
fn read_with_type(data: &mut Cursor<&[u8]>, tag_type: u8, depth: usize) -> Result<Self, Error> {
match tag_type {
BYTE_ID => Ok(NbtTag::Byte(