1
0
Fork 0
mirror of https://github.com/azalea-rs/simdnbt.git synced 2025-08-02 15:36:03 +00:00

optimized converting mutf8 to utf8

This commit is contained in:
mat 2023-09-02 22:12:24 -05:00
parent 07fa3ed4f6
commit 7117d0adb0
5 changed files with 57 additions and 11 deletions

View file

@ -120,10 +120,10 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
// Benchmark entry point: calls `bench_read_file` once per fixture file name,
// passing the shared `Criterion` handle through. Fixtures whose call is
// commented out are not benchmarked.
fn bench(c: &mut Criterion) {
// bench_read_file("hello_world.nbt", c);
// bench_read_file("bigtest.nbt", c);
// bench_read_file("simple_player.dat", c);
bench_read_file("bigtest.nbt", c);
bench_read_file("simple_player.dat", c);
bench_read_file("complex_player.dat", c);
// bench_read_file("level.dat", c);
bench_read_file("level.dat", c);
// bench_read_file("stringtest.nbt", c);
// bench_read_file("inttest.nbt", c);
}

View file

@ -1,6 +1,18 @@
use criterion::{criterion_group, criterion_main, Criterion};
use std::hint::black_box;
// Placeholder benchmark entry point: accepts the `Criterion` handle but registers no benchmarks.
fn bench(_c: &mut Criterion) {}
use criterion::{criterion_group, criterion_main, Criterion};
use simdnbt::Mutf8Str;
/// Registers the `mutf8` benchmark group, which measures `Mutf8Str::to_str`
/// on a short ASCII-only input.
fn bench(c: &mut Criterion) {
    // A plain literal is enough for the group name; `format!` without
    // arguments only adds an allocation.
    let mut group = c.benchmark_group("mutf8");
    group.bench_function("to_str", |b| {
        let input = Mutf8Str::from_slice(b"hello world");
        b.iter(|| {
            // Hide the input from the optimizer on every iteration so the
            // conversion cannot be hoisted out of the measured loop, and hide
            // the output so the call is not eliminated as dead code.
            black_box(black_box(&input).to_str());
        })
    });
    // Finish the group explicitly so its summary is generated here rather
    // than implicitly when the group is dropped.
    group.finish();
}
criterion_group!(benches, bench);
criterion_main!(benches);

View file

@ -772,6 +772,7 @@ impl<'a> ListTag<'a> {
mod tests {
use std::io::Read;
use byteorder::WriteBytesExt;
use flate2::read::GzDecoder;
use super::*;
@ -831,7 +832,6 @@ mod tests {
#[test]
fn inttest_1024() {
use byteorder::WriteBytesExt;
let mut data = Vec::new();
data.write_u8(COMPOUND_ID).unwrap();
data.write_u16::<BE>(0).unwrap();
@ -854,7 +854,6 @@ mod tests {
#[test]
fn inttest_1021() {
use byteorder::WriteBytesExt;
let mut data = Vec::new();
data.write_u8(COMPOUND_ID).unwrap();
data.write_u16::<BE>(0).unwrap();
@ -877,7 +876,6 @@ mod tests {
#[test]
fn longtest_1023() {
use byteorder::WriteBytesExt;
let mut data = Vec::new();
data.write_u8(COMPOUND_ID).unwrap();
data.write_u16::<BE>(0).unwrap();
@ -898,6 +896,15 @@ mod tests {
assert_eq!(ints.len(), 1023);
}
/// Parses the bundled `stringtest.nbt` fixture and checks that the list
/// stored under the "😃" key contains 16 strings.
#[test]
fn stringtest() {
    let parsed = Nbt::new(&mut Cursor::new(include_bytes!("../tests/stringtest.nbt")))
        .unwrap()
        .unwrap();

    // Resolve the list and count its strings in one statement, then compare.
    let string_count = parsed.list("😃").unwrap().strings().unwrap().len();
    assert_eq!(string_count, 16);
}
// #[test]
// fn generate_inttest() {
// use byteorder::WriteBytesExt;
@ -916,4 +923,18 @@ mod tests {
// std::fs::write("tests/inttest1023.nbt", out).unwrap();
// }
// #[test]
// fn generate_stringtest() {
// let mut out = Vec::new();
// out.write_u8(COMPOUND_ID).unwrap();
// out.write_u16::<BE>(0).unwrap();
// out.write_u8(LIST_ID).unwrap();
// out.write_u16::<BE>(0).unwrap();
// out.write_u8(STRING_ID).unwrap();
// out.write_i32::<BE>(16).unwrap();
// out.extend_from_slice(&std::fs::read("tests/stringtest.nbt").unwrap().as_slice()[13..]);
// out.write_u8(END_ID).unwrap();
// std::fs::write("tests/stringtest2.nbt", out).unwrap();
// }
}

View file

@ -35,9 +35,22 @@ impl Mutf8Str {
}
}
pub fn to_str(&self) -> Cow<str> {
match mutf8::decode(&self.slice).expect("Mutf8Str must alwaus be valid MUTF-8") {
Cow::Borrowed(b) => Cow::Borrowed(b),
Cow::Owned(o) => Cow::Owned(o),
// fast check to skip if none of the bytes have the top bit set or are null
let mut is_not_ascii = false;
for &byte in self.slice.into_iter() {
if byte & 0b1000_0000 != 0 || byte == 0 {
is_not_ascii = true;
}
}
if is_not_ascii {
return match mutf8::decode(&self.slice).expect("Mutf8Str must alwaus be valid MUTF-8") {
Cow::Borrowed(b) => Cow::Borrowed(b),
Cow::Owned(o) => Cow::Owned(o),
};
} else {
// SAFETY: &[u8] and &str are the same layout.
unsafe { Cow::Borrowed(mem::transmute(self.slice.as_ref())) }
}
}
}

BIN
tests/stringtest.nbt Executable file → Normal file

Binary file not shown.