mirror of
https://github.com/azalea-rs/simdnbt.git
synced 2025-08-02 15:36:03 +00:00
use simd for is_plain_ascii
This commit is contained in:
parent
c68c1f859f
commit
d9e0cb390c
4 changed files with 14 additions and 17 deletions
|
@ -2,7 +2,9 @@
|
|||
|
||||
an unnecessarily fast nbt decoder. like seriously you probably don't need this unless you're trying to win benchmarks.
|
||||
|
||||
simdnbt currently only makes use of simd instructions for swapping the endianness of arrays, and tbh that's really only there so i can call it "simdnbt" without lying. the name is mostly a play on simdjson.
|
||||
simdnbt currently makes use of simd instructions for two things:
|
||||
- swapping the endianness of int arrays
|
||||
- checking if a string is plain ascii for faster mutf8 to utf8 conversion
|
||||
|
||||
simdnbt might be the fastest nbt decoder currently in existence. however, to achieve this silly speed, it takes a couple of shortcuts:
|
||||
1. it requires a reference to the original data (to avoid cloning)
|
||||
|
|
|
@ -101,7 +101,7 @@ fn simdnbt_items_from_nbt(nbt: simdnbt::Nbt) -> Option<Vec<Option<Item>>> {
|
|||
|
||||
fn main() {
|
||||
let input = black_box(include_bytes!("../tests/realworld.nbt"));
|
||||
for _ in 0..1000000 {
|
||||
for _ in 0..100000 {
|
||||
let nbt = Nbt::new(&mut Cursor::new(input));
|
||||
let nbt = black_box(nbt.unwrap().unwrap());
|
||||
black_box(simdnbt_items_from_nbt(nbt));
|
||||
|
|
|
@ -897,15 +897,6 @@ mod tests {
|
|||
assert_eq!(ints.len(), 1023);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stringtest() {
|
||||
let nbt = Nbt::new(&mut Cursor::new(include_bytes!("../tests/stringtest.nbt")))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(nbt.list("😃").unwrap().strings().unwrap().len(), 16);
|
||||
}
|
||||
|
||||
// #[test]
|
||||
// fn generate_inttest() {
|
||||
// use byteorder::WriteBytesExt;
|
||||
|
|
16
src/mutf8.rs
16
src/mutf8.rs
|
@ -4,6 +4,7 @@ use std::{
|
|||
borrow::{Borrow, Cow},
|
||||
fmt, mem,
|
||||
ops::Deref,
|
||||
simd::prelude::*,
|
||||
};
|
||||
|
||||
/// An M-UTF8 string slice. This is how strings are represented internally in NBT.
|
||||
|
@ -20,19 +21,22 @@ pub struct Mutf8String {
|
|||
#[inline]
|
||||
fn is_plain_ascii(slice: &[u8]) -> bool {
|
||||
let mut is_plain_ascii = true;
|
||||
let chunks_exact = slice.array_chunks::<4>();
|
||||
let remainder = chunks_exact.remainder();
|
||||
for &byte in remainder {
|
||||
if byte & 0b1000_0000 != 0 {
|
||||
let chunks_exact = slice.array_chunks::<32>();
|
||||
for &byte in chunks_exact.remainder() {
|
||||
if byte & 0b10000000 != 0 {
|
||||
is_plain_ascii = false;
|
||||
}
|
||||
}
|
||||
let mask = u8x32::splat(0b10000000);
|
||||
let zero = u8x32::splat(0);
|
||||
for &chunk in chunks_exact {
|
||||
let chunk = u32::from_be_bytes(chunk);
|
||||
if chunk & 0b10000000_10000000_10000000_10000000 != 0 {
|
||||
let simd = u8x32::from_array(chunk);
|
||||
let xor = simd & mask;
|
||||
if xor != zero {
|
||||
is_plain_ascii = false;
|
||||
}
|
||||
}
|
||||
|
||||
is_plain_ascii
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue