mirror of
https://github.com/azalea-rs/simdnbt.git
synced 2025-08-02 07:26:04 +00:00
more silly string optimizations
This commit is contained in:
parent
7117d0adb0
commit
c68c1f859f
8 changed files with 457 additions and 47 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -1,3 +1,7 @@
|
|||
/target
|
||||
/Cargo.lock
|
||||
.vscode
|
||||
|
||||
/flamegraph.svg
|
||||
/perf.data
|
||||
/perf.data.old
|
||||
|
|
|
@ -23,7 +23,7 @@ hematite-nbt = { version = "0.5.2", default-features = false }
|
|||
|
||||
[profile.release]
|
||||
lto = true
|
||||
# debug = true
|
||||
debug = true
|
||||
|
||||
[profile.bench]
|
||||
lto = true
|
||||
|
@ -37,6 +37,10 @@ name = "nbt"
|
|||
harness = false
|
||||
name = "compare"
|
||||
|
||||
[[bench]]
|
||||
harness = false
|
||||
name = "compare_realworld"
|
||||
|
||||
[[bench]]
|
||||
harness = false
|
||||
name = "mutf8"
|
||||
|
|
|
@ -62,29 +62,29 @@ pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
|||
})
|
||||
});
|
||||
|
||||
group.bench_function("valence_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = valence_nbt::Compound::from_binary(&mut &input[..]).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
// group.bench_function("valence_parse", |b| {
|
||||
// b.iter(|| {
|
||||
// let input = black_box(input);
|
||||
// let nbt = valence_nbt::Compound::from_binary(&mut &input[..]).unwrap();
|
||||
// black_box(nbt);
|
||||
// })
|
||||
// });
|
||||
|
||||
group.bench_function("fastnbt_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt: fastnbt::Value = fastnbt::from_bytes(input).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
// group.bench_function("fastnbt_parse", |b| {
|
||||
// b.iter(|| {
|
||||
// let input = black_box(input);
|
||||
// let nbt: fastnbt::Value = fastnbt::from_bytes(input).unwrap();
|
||||
// black_box(nbt);
|
||||
// })
|
||||
// });
|
||||
|
||||
group.bench_function("hematite_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = nbt::Blob::from_reader(&mut Cursor::new(input)).unwrap();
|
||||
black_box(nbt);
|
||||
})
|
||||
});
|
||||
// group.bench_function("hematite_parse", |b| {
|
||||
// b.iter(|| {
|
||||
// let input = black_box(input);
|
||||
// let nbt = nbt::Blob::from_reader(&mut Cursor::new(input)).unwrap();
|
||||
// black_box(nbt);
|
||||
// })
|
||||
// });
|
||||
|
||||
// // writing
|
||||
|
||||
|
|
357
benches/compare_realworld.rs
Normal file
357
benches/compare_realworld.rs
Normal file
|
@ -0,0 +1,357 @@
|
|||
use std::{
|
||||
collections::HashMap,
|
||||
fs::File,
|
||||
io::{Cursor, Read},
|
||||
};
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
||||
use flate2::read::GzDecoder;
|
||||
|
||||
pub fn bench_read_file(filename: &str, c: &mut Criterion) {
|
||||
let mut file = File::open(format!("tests/{filename}")).unwrap();
|
||||
let mut contents = Vec::new();
|
||||
file.read_to_end(&mut contents).unwrap();
|
||||
let mut src = &contents[..];
|
||||
|
||||
// decode the original src so most of the time isn't spent on unzipping
|
||||
let mut decoded_src_decoder = GzDecoder::new(&mut src);
|
||||
let mut input = Vec::new();
|
||||
if decoded_src_decoder.read_to_end(&mut input).is_err() {
|
||||
// oh probably wasn't gzipped then
|
||||
input = contents;
|
||||
}
|
||||
let input = input.as_slice();
|
||||
|
||||
let mut group = c.benchmark_group(format!("compare_realworld/{filename}"));
|
||||
group.throughput(Throughput::Bytes(input.len() as u64));
|
||||
|
||||
{
|
||||
// compare to make sure they decode equally
|
||||
let azalea_nbt =
|
||||
azalea_items_from_nbt(azalea_nbt::Nbt::read(&mut Cursor::new(input)).unwrap()).unwrap();
|
||||
let graphite_nbt =
|
||||
graphite_items_from_nbt(graphite_binary::nbt::decode::read(&mut &input[..]).unwrap())
|
||||
.unwrap();
|
||||
let simdnbt_nbt =
|
||||
simdnbt_items_from_nbt(simdnbt::Nbt::new(&mut Cursor::new(input)).unwrap().unwrap())
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(azalea_nbt, graphite_nbt);
|
||||
assert_eq!(azalea_nbt, simdnbt_nbt);
|
||||
}
|
||||
|
||||
group.bench_function("azalea_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = black_box(azalea_nbt::Nbt::read(&mut Cursor::new(input)).unwrap());
|
||||
black_box(azalea_items_from_nbt(nbt));
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("graphite_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = black_box(graphite_binary::nbt::decode::read(&mut &input[..]).unwrap());
|
||||
// black_box(nbt);
|
||||
black_box(graphite_items_from_nbt(nbt));
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("simdnbt_parse", |b| {
|
||||
b.iter(|| {
|
||||
let input = black_box(input);
|
||||
let nbt = black_box(simdnbt::Nbt::new(&mut Cursor::new(input)));
|
||||
let nbt = nbt.unwrap().unwrap();
|
||||
black_box(simdnbt_items_from_nbt(nbt));
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Debug)]
|
||||
pub struct Item {
|
||||
pub id: i16,
|
||||
pub damage: i16,
|
||||
pub count: i8,
|
||||
|
||||
pub head_texture_id: Option<String>,
|
||||
|
||||
pub skyblock_id: Option<String>,
|
||||
pub reforge: Option<String>,
|
||||
|
||||
pub display: ItemDisplay,
|
||||
|
||||
pub enchantments: HashMap<String, i32>,
|
||||
pub timestamp: Option<String>,
|
||||
}
|
||||
|
||||
/// Presentation data of an [`Item`].
///
/// The extractors fall back to empty/`false`/`None` for every field that is
/// missing from the NBT, so [`Default`] yields exactly the value produced for
/// an item without any display data.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
pub struct ItemDisplay {
    /// `display.Name`, or the empty string when absent.
    pub name: String,
    /// Entries of the `display.Lore` string list; empty when absent.
    pub lore: Vec<String>,

    /// Whether `ExtraAttributes` contains an `ench` entry.
    pub has_glint: bool,

    /// `display.color`, when present.
    pub color: Option<i32>,
}
|
||||
|
||||
fn simdnbt_items_from_nbt(nbt: simdnbt::Nbt) -> Option<Vec<Option<Item>>> {
|
||||
let mut items = Vec::new();
|
||||
for item_nbt in nbt
|
||||
.list("i")
|
||||
.and_then(|list| list.compounds())
|
||||
.unwrap_or_default()
|
||||
{
|
||||
// check if "id" is present, if not, skip
|
||||
if !item_nbt.contains("id") {
|
||||
// this just means the item isn't present
|
||||
items.push(None);
|
||||
continue;
|
||||
}
|
||||
|
||||
let item_tag = item_nbt.compound("tag")?;
|
||||
let item_extra_attributes = item_tag.compound("ExtraAttributes");
|
||||
let item_display = item_tag.compound("display");
|
||||
|
||||
items.push(Some(Item {
|
||||
id: item_nbt.short("id")?,
|
||||
damage: item_nbt.short("Damage")?,
|
||||
count: item_nbt.byte("Count")?,
|
||||
|
||||
head_texture_id: item_tag
|
||||
.compound("SkullOwner")
|
||||
.and_then(|skull_owner| skull_owner.compound("Properties"))
|
||||
.and_then(|properties| properties.list("textures"))
|
||||
.and_then(|textures| textures.compounds())
|
||||
.and_then(|textures| textures.get(0))
|
||||
.and_then(|texture| texture.string("Value"))
|
||||
// the real program does some base64+json decoding here but that's unnecessary for the benchmark
|
||||
.map(|value| value.to_string()),
|
||||
skyblock_id: item_extra_attributes
|
||||
.and_then(|e| e.string("id"))
|
||||
.map(|id| id.to_string()),
|
||||
reforge: item_extra_attributes
|
||||
.and_then(|e| e.string("modifier"))
|
||||
.map(|id| id.to_string()),
|
||||
|
||||
display: ItemDisplay {
|
||||
name: item_display
|
||||
.and_then(|d| d.string("Name"))
|
||||
.map(|n| n.to_string())
|
||||
.unwrap_or_default(),
|
||||
lore: item_display
|
||||
.and_then(|d| d.list("Lore"))
|
||||
.and_then(|l| l.strings())
|
||||
.map(|l| l.iter().map(|s| s.to_string()).collect())
|
||||
.unwrap_or_default(),
|
||||
color: item_display.and_then(|d| d.int("color")),
|
||||
has_glint: item_extra_attributes
|
||||
.map(|e| e.contains("ench"))
|
||||
.unwrap_or_default(),
|
||||
},
|
||||
enchantments: item_extra_attributes
|
||||
.and_then(|e| e.compound("enchantments"))
|
||||
.map(|e| {
|
||||
e.iter()
|
||||
.map(|(k, v)| (k.to_string(), v.int().unwrap_or_default()))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
timestamp: item_extra_attributes
|
||||
.and_then(|e| e.string("timestamp"))
|
||||
.map(|t| t.to_string()),
|
||||
}));
|
||||
}
|
||||
Some(items)
|
||||
}
|
||||
|
||||
fn azalea_items_from_nbt(nbt: azalea_nbt::Nbt) -> Option<Vec<Option<Item>>> {
|
||||
let mut items = Vec::new();
|
||||
let azalea_nbt::NbtList::Compound(item_compound_list) = nbt
|
||||
.as_compound()
|
||||
.and_then(|c| c.get(""))
|
||||
.and_then(|c| c.as_compound())
|
||||
.and_then(|c| c.get("i"))
|
||||
.and_then(|i| i.as_list())?
|
||||
else {
|
||||
return None;
|
||||
};
|
||||
for item_nbt in item_compound_list {
|
||||
// check if "id" is present, if not, skip
|
||||
let Some(id) = item_nbt.get("id") else {
|
||||
// this just means the item isn't present
|
||||
items.push(None);
|
||||
continue;
|
||||
};
|
||||
|
||||
// let item_tag = item_nbt.compound("tag")?;
|
||||
// let item_extra_attributes = item_tag.compound("ExtraAttributes");
|
||||
// let item_display = item_tag.compound("display");
|
||||
let item_tag = item_nbt.get("tag")?.as_compound()?;
|
||||
let item_extra_attributes = item_tag
|
||||
.get("ExtraAttributes")
|
||||
.and_then(|e| e.as_compound());
|
||||
let item_display = item_tag.get("display").and_then(|d| d.as_compound());
|
||||
|
||||
items.push(Some(Item {
|
||||
id: *id.as_short()?,
|
||||
damage: *item_nbt.get("Damage")?.as_short()?,
|
||||
count: *item_nbt.get("Count")?.as_byte()?,
|
||||
head_texture_id: item_tag
|
||||
.get("SkullOwner")
|
||||
.and_then(|skull_owner| skull_owner.as_compound())
|
||||
.and_then(|skull_owner| {
|
||||
skull_owner
|
||||
.get("Properties")
|
||||
.and_then(|properties| properties.as_compound())
|
||||
})
|
||||
.and_then(|properties| {
|
||||
properties
|
||||
.get("textures")
|
||||
.and_then(|textures| textures.as_list())
|
||||
})
|
||||
.and_then(|textures| {
|
||||
if let azalea_nbt::NbtList::Compound(textures) = textures {
|
||||
textures
|
||||
.get(0)
|
||||
.and_then(|texture| texture.get("Value"))
|
||||
.and_then(|value| value.as_string().cloned())
|
||||
.map(|string| string.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}),
|
||||
skyblock_id: item_extra_attributes
|
||||
.and_then(|e| e.get("id"))
|
||||
.and_then(|id| id.as_string().cloned())
|
||||
.map(|string| string.to_string()),
|
||||
reforge: item_extra_attributes
|
||||
.and_then(|e| e.get("modifier"))
|
||||
.and_then(|id| id.as_string().cloned())
|
||||
.map(|string| string.to_string()),
|
||||
display: ItemDisplay {
|
||||
name: item_display
|
||||
.and_then(|d| d.get("Name"))
|
||||
.and_then(|n| n.as_string().cloned())
|
||||
.unwrap_or_default()
|
||||
.to_string(),
|
||||
lore: item_display
|
||||
.and_then(|d| d.get("Lore"))
|
||||
.and_then(|l| l.as_list())
|
||||
.and_then(|l| {
|
||||
if let azalea_nbt::NbtList::String(l) = l {
|
||||
Some(l)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.map(|l| l.iter().map(|s| s.to_string()).collect())
|
||||
.unwrap_or_default(),
|
||||
color: item_display
|
||||
.and_then(|d| d.get("color"))
|
||||
.and_then(|c| c.as_int())
|
||||
.copied(),
|
||||
has_glint: item_extra_attributes
|
||||
.map(|e| e.get("ench").is_some())
|
||||
.unwrap_or_default(),
|
||||
},
|
||||
enchantments: item_extra_attributes
|
||||
.and_then(|e| e.get("enchantments"))
|
||||
.and_then(|e| e.as_compound())
|
||||
.map(|e| {
|
||||
e.iter()
|
||||
.map(|(k, v)| (k.to_string(), v.as_int().copied().unwrap_or_default()))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
timestamp: item_extra_attributes
|
||||
.and_then(|e| e.get("timestamp"))
|
||||
.and_then(|t| t.as_string().cloned())
|
||||
.map(|string| string.to_string()),
|
||||
}));
|
||||
}
|
||||
Some(items)
|
||||
}
|
||||
|
||||
fn graphite_items_from_nbt(nbt: graphite_binary::nbt::NBT) -> Option<Vec<Option<Item>>> {
|
||||
let mut items = Vec::new();
|
||||
for item_nbt in nbt.find_root("i").and_then(|i| nbt.iter(i))? {
|
||||
// check if "id" is present, if not, skip
|
||||
let Some(id) = nbt.find(item_nbt, "id") else {
|
||||
// this just means the item isn't present
|
||||
items.push(None);
|
||||
continue;
|
||||
};
|
||||
|
||||
let item_tag = nbt.find(item_nbt, "tag")?;
|
||||
let item_extra_attributes = nbt.find(item_tag, "ExtraAttributes");
|
||||
let item_display = nbt.find(item_tag, "display");
|
||||
|
||||
items.push(Some(Item {
|
||||
id: id.as_short()?,
|
||||
damage: nbt.find(item_nbt, "Damage")?.as_short()?,
|
||||
count: nbt.find(item_nbt, "Count")?.as_byte()?,
|
||||
|
||||
head_texture_id: nbt
|
||||
.find(item_tag, "SkullOwner")
|
||||
.and_then(|skull_owner| nbt.find(skull_owner, "Properties"))
|
||||
.and_then(|properties| nbt.find(properties, "textures"))
|
||||
.and_then(|textures| nbt.iter(textures)?.next())
|
||||
.and_then(|texture| nbt.find(texture, "Value"))
|
||||
// the real program does some base64+json decoding here but that's unnecessary for the benchmark
|
||||
.and_then(|value| value.as_string().cloned()),
|
||||
skyblock_id: item_extra_attributes
|
||||
.and_then(|e| nbt.find(e, "id"))
|
||||
.and_then(|id| id.as_string().cloned()),
|
||||
reforge: item_extra_attributes
|
||||
.and_then(|e| nbt.find(e, "modifier"))
|
||||
.and_then(|id| id.as_string().cloned()),
|
||||
|
||||
display: ItemDisplay {
|
||||
name: item_display
|
||||
.and_then(|d| nbt.find(d, "Name"))
|
||||
.and_then(|n| n.as_string().cloned())
|
||||
.unwrap_or_default(),
|
||||
lore: item_display
|
||||
.and_then(|d| nbt.find(d, "Lore"))
|
||||
.and_then(|l| nbt.iter(l))
|
||||
.map(|l| l.filter_map(|s| s.as_string().cloned()).collect())
|
||||
.unwrap_or_default(),
|
||||
color: item_display
|
||||
.and_then(|d| nbt.find(d, "color"))
|
||||
.and_then(|c| c.as_int()),
|
||||
has_glint: item_extra_attributes
|
||||
.map(|e| nbt.find(e, "ench").is_some())
|
||||
.unwrap_or_default(),
|
||||
},
|
||||
enchantments: item_extra_attributes
|
||||
.and_then(|e| nbt.find(e, "enchantments"))
|
||||
.and_then(|e| nbt.iter(e))
|
||||
.map(|e| {
|
||||
e.map(|n| {
|
||||
(
|
||||
nbt.find(n, "key")
|
||||
.and_then(|k| k.as_string())
|
||||
.cloned()
|
||||
.unwrap_or_default(),
|
||||
nbt.find(n, "value")
|
||||
.and_then(|v| v.as_int())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
timestamp: item_extra_attributes
|
||||
.and_then(|e| nbt.find(e, "timestamp"))
|
||||
.and_then(|t| t.as_string().cloned()),
|
||||
}));
|
||||
}
|
||||
Some(items)
|
||||
}
|
||||
|
||||
fn bench(c: &mut Criterion) {
|
||||
bench_read_file("realworld.nbt", c);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench);
|
||||
criterion_main!(benches);
|
|
@ -4,14 +4,32 @@ use criterion::{criterion_group, criterion_main, Criterion};
|
|||
use simdnbt::Mutf8Str;
|
||||
|
||||
fn bench(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group(format!("mutf8"));
|
||||
let mut group = c.benchmark_group("mutf8");
|
||||
|
||||
group.bench_function("to_str", |b| {
|
||||
let input = black_box(Mutf8Str::from_slice(b"hello world"));
|
||||
let input = black_box(Mutf8Str::from_slice(b"asgwjiebrtiowuubrtiowerthb8qwertyuwerpotihnqwiortuhbweinoqwner9opiquwehuiowrtjbwerioltubnwrioutunqweol;rkinqweuiorbqweruiqvbwefP;WOEJ Q0OEPWRIYGYUIEDRYASZTFHGC Ijkbuiljfn qwilrtb qsokjladfnqiowugrtbquiowerbq we;roiqwerghqwioerhd5rtea456etrsdyutrioutyopuipjklhkjfgghjdffghasdgxvncm,bn,.bnioug78yufvukyhfyutdyf"));
|
||||
b.iter(|| {
|
||||
black_box(input.to_str());
|
||||
})
|
||||
});
|
||||
group.bench_function("to_string", |b| {
|
||||
let input = black_box(Mutf8Str::from_slice(b"asgwjiebrtiowuubrtiowerthb8qwertyuwerpotihnqwiortuhbweinoqwner9opiquwehuiowrtjbwerioltubnwrioutunqweol;rkinqweuiorbqweruiqvbwefP;WOEJ Q0OEPWRIYGYUIEDRYASZTFHGC Ijkbuiljfn qwilrtb qsokjladfnqiowugrtbquiowerbq we;roiqwerghqwioerhd5rtea456etrsdyutrioutyopuipjklhkjfgghjdffghasdgxvncm,bn,.bnioug78yufvukyhfyutdyf"));
|
||||
b.iter(|| {
|
||||
black_box(input.to_string());
|
||||
})
|
||||
});
|
||||
group.bench_function("to_owned into_string", |b| {
|
||||
let input = black_box(Mutf8Str::from_slice(b"asgwjiebrtiowuubrtiowerthb8qwertyuwerpotihnqwiortuhbweinoqwner9opiquwehuiowrtjbwerioltubnwrioutunqweol;rkinqweuiorbqweruiqvbwefP;WOEJ Q0OEPWRIYGYUIEDRYASZTFHGC Ijkbuiljfn qwilrtb qsokjladfnqiowugrtbquiowerbq we;roiqwerghqwioerhd5rtea456etrsdyutrioutyopuipjklhkjfgghjdffghasdgxvncm,bn,.bnioug78yufvukyhfyutdyf"));
|
||||
b.iter(|| {
|
||||
black_box(input.to_owned().into_string());
|
||||
})
|
||||
});
|
||||
group.bench_function("to_owned", |b| {
|
||||
let input = black_box(Mutf8Str::from_slice(b"asgwjiebrtiowuubrtiowerthb8qwertyuwerpotihnqwiortuhbweinoqwner9opiquwehuiowrtjbwerioltubnwrioutunqweol;rkinqweuiorbqweruiqvbwefP;WOEJ Q0OEPWRIYGYUIEDRYASZTFHGC Ijkbuiljfn qwilrtb qsokjladfnqiowugrtbquiowerbq we;roiqwerghqwioerhd5rtea456etrsdyutrioutyopuipjklhkjfgghjdffghasdgxvncm,bn,.bnioug78yufvukyhfyutdyf"));
|
||||
b.iter(|| {
|
||||
black_box(input.to_owned());
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench);
|
||||
|
|
|
@ -101,7 +101,7 @@ fn simdnbt_items_from_nbt(nbt: simdnbt::Nbt) -> Option<Vec<Option<Item>>> {
|
|||
|
||||
fn main() {
|
||||
let input = black_box(include_bytes!("../tests/realworld.nbt"));
|
||||
for _ in 0..100 {
|
||||
for _ in 0..1000000 {
|
||||
let nbt = Nbt::new(&mut Cursor::new(input));
|
||||
let nbt = black_box(nbt.unwrap().unwrap());
|
||||
black_box(simdnbt_items_from_nbt(nbt));
|
||||
|
|
13
src/lib.rs
13
src/lib.rs
|
@ -12,6 +12,7 @@
|
|||
//! ```
|
||||
|
||||
#![feature(portable_simd)]
|
||||
#![feature(array_chunks)]
|
||||
|
||||
mod error;
|
||||
mod mutf8;
|
||||
|
@ -322,7 +323,7 @@ fn read_int_array(data: &mut Cursor<&[u8]>) -> Result<Vec<i32>, Error> {
|
|||
let mut ints = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_i32(&mut ints, length);
|
||||
swap_endianness_32bit(&mut ints, length);
|
||||
}
|
||||
|
||||
let ints = {
|
||||
|
@ -341,7 +342,7 @@ fn read_long_array(data: &mut Cursor<&[u8]>) -> Result<Vec<i64>, Error> {
|
|||
let mut ints = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_i64(&mut ints, length);
|
||||
swap_endianness_64bit(&mut ints, length);
|
||||
}
|
||||
|
||||
let ints = {
|
||||
|
@ -359,7 +360,7 @@ fn read_float_array(data: &mut Cursor<&[u8]>) -> Result<Vec<f32>, Error> {
|
|||
let mut floats = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_i32(&mut floats, length);
|
||||
swap_endianness_32bit(&mut floats, length);
|
||||
}
|
||||
|
||||
let floats = {
|
||||
|
@ -377,7 +378,7 @@ fn read_double_array(data: &mut Cursor<&[u8]>) -> Result<Vec<f64>, Error> {
|
|||
let mut doubles = array_bytes.to_vec();
|
||||
|
||||
if cfg!(target_endian = "little") {
|
||||
swap_endianness_i64(&mut doubles, length);
|
||||
swap_endianness_64bit(&mut doubles, length);
|
||||
}
|
||||
|
||||
let doubles = {
|
||||
|
@ -390,7 +391,7 @@ fn read_double_array(data: &mut Cursor<&[u8]>) -> Result<Vec<f64>, Error> {
|
|||
Ok(doubles)
|
||||
}
|
||||
|
||||
fn swap_endianness_i32(bytes: &mut [u8], num: usize) {
|
||||
fn swap_endianness_32bit(bytes: &mut [u8], num: usize) {
|
||||
for i in 0..num / 16 {
|
||||
let simd: u8x64 = Simd::from_slice(bytes[i * 16 * 4..(i + 1) * 16 * 4].as_ref());
|
||||
#[rustfmt::skip]
|
||||
|
@ -464,7 +465,7 @@ fn swap_endianness_i32(bytes: &mut [u8], num: usize) {
|
|||
}
|
||||
}
|
||||
|
||||
fn swap_endianness_i64(bytes: &mut [u8], num: usize) {
|
||||
fn swap_endianness_64bit(bytes: &mut [u8], num: usize) {
|
||||
for i in 0..num / 8 {
|
||||
let simd: u8x64 = Simd::from_slice(bytes[i * 64..i * 64 + 64].as_ref());
|
||||
#[rustfmt::skip]
|
||||
|
|
58
src/mutf8.rs
58
src/mutf8.rs
|
@ -17,6 +17,25 @@ pub struct Mutf8String {
|
|||
vec: Vec<u8>,
|
||||
}
|
||||
|
||||
/// Returns `true` when every byte of `slice` has its top bit clear, i.e. the
/// whole slice is 7-bit ASCII. An empty slice counts as plain ASCII.
///
/// Delegates to the standard library's `<[u8]>::is_ascii`, which performs the
/// same high-bit test without relying on the unstable `array_chunks` slice API.
#[inline]
fn is_plain_ascii(slice: &[u8]) -> bool {
    slice.is_ascii()
}
|
||||
|
||||
impl Mutf8Str {
|
||||
pub fn to_string_lossy(&self) -> Cow<str> {
|
||||
String::from_utf8_lossy(&self.slice)
|
||||
|
@ -25,40 +44,35 @@ impl Mutf8Str {
|
|||
#[inline]
|
||||
pub fn from_slice(slice: &[u8]) -> &Mutf8Str {
|
||||
// SAFETY: &[u8] and &Mutf8Str are the same layout.
|
||||
unsafe { mem::transmute(slice) }
|
||||
unsafe { mem::transmute::<&[u8], &Mutf8Str>(slice) }
|
||||
}
|
||||
|
||||
// we can't implement FromStr on Cow<Mutf8Str>
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn from_str(s: &str) -> Cow<Mutf8Str> {
|
||||
match mutf8::encode(s) {
|
||||
Cow::Borrowed(b) => Cow::Borrowed(Mutf8Str::from_slice(b)),
|
||||
Cow::Owned(o) => Cow::Owned(Mutf8String { vec: o }),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_str(&self) -> Cow<str> {
|
||||
// fast check to skip if none of the bytes have the top bit set or are null
|
||||
let mut is_not_ascii = false;
|
||||
for &byte in self.slice.into_iter() {
|
||||
if byte & 0b1000_0000 != 0 || byte == 0 {
|
||||
is_not_ascii = true;
|
||||
}
|
||||
}
|
||||
|
||||
if is_not_ascii {
|
||||
return match mutf8::decode(&self.slice).expect("Mutf8Str must alwaus be valid MUTF-8") {
|
||||
if is_plain_ascii(&self.slice) {
|
||||
// SAFETY: &[u8] and &str are the same layout.
|
||||
unsafe { Cow::Borrowed(std::str::from_utf8_unchecked(&self.slice)) }
|
||||
} else {
|
||||
match mutf8::decode(&self.slice).expect("Mutf8Str must alwaus be valid MUTF-8") {
|
||||
Cow::Borrowed(b) => Cow::Borrowed(b),
|
||||
Cow::Owned(o) => Cow::Owned(o),
|
||||
};
|
||||
} else {
|
||||
// SAFETY: &[u8] and &str are the same layout.
|
||||
unsafe { Cow::Borrowed(mem::transmute(self.slice.as_ref())) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Mutf8Str {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let self_as_str = self.to_str();
|
||||
self_as_str.fmt(f)
|
||||
f.write_str(&self.to_str())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -82,6 +96,18 @@ impl Mutf8String {
|
|||
pub fn as_str(&self) -> &Mutf8Str {
|
||||
Mutf8Str::from_slice(self.vec.as_slice())
|
||||
}
|
||||
|
||||
pub fn into_string(self) -> String {
|
||||
if is_plain_ascii(&self.vec) {
|
||||
// SAFETY: &[u8] and &str are the same layout.
|
||||
unsafe { String::from_utf8_unchecked(self.vec) }
|
||||
} else {
|
||||
match mutf8::decode(&self.vec).expect("Mutf8Str must alwaus be valid MUTF-8") {
|
||||
Cow::Borrowed(b) => b.to_owned(),
|
||||
Cow::Owned(o) => o,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Deref for Mutf8String {
|
||||
type Target = Mutf8Str;
|
||||
|
|
Loading…
Add table
Reference in a new issue