Source code

Revision control

Copy as Markdown

Other Tools

/// Slides both hash tables of `state` (`head` and `prev`) down by the window size.
///
/// Each table entry has `state.w_size` subtracted from it with saturation, see
/// `slide_hash_chain`.
pub fn slide_hash(state: &mut crate::deflate::State) {
    // NOTE(review): `as` truncates silently; this assumes `w_size` always fits in a `u16`
    // -- confirm against where `w_size` is set.
    let window = state.w_size as u16;

    // Both tables are expected to have a power-of-two length (presumably 2^8 up to 2^16 --
    // TODO confirm), i.e. a multiple of the small power-of-two chunk sizes used by the
    // implementations, so chunked processing does not leave any entries behind.
    slide_hash_chain(state.head.as_mut_slice(), window);
    slide_hash_chain(state.prev.as_mut_slice(), window);
}
/// Subtracts `wsize` (saturating at zero) from every entry of `table`, using the first
/// implementation whose CPU feature check succeeds for the current target architecture and
/// falling back to the portable implementation otherwise.
fn slide_hash_chain(table: &mut [u16], wsize: u16) {
    #[cfg(target_arch = "x86_64")]
    if crate::cpu_features::is_enabled_avx2_and_bmi2() {
        // SAFETY: the avx2 and bmi2 target features are enabled (checked just above).
        unsafe { avx2::slide_hash_chain(table, wsize) };
        return;
    }

    #[cfg(target_arch = "aarch64")]
    if crate::cpu_features::is_enabled_neon() {
        // SAFETY: the neon target feature is enabled (checked just above).
        unsafe { neon::slide_hash_chain(table, wsize) };
        return;
    }

    #[cfg(target_arch = "wasm32")]
    if crate::cpu_features::is_enabled_simd128() {
        // SAFETY: the simd128 target feature is enabled (checked just above).
        unsafe { wasm::slide_hash_chain(table, wsize) };
        return;
    }

    // No specialised implementation is available: use the portable one.
    rust::slide_hash_chain(table, wsize);
}
/// Subtracts `wsize` from every entry of `table`, saturating at zero.
///
/// The bulk of the table is processed in fixed-size chunks of `N` entries, which gives the
/// optimizer a constant trip count for the inner loop. Any trailing entries that do not fill
/// a whole chunk are processed one by one afterwards, so no entry is ever left unslid,
/// whatever the length of `table`.
///
/// `N` must be non-zero (`chunks_exact_mut` panics on a chunk size of zero).
// Forced inlining is presumably wanted so that the loop is compiled as part of each caller,
// including the `#[target_feature]` wrappers -- NOTE(review): confirm before relaxing it.
#[inline(always)]
fn generic_slide_hash_chain<const N: usize>(table: &mut [u16], wsize: u16) {
    let mut chunks = table.chunks_exact_mut(N);

    // `by_ref` keeps `chunks` alive so that the remainder can be retrieved afterwards.
    for chunk in chunks.by_ref() {
        for m in chunk.iter_mut() {
            *m = m.saturating_sub(wsize);
        }
    }

    // Fewer than `N` entries; empty whenever `table.len()` is a multiple of `N`.
    for m in chunks.into_remainder() {
        *m = m.saturating_sub(wsize);
    }
}
mod rust {
pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
// 32 means that 4 128-bit values can be processed per iteration. That appear to be the
// optimal amount on x86_64 (SSE) and aarch64 (NEON).
super::generic_slide_hash_chain::<32>(table, wsize);
}
}
#[cfg(target_arch = "x86_64")]
mod avx2 {
/// # Safety
///
/// Behavior is undefined if the `avx2` target feature is not enabled
#[target_feature(enable = "avx2")]
#[target_feature(enable = "bmi2")]
#[target_feature(enable = "bmi1")]
pub unsafe fn slide_hash_chain(table: &mut [u16], wsize: u16) {
// 64 means that 4 256-bit values can be processed per iteration.
// That appear to be the optimal amount for avx2.
//
// This vectorizes well https://godbolt.org/z/sGbdYba7K
super::generic_slide_hash_chain::<64>(table, wsize);
}
}
/// Implementation compiled with the `neon` target feature (aarch64 only).
#[cfg(target_arch = "aarch64")]
mod neon {
    use super::generic_slide_hash_chain;

    /// Entries per chunk: 32 `u16` values are four 128-bit values per iteration. That appears
    /// to be the optimal amount for neon.
    const CHUNK_LEN: usize = 32;

    /// Subtracts `wsize` (saturating at zero) from the entries of `table`.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if the `neon` target feature is not enabled
    #[target_feature(enable = "neon")]
    pub unsafe fn slide_hash_chain(table: &mut [u16], wsize: u16) {
        generic_slide_hash_chain::<CHUNK_LEN>(table, wsize);
    }
}
/// Implementation compiled with the `simd128` target feature (wasm32 only).
#[cfg(target_arch = "wasm32")]
mod wasm {
    use super::generic_slide_hash_chain;

    /// Entries per chunk: 32 `u16` values are four 128-bit values per iteration. That appears
    /// to be the optimal amount on x86_64 (SSE) and aarch64 (NEON), which is what this will
    /// ultimately compile down to.
    const CHUNK_LEN: usize = 32;

    /// Subtracts `wsize` (saturating at zero) from the entries of `table`.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if the `simd128` target feature is not enabled
    #[target_feature(enable = "simd128")]
    pub unsafe fn slide_hash_chain(table: &mut [u16], wsize: u16) {
        generic_slide_hash_chain::<CHUNK_LEN>(table, wsize);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Window size used by the fixed-vector tests.
    const WSIZE: u16 = 32768;

    /// Table contents before sliding.
    const INPUT: [u16; 64] = [
        0, 0, 28790, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43884, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 64412, 0, 0, 0, 0, 0, 21043, 0, 0, 0, 0, 0, 23707, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 64026, 0, 0, 20182,
    ];

    /// Expected table contents after sliding `INPUT` by `WSIZE`: entries below `WSIZE`
    /// saturate to zero, the others are reduced by `WSIZE`.
    const OUTPUT: [u16; 64] = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 31644, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 31258, 0, 0, 0,
    ];

    /// Runs `slide` on a copy of `INPUT` with `WSIZE` and checks the result against `OUTPUT`.
    fn assert_slides_fixture(slide: impl FnOnce(&mut [u16], u16)) {
        let mut table = INPUT;
        slide(&mut table, WSIZE);
        assert_eq!(table, OUTPUT);
    }

    #[test]
    fn test_slide_hash_rust() {
        assert_slides_fixture(rust::slide_hash_chain);
    }

    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_slide_hash_avx2() {
        // Nothing to check when the CPU lacks the required features.
        if !crate::cpu_features::is_enabled_avx2_and_bmi2() {
            return;
        }

        // SAFETY: guarded by the runtime feature check above.
        assert_slides_fixture(|table, wsize| unsafe { avx2::slide_hash_chain(table, wsize) });
    }

    #[test]
    #[cfg(target_arch = "aarch64")]
    fn test_slide_hash_neon() {
        // Nothing to check when the CPU lacks the required feature.
        if !crate::cpu_features::is_enabled_neon() {
            return;
        }

        // SAFETY: guarded by the runtime feature check above.
        assert_slides_fixture(|table, wsize| unsafe { neon::slide_hash_chain(table, wsize) });
    }

    #[test]
    #[cfg(target_arch = "wasm32")]
    fn test_slide_hash_wasm() {
        // Nothing to check when the required feature is not enabled.
        if !crate::cpu_features::is_enabled_simd128() {
            return;
        }

        // SAFETY: guarded by the runtime feature check above.
        assert_slides_fixture(|table, wsize| unsafe { wasm::slide_hash_chain(table, wsize) });
    }

    quickcheck::quickcheck! {
        /// The dispatching `slide_hash_chain` must agree with the portable implementation.
        fn slide_is_rust_slide(v: Vec<u16>, wsize: u16) -> bool {
            // Pad with `u16::MAX` up to a multiple of 64 (the biggest chunk size currently
            // in use).
            let mut expected = v;
            let padded_len = expected.len().next_multiple_of(64);
            expected.resize(padded_len, u16::MAX);
            let mut actual = expected.clone();

            rust::slide_hash_chain(&mut expected, wsize);
            slide_hash_chain(&mut actual, wsize);

            expected == actual
        }
    }
}