hashbrown/control/group/
neon.rs

1use super::super::{BitMask, Tag};
2use core::arch::aarch64 as neon;
3use core::mem;
4use core::num::NonZeroU64;
5
/// Unsigned integer that carries the raw bits of a `BitMask` for this
/// group implementation: one byte per tag, eight tags per word.
pub(crate) type BitMaskWord = u64;
/// Non-zero counterpart of [`BitMaskWord`].
pub(crate) type NonZeroBitMaskWord = NonZeroU64;
/// Distance, in bits, between the mask bits of two adjacent tags.
// NOTE(review): how `BitMask` consumes the stride is defined outside this
// file; the value matches the one-byte-per-tag layout produced here.
pub(crate) const BITMASK_STRIDE: usize = 8;
/// Mask with every bit of the word set.
pub(crate) const BITMASK_MASK: BitMaskWord = BitMaskWord::MAX;
/// Mask with only the most significant bit of every byte set
/// (`0x80` repeated in all eight bytes, so byte order is irrelevant).
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = BitMaskWord::from_ne_bytes([0x80; 8]);
11
12/// Abstraction over a group of control tags which can be scanned in
13/// parallel.
14///
15/// This implementation uses a 64-bit NEON value.
16#[derive(Copy, Clone)]
17pub(crate) struct Group(neon::uint8x8_t);
18
19#[allow(clippy::use_self)]
20impl Group {
21    /// Number of bytes in the group.
22    pub(crate) const WIDTH: usize = mem::size_of::<Self>();
23
24    /// Returns a full group of empty tags, suitable for use as the initial
25    /// value for an empty hash table.
26    ///
27    /// This is guaranteed to be aligned to the group size.
28    #[inline]
29    pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
30        #[repr(C)]
31        struct AlignedTags {
32            _align: [Group; 0],
33            tags: [Tag; Group::WIDTH],
34        }
35        const ALIGNED_TAGS: AlignedTags = AlignedTags {
36            _align: [],
37            tags: [Tag::EMPTY; Group::WIDTH],
38        };
39        &ALIGNED_TAGS.tags
40    }
41
42    /// Loads a group of tags starting at the given address.
43    #[inline]
44    #[allow(clippy::cast_ptr_alignment)] // unaligned load
45    pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
46        Group(neon::vld1_u8(ptr.cast()))
47    }
48
49    /// Loads a group of tags starting at the given address, which must be
50    /// aligned to `mem::align_of::<Group>()`.
51    #[inline]
52    #[allow(clippy::cast_ptr_alignment)]
53    pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
54        debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
55        Group(neon::vld1_u8(ptr.cast()))
56    }
57
58    /// Stores the group of tags to the given address, which must be
59    /// aligned to `mem::align_of::<Group>()`.
60    #[inline]
61    #[allow(clippy::cast_ptr_alignment)]
62    pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
63        debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
64        neon::vst1_u8(ptr.cast(), self.0);
65    }
66
67    /// Returns a `BitMask` indicating all tags in the group which *may*
68    /// have the given value.
69    #[inline]
70    pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
71        unsafe {
72            let cmp = neon::vceq_u8(self.0, neon::vdup_n_u8(tag.0));
73            BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0))
74        }
75    }
76
77    /// Returns a `BitMask` indicating all tags in the group which are
78    /// `EMPTY`.
79    #[inline]
80    pub(crate) fn match_empty(self) -> BitMask {
81        self.match_tag(Tag::EMPTY)
82    }
83
84    /// Returns a `BitMask` indicating all tags in the group which are
85    /// `EMPTY` or `DELETED`.
86    #[inline]
87    pub(crate) fn match_empty_or_deleted(self) -> BitMask {
88        unsafe {
89            let cmp = neon::vcltz_s8(neon::vreinterpret_s8_u8(self.0));
90            BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0))
91        }
92    }
93
94    /// Returns a `BitMask` indicating all tags in the group which are full.
95    #[inline]
96    pub(crate) fn match_full(self) -> BitMask {
97        unsafe {
98            let cmp = neon::vcgez_s8(neon::vreinterpret_s8_u8(self.0));
99            BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0))
100        }
101    }
102
103    /// Performs the following transformation on all tags in the group:
104    /// - `EMPTY => EMPTY`
105    /// - `DELETED => EMPTY`
106    /// - `FULL => DELETED`
107    #[inline]
108    pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
109        // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
110        // and high_bit = 0 (FULL) to 1000_0000
111        //
112        // Here's this logic expanded to concrete values:
113        //   let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
114        //   1111_1111 | 1000_0000 = 1111_1111
115        //   0000_0000 | 1000_0000 = 1000_0000
116        unsafe {
117            let special = neon::vcltz_s8(neon::vreinterpret_s8_u8(self.0));
118            Group(neon::vorr_u8(special, neon::vdup_n_u8(0x80)))
119        }
120    }
121}