1 use alloc::boxed::Box;
2 use core::convert::TryInto;
3 
4 /// The Hashtable trait used by the compression to store hashed bytes to their position.
5 /// `val` can be maximum the size of the input in bytes.
6 ///
7 /// `pos` can have a maximum value of u16::MAX or 65535
8 /// If the hashtable is smaller it needs to reduce the pos to its space, e.g. by right
9 /// shifting.
10 ///
11 /// Duplication dictionary size.
12 ///
13 /// Every four bytes is assigned an entry. When this number is lower, fewer entries exists, and
14 /// thus collisions are more likely, hurting the compression ratio.
15 
16 /// hashes and right shifts to a maximum value of 16bit, 65535
17 /// The right shift is done in order to not exceed, the hashtables capacity
18 #[inline]
hash(sequence: u32) -> u3219 fn hash(sequence: u32) -> u32 {
20     (sequence.wrapping_mul(2654435761_u32)) >> 16
21 }
22 
23 /// hashes and right shifts to a maximum value of 16bit, 65535
24 /// The right shift is done in order to not exceed, the hashtables capacity
25 #[cfg(target_pointer_width = "64")]
26 #[inline]
hash5(sequence: usize) -> u3227 fn hash5(sequence: usize) -> u32 {
28     let primebytes = if cfg!(target_endian = "little") {
29         889523592379_usize
30     } else {
31         11400714785074694791_usize
32     };
33     (((sequence << 24).wrapping_mul(primebytes)) >> 48) as u32
34 }
35 
36 pub trait HashTable {
get_at(&self, pos: usize) -> usize37     fn get_at(&self, pos: usize) -> usize;
put_at(&mut self, pos: usize, val: usize)38     fn put_at(&mut self, pos: usize, val: usize);
clear(&mut self)39     fn clear(&mut self);
40     #[inline]
41     #[cfg(target_pointer_width = "64")]
get_hash_at(input: &[u8], pos: usize) -> usize42     fn get_hash_at(input: &[u8], pos: usize) -> usize {
43         hash5(super::compress::get_batch_arch(input, pos)) as usize
44     }
45     #[inline]
46     #[cfg(target_pointer_width = "32")]
get_hash_at(input: &[u8], pos: usize) -> usize47     fn get_hash_at(input: &[u8], pos: usize) -> usize {
48         hash(super::compress::get_batch(input, pos)) as usize
49     }
50 }
51 
52 const HASHTABLE_SIZE_4K: usize = 4 * 1024;
53 const HASHTABLE_BIT_SHIFT_4K: usize = 4;
54 
55 #[derive(Debug)]
56 #[repr(align(64))]
57 pub struct HashTable4KU16 {
58     dict: Box<[u16; HASHTABLE_SIZE_4K]>,
59 }
60 impl HashTable4KU16 {
61     #[inline]
new() -> Self62     pub fn new() -> Self {
63         // This generates more efficient assembly in contrast to Box::new(slice), because of an
64         // optmized call alloc_zeroed, vs. alloc + memset
65         // try_into is optimized away
66         let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
67             .into_boxed_slice()
68             .try_into()
69             .unwrap();
70         Self { dict }
71     }
72 }
73 impl HashTable for HashTable4KU16 {
74     #[inline]
get_at(&self, hash: usize) -> usize75     fn get_at(&self, hash: usize) -> usize {
76         self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
77     }
78     #[inline]
put_at(&mut self, hash: usize, val: usize)79     fn put_at(&mut self, hash: usize, val: usize) {
80         self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16;
81     }
82     #[inline]
clear(&mut self)83     fn clear(&mut self) {
84         self.dict.fill(0);
85     }
86     #[inline]
get_hash_at(input: &[u8], pos: usize) -> usize87     fn get_hash_at(input: &[u8], pos: usize) -> usize {
88         hash(super::get_batch(input, pos)) as usize
89     }
90 }
91 
92 #[derive(Debug)]
93 pub struct HashTable4K {
94     dict: Box<[u32; HASHTABLE_SIZE_4K]>,
95 }
96 impl HashTable4K {
97     #[inline]
new() -> Self98     pub fn new() -> Self {
99         let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
100             .into_boxed_slice()
101             .try_into()
102             .unwrap();
103         Self { dict }
104     }
105 
106     #[cold]
107     #[allow(dead_code)]
reposition(&mut self, offset: u32)108     pub fn reposition(&mut self, offset: u32) {
109         for i in self.dict.iter_mut() {
110             *i = i.saturating_sub(offset);
111         }
112     }
113 }
114 impl HashTable for HashTable4K {
115     #[inline]
get_at(&self, hash: usize) -> usize116     fn get_at(&self, hash: usize) -> usize {
117         self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
118     }
119     #[inline]
put_at(&mut self, hash: usize, val: usize)120     fn put_at(&mut self, hash: usize, val: usize) {
121         self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u32;
122     }
123     #[inline]
clear(&mut self)124     fn clear(&mut self) {
125         self.dict.fill(0);
126     }
127 }
128 
129 const HASHTABLE_SIZE_8K: usize = 8 * 1024;
130 const HASH_TABLE_BIT_SHIFT_8K: usize = 3;
131 
132 #[derive(Debug)]
133 pub struct HashTable8K {
134     dict: Box<[u32; HASHTABLE_SIZE_8K]>,
135 }
136 #[allow(dead_code)]
137 impl HashTable8K {
138     #[inline]
new() -> Self139     pub fn new() -> Self {
140         let dict = alloc::vec![0; HASHTABLE_SIZE_8K]
141             .into_boxed_slice()
142             .try_into()
143             .unwrap();
144 
145         Self { dict }
146     }
147 }
148 impl HashTable for HashTable8K {
149     #[inline]
get_at(&self, hash: usize) -> usize150     fn get_at(&self, hash: usize) -> usize {
151         self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize
152     }
153     #[inline]
put_at(&mut self, hash: usize, val: usize)154     fn put_at(&mut self, hash: usize, val: usize) {
155         self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32;
156     }
157     #[inline]
clear(&mut self)158     fn clear(&mut self) {
159         self.dict.fill(0);
160     }
161 }
162