1use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
2use arrayref::{array_mut_ref, array_ref};
3
4cfg_if::cfg_if! {
5 if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
6 cfg_if::cfg_if! {
7 if #[cfg(blake3_avx512_ffi)] {
8 pub const MAX_SIMD_DEGREE: usize = 16;
9 } else {
10 pub const MAX_SIMD_DEGREE: usize = 8;
11 }
12 }
13 } else if #[cfg(blake3_neon)] {
14 pub const MAX_SIMD_DEGREE: usize = 4;
15 } else if #[cfg(blake3_wasm32_simd)] {
16 pub const MAX_SIMD_DEGREE: usize = 4;
17 } else {
18 pub const MAX_SIMD_DEGREE: usize = 1;
19 }
20}
21
22cfg_if::cfg_if! {
27 if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
28 cfg_if::cfg_if! {
29 if #[cfg(blake3_avx512_ffi)] {
30 pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
31 } else {
32 pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
33 }
34 }
35 } else if #[cfg(blake3_neon)] {
36 pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
37 } else if #[cfg(blake3_wasm32_simd)] {
38 pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
39 } else {
40 pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
41 }
42}
43
/// The implementation backend used for compression and hashing.
///
/// Which variants exist depends on the target architecture and on the
/// `blake3_*` cfg flags the crate was built with; `Portable` always exists.
#[derive(Debug, Clone, Copy)]
pub enum Platform {
    /// Plain Rust implementation, available in every build.
    Portable,
    /// x86/x86_64 backend dispatched to the `sse2` module.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSE2,
    /// x86/x86_64 backend dispatched to the `sse41` module.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSE41,
    /// x86/x86_64 backend dispatched to the `avx2` module for multi-input hashing.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    AVX2,
    /// x86/x86_64 backend dispatched to the `avx512` module; needs `blake3_avx512_ffi`.
    #[cfg(all(blake3_avx512_ffi, any(target_arch = "x86", target_arch = "x86_64")))]
    AVX512,
    /// Backend dispatched to the `neon` module; exists only with `blake3_neon`.
    #[cfg(blake3_neon)]
    NEON,
    /// Backend dispatched to the `wasm32_simd` module; exists only with `blake3_wasm32_simd`.
    #[cfg(blake3_wasm32_simd)]
    #[allow(non_camel_case_types)]
    WASM32_SIMD,
}
62
63impl Platform {
64 #[allow(unreachable_code)]
65 pub fn detect() -> Self {
66 #[cfg(miri)]
67 {
68 return Platform::Portable;
69 }
70
71 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
72 {
73 #[cfg(blake3_avx512_ffi)]
74 {
75 if avx512_detected() {
76 return Platform::AVX512;
77 }
78 }
79 if avx2_detected() {
80 return Platform::AVX2;
81 }
82 if sse41_detected() {
83 return Platform::SSE41;
84 }
85 if sse2_detected() {
86 return Platform::SSE2;
87 }
88 }
89 #[cfg(blake3_neon)]
92 {
93 return Platform::NEON;
94 }
95 #[cfg(blake3_wasm32_simd)]
96 {
97 return Platform::WASM32_SIMD;
98 }
99 Platform::Portable
100 }
101
102 pub fn simd_degree(&self) -> usize {
103 let degree = match self {
104 Platform::Portable => 1,
105 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
106 Platform::SSE2 => 4,
107 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
108 Platform::SSE41 => 4,
109 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
110 Platform::AVX2 => 8,
111 #[cfg(blake3_avx512_ffi)]
112 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
113 Platform::AVX512 => 16,
114 #[cfg(blake3_neon)]
115 Platform::NEON => 4,
116 #[cfg(blake3_wasm32_simd)]
117 Platform::WASM32_SIMD => 4,
118 };
119 debug_assert!(degree <= MAX_SIMD_DEGREE);
120 degree
121 }
122
123 pub fn compress_in_place(
124 &self,
125 cv: &mut CVWords,
126 block: &[u8; BLOCK_LEN],
127 block_len: u8,
128 counter: u64,
129 flags: u8,
130 ) {
131 match self {
132 Platform::Portable => portable::compress_in_place(cv, block, block_len, counter, flags),
133 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
135 Platform::SSE2 => unsafe {
136 crate::sse2::compress_in_place(cv, block, block_len, counter, flags)
137 },
138 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
140 Platform::SSE41 | Platform::AVX2 => unsafe {
141 crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
142 },
143 #[cfg(blake3_avx512_ffi)]
145 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
146 Platform::AVX512 => unsafe {
147 crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
148 },
149 #[cfg(blake3_neon)]
151 Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
152 #[cfg(blake3_wasm32_simd)]
153 Platform::WASM32_SIMD => {
154 crate::wasm32_simd::compress_in_place(cv, block, block_len, counter, flags)
155 }
156 }
157 }
158
159 pub fn compress_xof(
160 &self,
161 cv: &CVWords,
162 block: &[u8; BLOCK_LEN],
163 block_len: u8,
164 counter: u64,
165 flags: u8,
166 ) -> [u8; 64] {
167 match self {
168 Platform::Portable => portable::compress_xof(cv, block, block_len, counter, flags),
169 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
171 Platform::SSE2 => unsafe {
172 crate::sse2::compress_xof(cv, block, block_len, counter, flags)
173 },
174 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
176 Platform::SSE41 | Platform::AVX2 => unsafe {
177 crate::sse41::compress_xof(cv, block, block_len, counter, flags)
178 },
179 #[cfg(blake3_avx512_ffi)]
181 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
182 Platform::AVX512 => unsafe {
183 crate::avx512::compress_xof(cv, block, block_len, counter, flags)
184 },
185 #[cfg(blake3_neon)]
187 Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
188 #[cfg(blake3_wasm32_simd)]
189 Platform::WASM32_SIMD => {
190 crate::wasm32_simd::compress_xof(cv, block, block_len, counter, flags)
191 }
192 }
193 }
194
195 pub fn hash_many<const N: usize>(
206 &self,
207 inputs: &[&[u8; N]],
208 key: &CVWords,
209 counter: u64,
210 increment_counter: IncrementCounter,
211 flags: u8,
212 flags_start: u8,
213 flags_end: u8,
214 out: &mut [u8],
215 ) {
216 match self {
217 Platform::Portable => portable::hash_many(
218 inputs,
219 key,
220 counter,
221 increment_counter,
222 flags,
223 flags_start,
224 flags_end,
225 out,
226 ),
227 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
229 Platform::SSE2 => unsafe {
230 crate::sse2::hash_many(
231 inputs,
232 key,
233 counter,
234 increment_counter,
235 flags,
236 flags_start,
237 flags_end,
238 out,
239 )
240 },
241 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
243 Platform::SSE41 => unsafe {
244 crate::sse41::hash_many(
245 inputs,
246 key,
247 counter,
248 increment_counter,
249 flags,
250 flags_start,
251 flags_end,
252 out,
253 )
254 },
255 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
257 Platform::AVX2 => unsafe {
258 crate::avx2::hash_many(
259 inputs,
260 key,
261 counter,
262 increment_counter,
263 flags,
264 flags_start,
265 flags_end,
266 out,
267 )
268 },
269 #[cfg(blake3_avx512_ffi)]
271 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
272 Platform::AVX512 => unsafe {
273 crate::avx512::hash_many(
274 inputs,
275 key,
276 counter,
277 increment_counter,
278 flags,
279 flags_start,
280 flags_end,
281 out,
282 )
283 },
284 #[cfg(blake3_neon)]
286 Platform::NEON => unsafe {
287 crate::neon::hash_many(
288 inputs,
289 key,
290 counter,
291 increment_counter,
292 flags,
293 flags_start,
294 flags_end,
295 out,
296 )
297 },
298 #[cfg(blake3_wasm32_simd)]
300 Platform::WASM32_SIMD => unsafe {
301 crate::wasm32_simd::hash_many(
302 inputs,
303 key,
304 counter,
305 increment_counter,
306 flags,
307 flags_start,
308 flags_end,
309 out,
310 )
311 },
312 }
313 }
314
315 pub fn xof_many(
316 &self,
317 cv: &CVWords,
318 block: &[u8; BLOCK_LEN],
319 block_len: u8,
320 mut counter: u64,
321 flags: u8,
322 out: &mut [u8],
323 ) {
324 debug_assert_eq!(0, out.len() % BLOCK_LEN, "whole blocks only");
325 if out.is_empty() {
326 return;
328 }
329 match self {
330 #[cfg(blake3_avx512_ffi)]
332 #[cfg(unix)]
333 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
334 Platform::AVX512 => unsafe {
335 crate::avx512::xof_many(cv, block, block_len, counter, flags, out)
336 },
337 _ => {
338 for out_block in out.chunks_exact_mut(BLOCK_LEN) {
341 let out_array: &mut [u8; BLOCK_LEN] = out_block.try_into().unwrap();
343 *out_array = self.compress_xof(cv, block, block_len, counter, flags);
344 counter += 1;
345 }
346 }
347 }
348 }
349
350 pub fn portable() -> Self {
353 Self::Portable
354 }
355
356 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
357 pub fn sse2() -> Option<Self> {
358 if sse2_detected() {
359 Some(Self::SSE2)
360 } else {
361 None
362 }
363 }
364
365 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
366 pub fn sse41() -> Option<Self> {
367 if sse41_detected() {
368 Some(Self::SSE41)
369 } else {
370 None
371 }
372 }
373
374 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
375 pub fn avx2() -> Option<Self> {
376 if avx2_detected() {
377 Some(Self::AVX2)
378 } else {
379 None
380 }
381 }
382
383 #[cfg(blake3_avx512_ffi)]
384 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
385 pub fn avx512() -> Option<Self> {
386 if avx512_detected() {
387 Some(Self::AVX512)
388 } else {
389 None
390 }
391 }
392
393 #[cfg(blake3_neon)]
394 pub fn neon() -> Option<Self> {
395 Some(Self::NEON)
397 }
398
399 #[cfg(blake3_wasm32_simd)]
400 pub fn wasm32_simd() -> Option<Self> {
401 Some(Self::WASM32_SIMD)
403 }
404}
405
/// Reports whether the AVX-512 backend may be used; both `avx512f` and
/// `avx512vl` are required.
///
/// Always `false` under Miri or when the `no_avx512` feature is enabled.
/// Otherwise `true` if both target features are enabled at compile time, or,
/// with the `std` feature, if runtime detection finds both. Without `std`
/// there is no runtime detection.
#[cfg(all(blake3_avx512_ffi, any(target_arch = "x86", target_arch = "x86_64")))]
#[inline(always)]
// With `std`, the runtime check returns and the trailing `false` is unreachable.
#[allow(unreachable_code)]
pub fn avx512_detected() -> bool {
    // Hard opt-outs take priority over everything else.
    if cfg!(any(miri, feature = "no_avx512")) {
        return false;
    }
    // Statically enabled: no runtime check needed.
    if cfg!(all(target_feature = "avx512f", target_feature = "avx512vl")) {
        return true;
    }
    // Runtime detection is only available with `std`.
    #[cfg(feature = "std")]
    {
        return is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl");
    }
    false
}
435
/// Reports whether the AVX2 backend may be used.
///
/// Always `false` under Miri or when the `no_avx2` feature is enabled.
/// Otherwise `true` if the `avx2` target feature is enabled at compile time,
/// or, with the `std` feature, if runtime detection finds it. Without `std`
/// there is no runtime detection.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
// With `std`, the runtime check returns and the trailing `false` is unreachable.
#[allow(unreachable_code)]
pub fn avx2_detected() -> bool {
    // Hard opt-outs take priority over everything else.
    if cfg!(any(miri, feature = "no_avx2")) {
        return false;
    }
    // Statically enabled: no runtime check needed.
    if cfg!(target_feature = "avx2") {
        return true;
    }
    // Runtime detection is only available with `std`.
    #[cfg(feature = "std")]
    {
        return is_x86_feature_detected!("avx2");
    }
    false
}
462
/// Reports whether the SSE4.1 backend may be used.
///
/// Always `false` under Miri or when the `no_sse41` feature is enabled.
/// Otherwise `true` if the `sse4.1` target feature is enabled at compile
/// time, or, with the `std` feature, if runtime detection finds it. Without
/// `std` there is no runtime detection.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
// With `std`, the runtime check returns and the trailing `false` is unreachable.
#[allow(unreachable_code)]
pub fn sse41_detected() -> bool {
    // Hard opt-outs take priority over everything else.
    if cfg!(any(miri, feature = "no_sse41")) {
        return false;
    }
    // Statically enabled: no runtime check needed.
    if cfg!(target_feature = "sse4.1") {
        return true;
    }
    // Runtime detection is only available with `std`.
    #[cfg(feature = "std")]
    {
        return is_x86_feature_detected!("sse4.1");
    }
    false
}
489
/// Reports whether the SSE2 backend may be used.
///
/// Always `false` under Miri or when the `no_sse2` feature is enabled.
/// Otherwise `true` if the `sse2` target feature is enabled at compile time,
/// or, with the `std` feature, if runtime detection finds it. Without `std`
/// there is no runtime detection.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
// With `std`, the runtime check returns and the trailing `false` is unreachable.
#[allow(unreachable_code)]
pub fn sse2_detected() -> bool {
    // Hard opt-outs take priority over everything else.
    if cfg!(any(miri, feature = "no_sse2")) {
        return false;
    }
    // Statically enabled: no runtime check needed.
    if cfg!(target_feature = "sse2") {
        return true;
    }
    // Runtime detection is only available with `std`.
    #[cfg(feature = "std")]
    {
        return is_x86_feature_detected!("sse2");
    }
    false
}
516
/// Decodes 32 bytes into eight `u32` words, reading each consecutive 4-byte
/// group as a little-endian integer (`out[i]` comes from `bytes[4*i..4*i + 4]`).
#[inline(always)]
pub fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] {
    let mut out = [0u32; 8];
    // `chunks_exact(4)` yields exactly eight 4-byte groups for a 32-byte input,
    // so every output word is written exactly once.
    for (word, chunk) in out.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
    }
    out
}
530
/// Decodes 64 bytes into sixteen `u32` words, reading each consecutive 4-byte
/// group as a little-endian integer (`out[i]` comes from `bytes[4*i..4*i + 4]`).
#[inline(always)]
pub fn words_from_le_bytes_64(bytes: &[u8; 64]) -> [u32; 16] {
    let mut out = [0u32; 16];
    // `chunks_exact(4)` yields exactly sixteen 4-byte groups for a 64-byte
    // input, so every output word is written exactly once.
    for (word, chunk) in out.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
    }
    out
}
552
/// Encodes eight `u32` words as 32 bytes, writing each word in little-endian
/// order (`words[i]` fills `out[4*i..4*i + 4]`).
#[inline(always)]
pub fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] {
    let mut out = [0u8; 32];
    // `chunks_exact_mut(4)` yields exactly eight 4-byte slots, one per word.
    for (chunk, word) in out.chunks_exact_mut(4).zip(words.iter()) {
        chunk.copy_from_slice(&word.to_le_bytes());
    }
    out
}
566
/// Encodes sixteen `u32` words as 64 bytes, writing each word in little-endian
/// order (`words[i]` fills `out[4*i..4*i + 4]`).
#[inline(always)]
pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
    let mut out = [0u8; 64];
    // `chunks_exact_mut(4)` yields exactly sixteen 4-byte slots, one per word.
    for (chunk, word) in out.chunks_exact_mut(4).zip(words.iter()) {
        chunk.copy_from_slice(&word.to_le_bytes());
    }
    out
}