Skip to content

Commit c8daf44

Browse files
committed
Bitslice DES key setup: Use AVX-512 _mm512_ternarylogic_epi32()
See #5707
1 parent 49ba113 commit c8daf44

File tree

1 file changed

+71
-54
lines changed

1 file changed

+71
-54
lines changed

src/DES_bs_b.c

Lines changed: 71 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -420,43 +420,60 @@ typedef unsigned ARCH_WORD kvtype;
420420
kvtype v7 = *(kvtype *)&vp[7];
421421
#endif
422422

423+
#ifdef vlut3
424+
#define kvand_or(dst, src, mask) \
425+
dst = vlut3(dst, src, mask, 0xf8)
426+
423427
#define kvand_shl1_or(dst, src, mask) \
424-
kvand(tmp, src, mask); \
425-
kvshl1(tmp, tmp); \
426-
kvor(dst, dst, tmp)
428+
kvshl1(tmp, src); \
429+
kvand_or(dst, tmp, mask)
427430

428431
#define kvand_shl_or(dst, src, mask, shift) \
429-
kvand(tmp, src, mask); \
430-
kvshl(tmp, tmp, shift); \
432+
kvshl(tmp, src, shift); \
433+
kvand_or(dst, tmp, mask)
434+
435+
#define kvand_shr_or(dst, src, mask, shift) \
436+
kvshr(tmp, src, shift); \
437+
kvand_or(dst, tmp, mask)
438+
#else
439+
#define kvand_or(dst, src, mask) \
440+
kvand(tmp, src, m); \
431441
kvor(dst, dst, tmp)
432442

433-
#define kvand_shl1(dst, src, mask) \
434-
kvand(tmp, src, mask); \
435-
kvshl1(dst, tmp)
443+
#define kvand_shl1_or(dst, src, mask) \
444+
kvand(tmp, src, m); \
445+
kvshl1(tmp, tmp); \
446+
kvor(dst, dst, tmp)
436447

437-
#define kvand_or(dst, src, mask) \
438-
kvand(tmp, src, mask); \
448+
#define kvand_shl_or(dst, src, mask, shift) \
449+
kvand(tmp, src, m); \
450+
kvshl(tmp, tmp, shift); \
439451
kvor(dst, dst, tmp)
440452

441453
#define kvand_shr_or(dst, src, mask, shift) \
442-
kvand(tmp, src, mask); \
454+
kvand(tmp, src, m); \
443455
kvshr(tmp, tmp, shift); \
444456
kvor(dst, dst, tmp)
457+
#endif
458+
459+
#define kvand_shl1(dst, src, mask) \
460+
kvand(tmp, src, m); \
461+
kvshl1(dst, tmp)
445462

446463
#define kvand_shr(dst, src, mask, shift) \
447-
kvand(tmp, src, mask); \
464+
kvand(tmp, src, m); \
448465
kvshr(dst, tmp, shift)
449466

450467
#define FINALIZE_NEXT_KEY_BIT_0 { \
451468
kvtype m = mask01, va, vb, tmp; \
452469
kvand(va, v0, m); \
453470
kvand_shl1(vb, v1, m); \
454-
kvand_shl_or(va, v2, m, 2); \
455-
kvand_shl_or(vb, v3, m, 3); \
456-
kvand_shl_or(va, v4, m, 4); \
457-
kvand_shl_or(vb, v5, m, 5); \
458-
kvand_shl_or(va, v6, m, 6); \
459-
kvand_shl_or(vb, v7, m, 7); \
471+
kvand_shl_or(va, v2, mask04, 2); \
472+
kvand_shl_or(vb, v3, mask08, 3); \
473+
kvand_shl_or(va, v4, mask10, 4); \
474+
kvand_shl_or(vb, v5, mask20, 5); \
475+
kvand_shl_or(va, v6, mask40, 6); \
476+
kvand_shl_or(vb, v7, mask80, 7); \
460477
kvor(*(kvtype *)kp, va, vb); \
461478
kp++; \
462479
}
@@ -465,12 +482,12 @@ typedef unsigned ARCH_WORD kvtype;
465482
kvtype m = mask02, va, vb, tmp; \
466483
kvand_shr(va, v0, m, 1); \
467484
kvand(vb, v1, m); \
468-
kvand_shl1_or(va, v2, m); \
469-
kvand_shl_or(vb, v3, m, 2); \
470-
kvand_shl_or(va, v4, m, 3); \
471-
kvand_shl_or(vb, v5, m, 4); \
472-
kvand_shl_or(va, v6, m, 5); \
473-
kvand_shl_or(vb, v7, m, 6); \
485+
kvand_shl1_or(va, v2, mask04); \
486+
kvand_shl_or(vb, v3, mask08, 2); \
487+
kvand_shl_or(va, v4, mask10, 3); \
488+
kvand_shl_or(vb, v5, mask20, 4); \
489+
kvand_shl_or(va, v6, mask40, 5); \
490+
kvand_shl_or(vb, v7, mask80, 6); \
474491
kvor(*(kvtype *)kp, va, vb); \
475492
kp++; \
476493
}
@@ -480,11 +497,11 @@ typedef unsigned ARCH_WORD kvtype;
480497
kvand_shr(va, v0, m, 2); \
481498
kvand_shr(vb, v1, m, 1); \
482499
kvand_or(va, v2, m); \
483-
kvand_shl1_or(vb, v3, m); \
484-
kvand_shl_or(va, v4, m, 2); \
485-
kvand_shl_or(vb, v5, m, 3); \
486-
kvand_shl_or(va, v6, m, 4); \
487-
kvand_shl_or(vb, v7, m, 5); \
500+
kvand_shl1_or(vb, v3, mask08); \
501+
kvand_shl_or(va, v4, mask10, 2); \
502+
kvand_shl_or(vb, v5, mask20, 3); \
503+
kvand_shl_or(va, v6, mask40, 4); \
504+
kvand_shl_or(vb, v7, mask80, 5); \
488505
kvor(*(kvtype *)kp, va, vb); \
489506
kp++; \
490507
}
@@ -493,12 +510,12 @@ typedef unsigned ARCH_WORD kvtype;
493510
kvtype m = mask08, va, vb, tmp; \
494511
kvand_shr(va, v0, m, 3); \
495512
kvand_shr(vb, v1, m, 2); \
496-
kvand_shr_or(va, v2, m, 1); \
513+
kvand_shr_or(va, v2, mask04, 1); \
497514
kvand_or(vb, v3, m); \
498-
kvand_shl1_or(va, v4, m); \
499-
kvand_shl_or(vb, v5, m, 2); \
500-
kvand_shl_or(va, v6, m, 3); \
501-
kvand_shl_or(vb, v7, m, 4); \
515+
kvand_shl1_or(va, v4, mask10); \
516+
kvand_shl_or(vb, v5, mask20, 2); \
517+
kvand_shl_or(va, v6, mask40, 3); \
518+
kvand_shl_or(vb, v7, mask80, 4); \
502519
kvor(*(kvtype *)kp, va, vb); \
503520
kp++; \
504521
}
@@ -507,12 +524,12 @@ typedef unsigned ARCH_WORD kvtype;
507524
kvtype m = mask10, va, vb, tmp; \
508525
kvand_shr(va, v0, m, 4); \
509526
kvand_shr(vb, v1, m, 3); \
510-
kvand_shr_or(va, v2, m, 2); \
511-
kvand_shr_or(vb, v3, m, 1); \
527+
kvand_shr_or(va, v2, mask04, 2); \
528+
kvand_shr_or(vb, v3, mask08, 1); \
512529
kvand_or(va, v4, m); \
513-
kvand_shl1_or(vb, v5, m); \
514-
kvand_shl_or(va, v6, m, 2); \
515-
kvand_shl_or(vb, v7, m, 3); \
530+
kvand_shl1_or(vb, v5, mask20); \
531+
kvand_shl_or(va, v6, mask40, 2); \
532+
kvand_shl_or(vb, v7, mask80, 3); \
516533
kvor(*(kvtype *)kp, va, vb); \
517534
kp++; \
518535
}
@@ -521,12 +538,12 @@ typedef unsigned ARCH_WORD kvtype;
521538
kvtype m = mask20, va, vb, tmp; \
522539
kvand_shr(va, v0, m, 5); \
523540
kvand_shr(vb, v1, m, 4); \
524-
kvand_shr_or(va, v2, m, 3); \
525-
kvand_shr_or(vb, v3, m, 2); \
526-
kvand_shr_or(va, v4, m, 1); \
541+
kvand_shr_or(va, v2, mask04, 3); \
542+
kvand_shr_or(vb, v3, mask08, 2); \
543+
kvand_shr_or(va, v4, mask10, 1); \
527544
kvand_or(vb, v5, m); \
528-
kvand_shl1_or(va, v6, m); \
529-
kvand_shl_or(vb, v7, m, 2); \
545+
kvand_shl1_or(va, v6, mask40); \
546+
kvand_shl_or(vb, v7, mask80, 2); \
530547
kvor(*(kvtype *)kp, va, vb); \
531548
kp++; \
532549
}
@@ -535,12 +552,12 @@ typedef unsigned ARCH_WORD kvtype;
535552
kvtype m = mask40, va, vb, tmp; \
536553
kvand_shr(va, v0, m, 6); \
537554
kvand_shr(vb, v1, m, 5); \
538-
kvand_shr_or(va, v2, m, 4); \
539-
kvand_shr_or(vb, v3, m, 3); \
540-
kvand_shr_or(va, v4, m, 2); \
541-
kvand_shr_or(vb, v5, m, 1); \
555+
kvand_shr_or(va, v2, mask04, 4); \
556+
kvand_shr_or(vb, v3, mask08, 3); \
557+
kvand_shr_or(va, v4, mask10, 2); \
558+
kvand_shr_or(vb, v5, mask20, 1); \
542559
kvand_or(va, v6, m); \
543-
kvand_shl1_or(vb, v7, m); \
560+
kvand_shl1_or(vb, v7, mask80); \
544561
kvor(*(kvtype *)kp, va, vb); \
545562
kp++; \
546563
}
@@ -549,11 +566,11 @@ typedef unsigned ARCH_WORD kvtype;
549566
kvtype m = mask80, va, vb, tmp; \
550567
kvand_shr(va, v0, m, 7); \
551568
kvand_shr(vb, v1, m, 6); \
552-
kvand_shr_or(va, v2, m, 5); \
553-
kvand_shr_or(vb, v3, m, 4); \
554-
kvand_shr_or(va, v4, m, 3); \
555-
kvand_shr_or(vb, v5, m, 2); \
556-
kvand_shr_or(va, v6, m, 1); \
569+
kvand_shr_or(va, v2, mask04, 5); \
570+
kvand_shr_or(vb, v3, mask08, 4); \
571+
kvand_shr_or(va, v4, mask10, 3); \
572+
kvand_shr_or(vb, v5, mask20, 2); \
573+
kvand_shr_or(va, v6, mask40, 1); \
557574
kvand_or(vb, v7, m); \
558575
kvor(*(kvtype *)kp, va, vb); \
559576
kp++; \

0 commit comments

Comments
 (0)