mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 13:54:21 +00:00
Unroll Sha3 inner loop
Issue #699 has since been fixed. Nearly a 3x perf improvement, measured using --release-fast. Sha3_256 (before): 96 Mb/s; Sha3_256 (after): 267 Mb/s; Sha3_512 (before): 53 Mb/s; Sha3_512 (after): 142 Mb/s. There were no real gains from unrolling the other initialization loops in the crypto functions, so they have been left as is.
This commit is contained in:
parent
5a7a0e8518
commit
7a893691c0
3 changed files with 10 additions and 14 deletions
|
|
@ -108,7 +108,6 @@ pub const Md5 = struct {
|
||||||
|
|
||||||
var s: [16]u32 = undefined;
|
var s: [16]u32 = undefined;
|
||||||
|
|
||||||
// ERROR: cannot unroll this at comptime
|
|
||||||
var i: usize = 0;
|
var i: usize = 0;
|
||||||
while (i < 16) : (i += 1) {
|
while (i < 16) : (i += 1) {
|
||||||
// NOTE: Performing or's separately improves perf by ~10%
|
// NOTE: Performing or's separately improves perf by ~10%
|
||||||
|
|
|
||||||
|
|
@ -156,7 +156,6 @@ fn Sha2_32(comptime params: Sha2Params32) type { return struct {
|
||||||
|
|
||||||
var s: [64]u32 = undefined;
|
var s: [64]u32 = undefined;
|
||||||
|
|
||||||
// ERROR: Cannot unroll at compile-time.
|
|
||||||
var i: usize = 0;
|
var i: usize = 0;
|
||||||
while (i < 16) : (i += 1) {
|
while (i < 16) : (i += 1) {
|
||||||
s[i] = 0;
|
s[i] = 0;
|
||||||
|
|
@ -472,7 +471,6 @@ fn Sha2_64(comptime params: Sha2Params64) type { return struct {
|
||||||
|
|
||||||
var s: [80]u64 = undefined;
|
var s: [80]u64 = undefined;
|
||||||
|
|
||||||
// ERROR: Cannot unroll at compile-time.
|
|
||||||
var i: usize = 0;
|
var i: usize = 0;
|
||||||
while (i < 16) : (i += 1) {
|
while (i < 16) : (i += 1) {
|
||||||
s[i] = 0;
|
s[i] = 0;
|
||||||
|
|
|
||||||
|
|
@ -123,35 +123,34 @@ fn keccak_f(comptime F: usize, d: []u8) void {
|
||||||
*r = mem.readIntLE(u64, d[8*i .. 8*i + 8]);
|
*r = mem.readIntLE(u64, d[8*i .. 8*i + 8]);
|
||||||
}
|
}
|
||||||
|
|
||||||
var x: usize = 0;
|
comptime var x: usize = 0;
|
||||||
var y: usize = 0;
|
comptime var y: usize = 0;
|
||||||
// TODO: Cannot unroll all loops here due to comptime differences.
|
for (RC[0..no_rounds]) |round| {
|
||||||
inline for (RC[0..no_rounds]) |round| {
|
|
||||||
// theta
|
// theta
|
||||||
x = 0; while (x < 5) : (x += 1) {
|
x = 0; inline while (x < 5) : (x += 1) {
|
||||||
c[x] = s[x] ^ s[x+5] ^ s[x+10] ^ s[x+15] ^ s[x+20];
|
c[x] = s[x] ^ s[x+5] ^ s[x+10] ^ s[x+15] ^ s[x+20];
|
||||||
}
|
}
|
||||||
x = 0; while (x < 5) : (x += 1) {
|
x = 0; inline while (x < 5) : (x += 1) {
|
||||||
t[0] = c[M5[x+4]] ^ math.rotl(u64, c[M5[x+1]], usize(1));
|
t[0] = c[M5[x+4]] ^ math.rotl(u64, c[M5[x+1]], usize(1));
|
||||||
y = 0; while (y < 5) : (y += 1) {
|
y = 0; inline while (y < 5) : (y += 1) {
|
||||||
s[x + y*5] ^= t[0];
|
s[x + y*5] ^= t[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// rho+pi
|
// rho+pi
|
||||||
t[0] = s[1];
|
t[0] = s[1];
|
||||||
x = 0; while (x < 24) : (x += 1) {
|
x = 0; inline while (x < 24) : (x += 1) {
|
||||||
c[0] = s[PIL[x]];
|
c[0] = s[PIL[x]];
|
||||||
s[PIL[x]] = math.rotl(u64, t[0], ROTC[x]);
|
s[PIL[x]] = math.rotl(u64, t[0], ROTC[x]);
|
||||||
t[0] = c[0];
|
t[0] = c[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// chi
|
// chi
|
||||||
y = 0; while (y < 5) : (y += 1) {
|
y = 0; inline while (y < 5) : (y += 1) {
|
||||||
x = 0; while (x < 5) : (x += 1) {
|
x = 0; inline while (x < 5) : (x += 1) {
|
||||||
c[x] = s[x + y*5];
|
c[x] = s[x + y*5];
|
||||||
}
|
}
|
||||||
x = 0; while (x < 5) : (x += 1) {
|
x = 0; inline while (x < 5) : (x += 1) {
|
||||||
s[x + y*5] = c[x] ^ (~c[M5[x+1]] & c[M5[x+2]]);
|
s[x + y*5] = c[x] ^ (~c[M5[x+1]] & c[M5[x+2]]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue