zig/lib/std/crypto/ascon.zig
Andrew Kelley e7b18a7ce6 std.crypto: remove inline from most functions
To quote the language reference,

It is generally better to let the compiler decide when to inline a
function, except for these scenarios:

* To change how many stack frames are in the call stack, for debugging
  purposes.
* To force comptime-ness of the arguments to propagate to the return
  value of the function, as in the above example.
* Real world performance measurements demand it. Don't guess!

Note that inline actually restricts what the compiler is allowed to do.
This can harm binary size, compilation speed, and even runtime
performance.

`zig run lib/std/crypto/benchmark.zig -OReleaseFast`
[-before-] vs {+after+}

              md5:        [-990-]        {+998+} MiB/s
             sha1:       [-1144-]       {+1140+} MiB/s
           sha256:       [-2267-]       {+2275+} MiB/s
           sha512:        [-762-]        {+767+} MiB/s
         sha3-256:        [-680-]        {+683+} MiB/s
         sha3-512:        [-362-]        {+363+} MiB/s
        shake-128:        [-835-]        {+839+} MiB/s
        shake-256:        [-680-]        {+681+} MiB/s
   turboshake-128:       [-1567-]       {+1570+} MiB/s
   turboshake-256:       [-1276-]       {+1282+} MiB/s
          blake2s:        [-778-]        {+789+} MiB/s
          blake2b:       [-1071-]       {+1086+} MiB/s
           blake3:       [-1148-]       {+1137+} MiB/s
            ghash:      [-10044-]      {+10033+} MiB/s
          polyval:       [-9726-]      {+10033+} MiB/s
         poly1305:       [-2486-]       {+2703+} MiB/s
         hmac-md5:        [-991-]        {+998+} MiB/s
        hmac-sha1:       [-1134-]       {+1137+} MiB/s
      hmac-sha256:       [-2265-]       {+2288+} MiB/s
      hmac-sha512:        [-765-]        {+764+} MiB/s
      siphash-2-4:       [-4410-]       {+4438+} MiB/s
      siphash-1-3:       [-7144-]       {+7225+} MiB/s
   siphash128-2-4:       [-4397-]       {+4449+} MiB/s
   siphash128-1-3:       [-7281-]       {+7374+} MiB/s
  aegis-128x4 mac:      [-73385-]      {+74523+} MiB/s
  aegis-256x4 mac:      [-30160-]      {+30539+} MiB/s
  aegis-128x2 mac:      [-66662-]      {+67267+} MiB/s
  aegis-256x2 mac:      [-16812-]      {+16806+} MiB/s
   aegis-128l mac:      [-33876-]      {+34055+} MiB/s
    aegis-256 mac:       [-8993-]       {+9087+} MiB/s
         aes-cmac:       2036 MiB/s
           x25519:      [-20670-]      {+16844+} exchanges/s
          ed25519:      [-29763-]      {+29576+} signatures/s
       ecdsa-p256:       [-4762-]       {+4900+} signatures/s
       ecdsa-p384:       [-1465-]       {+1500+} signatures/s
  ecdsa-secp256k1:       [-5643-]       {+5769+} signatures/s
          ed25519:      [-21926-]      {+21721+} verifications/s
          ed25519:      [-51200-]      {+50880+} verifications/s (batch)
 chacha20Poly1305:       [-1189-]       {+1109+} MiB/s
xchacha20Poly1305:       [-1196-]       {+1107+} MiB/s
 xchacha8Poly1305:       [-1466-]       {+1555+} MiB/s
 xsalsa20Poly1305:        [-660-]        {+620+} MiB/s
      aegis-128x4:      [-76389-]      {+78181+} MiB/s
      aegis-128x2:      [-53946-]      {+53495+} MiB/s
       aegis-128l:      [-27219-]      {+25621+} MiB/s
      aegis-256x4:      [-49351-]      {+49542+} MiB/s
      aegis-256x2:      [-32390-]      {+32366+} MiB/s
        aegis-256:       [-8881-]       {+8944+} MiB/s
       aes128-gcm:       [-6095-]       {+6205+} MiB/s
       aes256-gcm:       [-5306-]       {+5427+} MiB/s
       aes128-ocb:       [-8529-]      {+13974+} MiB/s
       aes256-ocb:       [-7241-]       {+9442+} MiB/s
        isapa128a:        [-204-]        {+214+} MiB/s
    aes128-single:  [-133857882-]  {+134170944+} ops/s
    aes256-single:   [-96306962-]   {+96408639+} ops/s
         aes128-8: [-1083210101-] {+1073727253+} ops/s
         aes256-8:  [-762042466-]  {+767091778+} ops/s
           bcrypt:      0.009 s/ops
           scrypt:      [-0.018-]      {+0.017+} s/ops
           argon2:      [-0.037-]      {+0.060+} s/ops
      kyber512d00:     [-206057-]     {+205779+} encaps/s
      kyber768d00:     [-156074-]     {+150711+} encaps/s
     kyber1024d00:     [-116626-]     {+115469+} encaps/s
      kyber512d00:     [-181149-]     {+182046+} decaps/s
      kyber768d00:     [-136965-]     {+135676+} decaps/s
     kyber1024d00:     [-101307-]     {+100643+} decaps/s
      kyber512d00:     [-123624-]     {+123375+} keygen/s
      kyber768d00:      [-69465-]      {+70828+} keygen/s
     kyber1024d00:      [-43117-]      {+43208+} keygen/s
2025-07-13 18:26:13 +02:00

239 lines
9.4 KiB
Zig
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Ascon is a 320-bit permutation, selected as the new standard for lightweight cryptography
//! in the NIST Lightweight Cryptography competition (2019-2023).
//! https://csrc.nist.gov/pubs/sp/800/232/ipd
//!
//! The permutation is compact, and optimized for timing and side channel resistance,
//! making it a good choice for embedded applications.
//!
//! It is not meant to be used directly, but as a building block for symmetric cryptography.
const std = @import("std");
const builtin = @import("builtin");
const debug = std.debug;
const mem = std.mem;
const testing = std.testing;
const rotr = std.math.rotr;
const native_endian = builtin.cpu.arch.endian();
/// An Ascon state.
///
/// The state is represented as 5 64-bit words.
///
/// The original NIST submission (v1.2) serializes these words as big-endian,
/// but NIST SP 800-232 switched to a little-endian representation.
/// Software implementations are free to use native endianness with no security degradation.
pub fn State(comptime endian: std.builtin.Endian) type {
    return struct {
        const Self = @This();

        /// Size of the serialized state, in bytes.
        pub const block_bytes = 40;

        const Block = [5]u64;

        /// The five state words, held as native integers.
        st: Block,

        /// Build a state from its `block_bytes`-byte serialization, decoding
        /// each 8-byte group as one word in the configured `endian` order.
        pub fn init(initial_state: [block_bytes]u8) Self {
            var state: Self = .{ .st = undefined };
            // A full-length input overwrites all five words, so nothing stays undefined.
            state.setBytes(&initial_state);
            return state;
        }

        /// Build a state directly from five native-endian words.
        pub fn initFromWords(initial_state: [5]u64) Self {
            return Self{ .st = initial_state };
        }

        /// Return the precomputed initial state for Ascon XOF.
        pub fn initXof() Self {
            return initFromWords(.{
                0xb57e273b814cd416,
                0x2b51042562ae2420,
                0x66a3a7768ddf2218,
                0x5aad0a7a8153650c,
                0x4f3e0e32539493b6,
            });
        }

        /// Return the precomputed initial state for Ascon XOFa.
        pub fn initXofA() Self {
            return initFromWords(.{
                0x44906568b77b9832,
                0xcd8d6cae53455532,
                0xf7b5212756422129,
                0x246885e1de0d225b,
                0xa8cb5ce33449973f,
            });
        }

        /// View the state words as raw bytes.
        /// The bytes are in memory order, so the layout depends on the host architecture.
        pub fn asBytes(self: *Self) *[block_bytes]u8 {
            const words: *Block = &self.st;
            return mem.asBytes(words);
        }

        /// Convert every word from the configured `endian` order to native order.
        /// This is a no-op when the two orders already agree.
        pub fn endianSwap(self: *Self) void {
            var word_index: usize = 0;
            while (word_index < self.st.len) : (word_index += 1) {
                self.st[word_index] = mem.toNative(u64, self.st[word_index], endian);
            }
        }

        /// Overwrite the leading words of the state with `bytes`.
        /// A trailing partial word is zero-padded to 8 bytes before being stored,
        /// so the whole word containing the last input byte is replaced.
        /// `bytes` must not be longer than the state.
        pub fn setBytes(self: *Self, bytes: []const u8) void {
            const whole_words = bytes.len / 8;
            var word_index: usize = 0;
            while (word_index < whole_words) : (word_index += 1) {
                self.st[word_index] = mem.readInt(u64, bytes[word_index * 8 ..][0..8], endian);
            }
            const tail = bytes[whole_words * 8 ..];
            if (tail.len != 0) {
                var last_word = [_]u8{0} ** 8;
                @memcpy(last_word[0..tail.len], tail);
                self.st[whole_words] = mem.readInt(u64, &last_word, endian);
            }
        }

        /// XOR a single byte into the state at byte position `offset`.
        pub fn addByte(self: *Self, byte: u8, offset: usize) void {
            // Position of the byte inside its 64-bit word.
            const lane: u6 = @truncate(offset % 8);
            // Byte 0 of a word is the most significant byte in big-endian order
            // and the least significant byte in little-endian order.
            const shift: u6 = switch (endian) {
                .little => 8 * lane,
                .big => 56 - 8 * lane,
            };
            self.st[offset / 8] ^= @as(u64, byte) << shift;
        }

        /// XOR `bytes` into the leading bytes of the state.
        /// A trailing partial word is zero-padded, so the state bytes past the
        /// end of the input are left unchanged.
        /// `bytes` must not be longer than the state.
        pub fn addBytes(self: *Self, bytes: []const u8) void {
            const whole_words = bytes.len / 8;
            var word_index: usize = 0;
            while (word_index < whole_words) : (word_index += 1) {
                self.st[word_index] ^= mem.readInt(u64, bytes[word_index * 8 ..][0..8], endian);
            }
            const tail = bytes[whole_words * 8 ..];
            if (tail.len != 0) {
                var last_word = [_]u8{0} ** 8;
                @memcpy(last_word[0..tail.len], tail);
                self.st[whole_words] ^= mem.readInt(u64, &last_word, endian);
            }
        }

        /// Serialize the leading `out.len` bytes of the state into `out`.
        /// `out` must not be longer than the state.
        pub fn extractBytes(self: *Self, out: []u8) void {
            const whole_words = out.len / 8;
            var word_index: usize = 0;
            while (word_index < whole_words) : (word_index += 1) {
                mem.writeInt(u64, out[word_index * 8 ..][0..8], self.st[word_index], endian);
            }
            const tail = out[whole_words * 8 ..];
            if (tail.len != 0) {
                // Serialize the whole word, then copy only the requested prefix.
                var last_word: [8]u8 = undefined;
                mem.writeInt(u64, &last_word, self.st[whole_words], endian);
                @memcpy(tail, last_word[0..tail.len]);
            }
        }

        /// Write `in` XORed with the leading bytes of the state into `out`.
        /// `out` and `in` must have the same length, which must not exceed the state size.
        pub fn xorBytes(self: *Self, out: []u8, in: []const u8) void {
            debug.assert(out.len == in.len);
            const whole_words = in.len / 8;
            var word_index: usize = 0;
            while (word_index < whole_words) : (word_index += 1) {
                const at = word_index * 8;
                // Put the state word into `endian` layout once, then work on
                // the data with plain native loads and stores.
                const key_word = mem.nativeTo(u64, self.st[word_index], endian);
                const mixed = mem.readInt(u64, in[at..][0..8], native_endian) ^ key_word;
                mem.writeInt(u64, out[at..][0..8], mixed, native_endian);
            }
            const done = whole_words * 8;
            if (done < in.len) {
                const remaining = in.len - done;
                var scratch = [_]u8{0} ** 8;
                @memcpy(scratch[0..remaining], in[done..]);
                const key_word = mem.nativeTo(u64, self.st[whole_words], endian);
                const mixed = mem.readInt(u64, &scratch, native_endian) ^ key_word;
                mem.writeInt(u64, &scratch, mixed, native_endian);
                @memcpy(out[done..], scratch[0..remaining]);
            }
        }

        /// Zero every word that holds a byte in the range `from..to`.
        /// Clearing works on whole words: `from` is rounded down and `to` is
        /// rounded up to a multiple of 8.
        pub fn clear(self: *Self, from: usize, to: usize) void {
            const first_word = from / 8;
            const end_word = (to + 7) / 8;
            for (self.st[first_word..end_word]) |*word| {
                word.* = 0;
            }
        }

        /// Zero the whole state in a way the compiler is not allowed to optimize out.
        pub fn secureZero(self: *Self) void {
            std.crypto.secureZero(u64, self.st[0..]);
        }

        /// Apply a reduced-round permutation to the state.
        /// The last `rounds` entries of the round-constant table are used, in order.
        pub fn permuteR(state: *Self, comptime rounds: u4) void {
            const round_constants = [16]u64{
                0x3c, 0x2d, 0x1e, 0x0f,
                0xf0, 0xe1, 0xd2, 0xc3,
                0xb4, 0xa5, 0x96, 0x87,
                0x78, 0x69, 0x5a, 0x4b,
            };
            inline for (round_constants[round_constants.len - rounds ..]) |round_constant| {
                state.round(round_constant);
            }
        }

        /// Apply the full 12-round permutation to the state.
        pub fn permute(state: *Self) void {
            permuteR(state, 12);
        }

        /// Apply a permutation to the state and prevent backtracking.
        /// The rate is expressed in bytes and must be a multiple of the word size (8).
        /// The capacity words (the trailing `block_bytes - rate` bytes) are saved
        /// before the permutation and XORed back into the same words afterwards.
        pub fn permuteRatchet(state: *Self, comptime rounds: u4, comptime rate: u6) void {
            const capacity = block_bytes - rate;
            debug.assert(capacity > 0 and capacity % 8 == 0); // capacity must be a multiple of 64 bits
            var saved: [capacity / 8]u64 = undefined;
            inline for (&saved, state.st[state.st.len - saved.len ..]) |*keep, word| {
                keep.* = word;
            }
            state.permuteR(rounds);
            inline for (saved, state.st[state.st.len - saved.len ..]) |keep, *word| {
                word.* ^= keep;
            }
        }

        // One round of the Ascon permutation: constant addition, substitution
        // layer, then linear diffusion layer.
        fn round(state: *Self, rk: u64) void {
            const s = &state.st;

            // Round constant, then the XORs that precede the nonlinear step.
            s[2] ^= rk;
            s[0] ^= s[4];
            s[4] ^= s[3];
            s[2] ^= s[1];

            // Nonlinear step: each word is combined with its two cyclic successors.
            var t: Block = undefined;
            inline for (0..5) |i| {
                t[i] = s[i] ^ (~s[(i + 1) % 5] & s[(i + 2) % 5]);
            }

            // XORs that follow the nonlinear step. Order matters: t[1] must
            // absorb t[0] before t[0] is updated.
            t[1] ^= t[0];
            t[3] ^= t[2];
            t[0] ^= t[4];

            // Linear diffusion: each word is XORed with two rotations of itself.
            // The complement of word 2 is applied last.
            s[0] = t[0] ^ rotr(u64, t[0], 19) ^ rotr(u64, t[0], 28);
            s[1] = t[1] ^ rotr(u64, t[1], 39) ^ rotr(u64, t[1], 61);
            s[2] = ~(t[2] ^ rotr(u64, t[2], 1) ^ rotr(u64, t[2], 6));
            s[3] = t[3] ^ rotr(u64, t[3], 10) ^ rotr(u64, t[3], 17);
            s[4] = t[4] ^ rotr(u64, t[4], 7) ^ rotr(u64, t[4], 41);
        }
    };
}
test "ascon" {
    const Permutation = State(.big);
    const ones = [_]u8{0x01} ** Permutation.block_bytes;
    var state = Permutation.init(ones);
    var serialized: [Permutation.block_bytes]u8 = undefined;

    // Known answer for the full permutation of the all-0x01 state.
    state.permute();
    state.extractBytes(&serialized);
    try testing.expectEqualSlices(u8, &[_]u8{
        148, 147, 49,  226, 218, 221, 208, 113,
        186, 94,  96,  10,  183, 219, 119, 150,
        169, 206, 65,  18,  215, 97,  78,  106,
        118, 81,  211, 150, 52,  17,  117, 64,
        216, 45,  148, 240, 65,  181, 90,  180,
    }, &serialized);

    // Clearing bytes 0..10 zeroes the first two whole words (16 bytes).
    state.clear(0, 10);
    state.extractBytes(&serialized);
    try testing.expectEqualSlices(u8, &[_]u8{
        0,   0,   0,   0,   0,   0,   0,   0,
        0,   0,   0,   0,   0,   0,   0,   0,
        169, 206, 65,  18,  215, 97,  78,  106,
        118, 81,  211, 150, 52,  17,  117, 64,
        216, 45,  148, 240, 65,  181, 90,  180,
    }, &serialized);

    // Two single-byte XORs at the same offset accumulate (1 ^ 2 == 3).
    state.addByte(1, 5);
    state.addByte(2, 5);
    state.extractBytes(&serialized);
    try testing.expectEqualSlices(u8, &[_]u8{
        0,   0,   0,   0,   0,   3,   0,   0,
        0,   0,   0,   0,   0,   0,   0,   0,
        169, 206, 65,  18,  215, 97,  78,  106,
        118, 81,  211, 150, 52,  17,  117, 64,
        216, 45,  148, 240, 65,  181, 90,  180,
    }, &serialized);

    // XORing the all-0x01 block flips the low bit of every byte.
    state.addBytes(&ones);
    state.extractBytes(&serialized);
    try testing.expectEqualSlices(u8, &[_]u8{
        1,   1,   1,   1,   1,   2,   1,   1,
        1,   1,   1,   1,   1,   1,   1,   1,
        168, 207, 64,  19,  214, 96,  79,  107,
        119, 80,  210, 151, 53,  16,  116, 65,
        217, 44,  149, 241, 64,  180, 91,  181,
    }, &serialized);
}