compiler_rt: declutter int.zig, add mulXi3 tests (#14623)

- Combine mulXi3 routines for follow-up cleanup.
- DRY up Dwords and Twords
- rename both to HalveInt and use instance
  * Justification: Not all processors have word size 32 bit.
* remove test file from CMakeLists
* DRY things.
This commit is contained in:
matu3ba 2023-02-24 19:27:02 +01:00 committed by GitHub
parent 5f70c36fa8
commit 97b9facb98
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 294 additions and 354 deletions

View file

@ -434,13 +434,12 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/log10.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/log2.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/modti3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulXi3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/muldf3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/muldi3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulf3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulo.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulsf3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/multf3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/multi3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulxf3.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/negXi2.zig"
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/negv.zig"
@ -613,7 +612,6 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/tapi.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse/test.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/yaml.zig"
"${CMAKE_SOURCE_DIR}/src/main.zig"
"${CMAKE_SOURCE_DIR}/src/mingw.zig"
@ -753,7 +751,7 @@ set(BUILD_ZIG2_ARGS
--deps build_options
-target "${HOST_TARGET_TRIPLE}"
)
add_custom_command(
OUTPUT "${ZIG2_C_SOURCE}"
COMMAND zig1 ${BUILD_ZIG2_ARGS}
@ -771,7 +769,7 @@ set(BUILD_COMPILER_RT_ARGS
--deps build_options
-target "${HOST_TARGET_TRIPLE}"
)
add_custom_command(
OUTPUT "${ZIG_COMPILER_RT_C_SOURCE}"
COMMAND zig1 ${BUILD_COMPILER_RT_ARGS}

View file

@ -13,8 +13,7 @@ comptime {
_ = @import("compiler_rt/shift.zig");
_ = @import("compiler_rt/negXi2.zig");
_ = @import("compiler_rt/int.zig");
_ = @import("compiler_rt/muldi3.zig");
_ = @import("compiler_rt/multi3.zig");
_ = @import("compiler_rt/mulXi3.zig");
_ = @import("compiler_rt/divti3.zig");
_ = @import("compiler_rt/udivti3.zig");
_ = @import("compiler_rt/modti3.zig");

View file

@ -1,5 +1,6 @@
const std = @import("std");
const builtin = @import("builtin");
const native_endian = builtin.cpu.arch.endian();
pub const linkage: std.builtin.GlobalLinkage = if (builtin.is_test) .Internal else .Weak;
/// Determines the symbol's visibility to other objects.
@ -221,3 +222,20 @@ pub inline fn fneg(a: anytype) @TypeOf(a) {
const negated = @bitCast(U, a) ^ sign_bit_mask;
return @bitCast(F, negated);
}
/// Builds an extern union that overlays an integer of type `T` with its two
/// equally sized halves, so the same bits can be read either as the whole
/// value (`all`) or as the `low` and `high` half-width integers (`s`).
/// The halves are signed when `signed_half` is true and unsigned otherwise.
/// The field order of `s` follows the native byte order, so `low` always
/// overlays the least significant half of `all`.
pub fn HalveInt(comptime T: type, comptime signed_half: bool) type {
    return extern union {
        /// Bit width of one half; `@divExact` requires `T` to have an even bit count.
        pub const bits = @divExact(@typeInfo(T).Int.bits, 2);
        pub const HalfTU = std.meta.Int(.unsigned, bits);
        pub const HalfTS = std.meta.Int(.signed, bits);
        pub const HalfT = if (signed_half) HalfTS else HalfTU;

        all: T,
        s: switch (native_endian) {
            .Little => extern struct { low: HalfT, high: HalfT },
            .Big => extern struct { high: HalfT, low: HalfT },
        },
    };
}

View file

@ -16,7 +16,6 @@ pub const panic = common.panic;
comptime {
@export(__divmodti4, .{ .name = "__divmodti4", .linkage = common.linkage, .visibility = common.visibility });
@export(__udivmoddi4, .{ .name = "__udivmoddi4", .linkage = common.linkage, .visibility = common.visibility });
@export(__mulsi3, .{ .name = "__mulsi3", .linkage = common.linkage, .visibility = common.visibility });
@export(__divmoddi4, .{ .name = "__divmoddi4", .linkage = common.linkage, .visibility = common.visibility });
if (common.want_aeabi) {
@export(__aeabi_idiv, .{ .name = "__aeabi_idiv", .linkage = common.linkage, .visibility = common.visibility });
@ -663,59 +662,3 @@ fn test_one_umodsi3(a: u32, b: u32, expected_r: u32) !void {
const r: u32 = __umodsi3(a, b);
try testing.expect(r == expected_r);
}
/// Multiplies two `i32` values with wrapping semantics using only shifts and
/// additions (binary shift-and-add on the unsigned bit patterns).
pub fn __mulsi3(a: i32, b: i32) callconv(.C) i32 {
    var multiplier = @bitCast(u32, a);
    var addend = @bitCast(u32, b);
    var product: u32 = 0;
    // Consume the multiplier one bit at a time, doubling the addend each step.
    while (multiplier != 0) : ({
        multiplier >>= 1;
        addend <<= 1;
    }) {
        if ((multiplier & 1) == 1) product +%= addend;
    }
    return @bitCast(i32, product);
}
/// Checks that `__mulsi3(a, b)` yields `result`.
fn test_one_mulsi3(a: i32, b: i32, result: i32) !void {
    const actual = __mulsi3(a, b);
    try testing.expectEqual(result, actual);
}
test "mulsi3" {
    // Each row is { a, b, expected product }.
    const cases = [_][3]i32{
        .{ 0, 0, 0 },
        .{ 0, 1, 0 },
        .{ 1, 0, 0 },
        .{ 0, 10, 0 },
        .{ 10, 0, 0 },
        .{ 0, maxInt(i32), 0 },
        .{ maxInt(i32), 0, 0 },
        .{ 0, -1, 0 },
        .{ -1, 0, 0 },
        .{ 0, -10, 0 },
        .{ -10, 0, 0 },
        .{ 0, minInt(i32), 0 },
        .{ minInt(i32), 0, 0 },
        .{ 1, 1, 1 },
        .{ 1, 10, 10 },
        .{ 10, 1, 10 },
        .{ 1, maxInt(i32), maxInt(i32) },
        .{ maxInt(i32), 1, maxInt(i32) },
        .{ 1, -1, -1 },
        .{ 1, -10, -10 },
        .{ -10, 1, -10 },
        .{ 1, minInt(i32), minInt(i32) },
        .{ minInt(i32), 1, minInt(i32) },
        .{ 46340, 46340, 2147395600 },
        .{ -46340, 46340, -2147395600 },
        .{ 46340, -46340, -2147395600 },
        .{ -46340, -46340, 2147395600 },
        .{ 4194303, 8192, @truncate(i32, 34359730176) },
        .{ -4194303, 8192, @truncate(i32, -34359730176) },
        .{ 4194303, -8192, @truncate(i32, -34359730176) },
        .{ -4194303, -8192, @truncate(i32, 34359730176) },
        .{ 8192, 4194303, @truncate(i32, 34359730176) },
        .{ -8192, 4194303, @truncate(i32, -34359730176) },
        .{ 8192, -4194303, @truncate(i32, -34359730176) },
        .{ -8192, -4194303, @truncate(i32, 34359730176) },
    };
    for (cases) |case| {
        try test_one_mulsi3(case[0], case[1], case[2]);
    }
}

101
lib/compiler_rt/mulXi3.zig Normal file
View file

@ -0,0 +1,101 @@
const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;
const common = @import("common.zig");
const native_endian = builtin.cpu.arch.endian();
pub const panic = common.panic;
comptime {
    const export_linkage = common.linkage;
    const export_visibility = common.visibility;

    @export(__mulsi3, .{ .name = "__mulsi3", .linkage = export_linkage, .visibility = export_visibility });

    // The 64-bit multiply is exported as `__aeabi_lmul` when
    // `common.want_aeabi` is set and as `__muldi3` otherwise.
    if (common.want_aeabi) {
        @export(__aeabi_lmul, .{ .name = "__aeabi_lmul", .linkage = export_linkage, .visibility = export_visibility });
    } else {
        @export(__muldi3, .{ .name = "__muldi3", .linkage = export_linkage, .visibility = export_visibility });
    }

    // Both 128-bit variants are exported under the symbol name `__multi3`;
    // `common.want_windows_v2u64_abi` selects the one whose signature uses
    // `@Vector(2, u64)` instead of `i128`.
    if (common.want_windows_v2u64_abi) {
        @export(__multi3_windows_x86_64, .{ .name = "__multi3", .linkage = export_linkage, .visibility = export_visibility });
    } else {
        @export(__multi3, .{ .name = "__multi3", .linkage = export_linkage, .visibility = export_visibility });
    }
}
/// Multiplies two `i32` values with wrapping semantics using only shifts and
/// additions (binary shift-and-add on the unsigned bit patterns).
pub fn __mulsi3(a: i32, b: i32) callconv(.C) i32 {
    var multiplier = @bitCast(u32, a);
    var addend = @bitCast(u32, b);
    var product: u32 = 0;
    // Consume the multiplier one bit at a time, doubling the addend each step.
    while (multiplier != 0) : ({
        multiplier >>= 1;
        addend <<= 1;
    }) {
        if ((multiplier & 1) == 1) product +%= addend;
    }
    return @bitCast(i32, product);
}
/// Multiplies two `i64` values by delegating to `mulX`.
pub fn __muldi3(a: i64, b: i64) callconv(.C) i64 {
    const product: i64 = mulX(i64, a, b);
    return product;
}
/// Same computation as `__muldi3`, but declared with the AAPCS calling convention.
fn __aeabi_lmul(a: i64, b: i64) callconv(.AAPCS) i64 {
    const product: i64 = mulX(i64, a, b);
    return product;
}
/// Multiplies two integers of type `T` (`i64` or `i128`) with wrapping
/// semantics, operating on the half-width words of each operand.
///
/// The low halves are multiplied into a full-width value by `muldXi`; the two
/// cross products only contribute to the high half and are added with
/// wrapping arithmetic. Any other `T` is rejected at compile time.
inline fn mulX(comptime T: type, a: T, b: T) T {
    const word_t = common.HalveInt(T, false);
    const x = word_t{ .all = a };
    const y = word_t{ .all = b };
    var r = switch (T) {
        i64, i128 => word_t{ .all = muldXi(word_t.HalfT, x.s.low, y.s.low) },
        // `unreachable` here would sit in runtime code and silently compile;
        // fail the build instead.
        else => @compileError("mulX only supports i64 and i128, got " ++ @typeName(T)),
    };
    r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high;
    return r.all;
}
/// Maps a 32- or 64-bit integer type (signed or unsigned) to the signed
/// integer type with twice as many bits. Any other type is a compile error.
fn DoubleInt(comptime T: type) type {
    return switch (T) {
        u32, i32 => i64,
        u64, i64 => i128,
        else => @compileError("DoubleInt only supports u32, i32, u64 and i64, got " ++ @typeName(T)),
    };
}
/// Multiplies `a` by `b` and returns the full double-width result as
/// `DoubleInt(T)`.
///
/// Each operand is split into an upper and a lower part of `bits_in_word_2`
/// bits. The four partial products are accumulated into the `low` and `high`
/// words of the result, with `t` carrying the overflow of each step upwards.
/// NOTE(review): the only caller in this file passes an unsigned `T`; the
/// masking and shifting below presumably rely on that - confirm before
/// calling with a signed type.
fn muldXi(comptime T: type, a: T, b: T) DoubleInt(T) {
    const DT = DoubleInt(T);
    const word_t = common.HalveInt(DT, false);
    const bits_in_word_2 = @sizeOf(T) * 8 / 2;
    const lower_mask = (~@as(T, 0)) >> bits_in_word_2;

    // Both fields of `r` are assigned below before `r.all` is read.
    var r: word_t = undefined;

    // lower(a) * lower(b)
    r.s.low = (a & lower_mask) *% (b & lower_mask);
    var t: T = r.s.low >> bits_in_word_2;
    r.s.low &= lower_mask;

    // upper(a) * lower(b), plus the carry from the previous step. The sum
    // always fits in `T` (both factors and the carry are below
    // 2^bits_in_word_2), so the wrapping add used by the twin step below
    // gives the same value as a checked add.
    t +%= (a >> bits_in_word_2) *% (b & lower_mask);
    r.s.low +%= (t & lower_mask) << bits_in_word_2;
    r.s.high = t >> bits_in_word_2;

    // upper(b) * lower(a), added on top of the current upper part of `low`.
    t = r.s.low >> bits_in_word_2;
    r.s.low &= lower_mask;
    t +%= (b >> bits_in_word_2) *% (a & lower_mask);
    r.s.low +%= (t & lower_mask) << bits_in_word_2;
    r.s.high +%= t >> bits_in_word_2;

    // upper(a) * upper(b) only contributes to the high word.
    r.s.high +%= (a >> bits_in_word_2) *% (b >> bits_in_word_2);
    return r.all;
}
/// Multiplies two `i128` values by delegating to `mulX`.
pub fn __multi3(a: i128, b: i128) callconv(.C) i128 {
    const product: i128 = mulX(i128, a, b);
    return product;
}
const v2u64 = @Vector(2, u64);

/// Variant of `__multi3` whose operands and result are passed as
/// `@Vector(2, u64)`; the bits are reinterpreted as `i128` for the multiply.
fn __multi3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    const lhs = @bitCast(i128, a);
    const rhs = @bitCast(i128, b);
    const product = mulX(i128, lhs, rhs);
    return @bitCast(v2u64, product);
}
// Reference the companion test file so that its tests are included when this
// file is tested.
test {
_ = @import("mulXi3_test.zig");
}

View file

@ -0,0 +1,147 @@
const std = @import("std");
const testing = std.testing;
const mulXi3 = @import("mulXi3.zig");
const maxInt = std.math.maxInt;
const minInt = std.math.minInt;
/// Checks that `mulXi3.__mulsi3(a, b)` yields `result`.
fn test_one_mulsi3(a: i32, b: i32, result: i32) !void {
    const actual = mulXi3.__mulsi3(a, b);
    try testing.expectEqual(result, actual);
}
/// Checks that `mulXi3.__muldi3(a, b)` yields `expected`.
fn test__muldi3(a: i64, b: i64, expected: i64) !void {
    try testing.expect(mulXi3.__muldi3(a, b) == expected);
}
/// Checks that `mulXi3.__multi3(a, b)` yields `expected`.
fn test__multi3(a: i128, b: i128, expected: i128) !void {
    try testing.expect(mulXi3.__multi3(a, b) == expected);
}
test "mulsi3" {
    // Each row is { a, b, expected product }.
    const cases = [_][3]i32{
        .{ 0, 0, 0 },
        .{ 0, 1, 0 },
        .{ 1, 0, 0 },
        .{ 0, 10, 0 },
        .{ 10, 0, 0 },
        .{ 0, maxInt(i32), 0 },
        .{ maxInt(i32), 0, 0 },
        .{ 0, -1, 0 },
        .{ -1, 0, 0 },
        .{ 0, -10, 0 },
        .{ -10, 0, 0 },
        .{ 0, minInt(i32), 0 },
        .{ minInt(i32), 0, 0 },
        .{ 1, 1, 1 },
        .{ 1, 10, 10 },
        .{ 10, 1, 10 },
        .{ 1, maxInt(i32), maxInt(i32) },
        .{ maxInt(i32), 1, maxInt(i32) },
        .{ 1, -1, -1 },
        .{ 1, -10, -10 },
        .{ -10, 1, -10 },
        .{ 1, minInt(i32), minInt(i32) },
        .{ minInt(i32), 1, minInt(i32) },
        .{ 46340, 46340, 2147395600 },
        .{ -46340, 46340, -2147395600 },
        .{ 46340, -46340, -2147395600 },
        .{ -46340, -46340, 2147395600 },
        .{ 4194303, 8192, @truncate(i32, 34359730176) },
        .{ -4194303, 8192, @truncate(i32, -34359730176) },
        .{ 4194303, -8192, @truncate(i32, -34359730176) },
        .{ -4194303, -8192, @truncate(i32, 34359730176) },
        .{ 8192, 4194303, @truncate(i32, 34359730176) },
        .{ -8192, 4194303, @truncate(i32, -34359730176) },
        .{ 8192, -4194303, @truncate(i32, -34359730176) },
        .{ -8192, -4194303, @truncate(i32, 34359730176) },
    };
    for (cases) |case| {
        try test_one_mulsi3(case[0], case[1], case[2]);
    }
}
test "muldi3" {
    // Each row is { a, b, expected product }.
    const cases = [_][3]i64{
        .{ 0, 0, 0 },
        .{ 0, 1, 0 },
        .{ 1, 0, 0 },
        .{ 0, 10, 0 },
        .{ 10, 0, 0 },
        .{ 0, 81985529216486895, 0 },
        .{ 81985529216486895, 0, 0 },
        .{ 0, -1, 0 },
        .{ -1, 0, 0 },
        .{ 0, -10, 0 },
        .{ -10, 0, 0 },
        .{ 0, -81985529216486895, 0 },
        .{ -81985529216486895, 0, 0 },
        .{ 1, 1, 1 },
        .{ 1, 10, 10 },
        .{ 10, 1, 10 },
        .{ 1, 81985529216486895, 81985529216486895 },
        .{ 81985529216486895, 1, 81985529216486895 },
        .{ 1, -1, -1 },
        .{ 1, -10, -10 },
        .{ -10, 1, -10 },
        .{ 1, -81985529216486895, -81985529216486895 },
        .{ -81985529216486895, 1, -81985529216486895 },
        .{ 3037000499, 3037000499, 9223372030926249001 },
        .{ -3037000499, 3037000499, -9223372030926249001 },
        .{ 3037000499, -3037000499, -9223372030926249001 },
        .{ -3037000499, -3037000499, 9223372030926249001 },
        .{ 4398046511103, 2097152, 9223372036852678656 },
        .{ -4398046511103, 2097152, -9223372036852678656 },
        .{ 4398046511103, -2097152, -9223372036852678656 },
        .{ -4398046511103, -2097152, 9223372036852678656 },
        .{ 2097152, 4398046511103, 9223372036852678656 },
        .{ -2097152, 4398046511103, -9223372036852678656 },
        .{ 2097152, -4398046511103, -9223372036852678656 },
        .{ -2097152, -4398046511103, 9223372036852678656 },
    };
    for (cases) |case| {
        try test__muldi3(case[0], case[1], case[2]);
    }
}
test "multi3" {
    // Each row is { a, b, expected product }.
    const cases = [_][3]i128{
        .{ 0, 0, 0 },
        .{ 0, 1, 0 },
        .{ 1, 0, 0 },
        .{ 0, 10, 0 },
        .{ 10, 0, 0 },
        .{ 0, 81985529216486895, 0 },
        .{ 81985529216486895, 0, 0 },
        .{ 0, -1, 0 },
        .{ -1, 0, 0 },
        .{ 0, -10, 0 },
        .{ -10, 0, 0 },
        .{ 0, -81985529216486895, 0 },
        .{ -81985529216486895, 0, 0 },
        .{ 1, 1, 1 },
        .{ 1, 10, 10 },
        .{ 10, 1, 10 },
        .{ 1, 81985529216486895, 81985529216486895 },
        .{ 81985529216486895, 1, 81985529216486895 },
        .{ 1, -1, -1 },
        .{ 1, -10, -10 },
        .{ -10, 1, -10 },
        .{ 1, -81985529216486895, -81985529216486895 },
        .{ -81985529216486895, 1, -81985529216486895 },
        .{ 3037000499, 3037000499, 9223372030926249001 },
        .{ -3037000499, 3037000499, -9223372030926249001 },
        .{ 3037000499, -3037000499, -9223372030926249001 },
        .{ -3037000499, -3037000499, 9223372030926249001 },
        .{ 4398046511103, 2097152, 9223372036852678656 },
        .{ -4398046511103, 2097152, -9223372036852678656 },
        .{ 4398046511103, -2097152, -9223372036852678656 },
        .{ -4398046511103, -2097152, 9223372036852678656 },
        .{ 2097152, 4398046511103, 9223372036852678656 },
        .{ -2097152, 4398046511103, -9223372036852678656 },
        .{ 2097152, -4398046511103, -9223372036852678656 },
        .{ -2097152, -4398046511103, 9223372036852678656 },
        .{ 0x00000000000000B504F333F9DE5BE000, 0x000000000000000000B504F333F9DE5B, 0x7FFFFFFFFFFFF328DF915DA296E8A000 },
    };
    for (cases) |case| {
        try test__multi3(case[0], case[1], case[2]);
    }
}

View file

@ -1,71 +0,0 @@
//! Ported from
//! https://github.com/llvm/llvm-project/blob/llvmorg-9.0.0/compiler-rt/lib/builtins/muldi3.c
const std = @import("std");
const builtin = @import("builtin");
const native_endian = builtin.cpu.arch.endian();
const common = @import("common.zig");
pub const panic = common.panic;
comptime {
    const export_linkage = common.linkage;
    const export_visibility = common.visibility;

    // The 64-bit multiply is exported as `__aeabi_lmul` when
    // `common.want_aeabi` is set and as `__muldi3` otherwise.
    if (common.want_aeabi) {
        @export(__aeabi_lmul, .{ .name = "__aeabi_lmul", .linkage = export_linkage, .visibility = export_visibility });
    } else {
        @export(__muldi3, .{ .name = "__muldi3", .linkage = export_linkage, .visibility = export_visibility });
    }
}
/// Multiplies two `i64` values by delegating to `mul`.
pub fn __muldi3(a: i64, b: i64) callconv(.C) i64 {
    const product: i64 = mul(a, b);
    return product;
}
/// Same computation as `__muldi3`, but declared with the AAPCS calling convention.
fn __aeabi_lmul(a: i64, b: i64) callconv(.AAPCS) i64 {
    const product: i64 = mul(a, b);
    return product;
}
/// Wrapping 64-bit multiply built from 32-bit words: the low words are
/// multiplied into a full 64-bit value by `muldsi3`, then the two cross
/// products are added to the high word.
inline fn mul(a: i64, b: i64) i64 {
    const lhs = dwords{ .all = a };
    const rhs = dwords{ .all = b };
    var product = dwords{ .all = muldsi3(lhs.s.low, rhs.s.low) };
    const cross_terms = lhs.s.high *% rhs.s.low +% lhs.s.low *% rhs.s.high;
    product.s.high +%= cross_terms;
    return product.all;
}
/// Overlays an `i64` with its two `u32` words. The field order of `s` follows
/// the native byte order, so `low` always overlays the least significant word.
const dwords = extern union {
    all: i64,
    s: if (native_endian == .Little)
        extern struct { low: u32, high: u32 }
    else
        extern struct { high: u32, low: u32 },
};
/// Multiplies two `u32` values and returns the full 64-bit result as `i64`.
///
/// Each operand is split into an upper and a lower part of `bits_in_word_2`
/// bits. The four partial products are accumulated into the `low` and `high`
/// words of a `dwords` value, with `t` carrying the overflow of each step
/// upwards.
fn muldsi3(a: u32, b: u32) i64 {
    const bits_in_word_2 = @sizeOf(i32) * 8 / 2;
    const lower_mask = (~@as(u32, 0)) >> bits_in_word_2;

    // Both fields of `r` are assigned below before `r.all` is read.
    var r: dwords = undefined;

    // lower(a) * lower(b)
    r.s.low = (a & lower_mask) *% (b & lower_mask);
    var t: u32 = r.s.low >> bits_in_word_2;
    r.s.low &= lower_mask;

    // upper(a) * lower(b), plus the carry from the previous step. The sum
    // always fits in a u32 (at most 0xFFFE0001 + 0xFFFE), so the wrapping
    // add used by the twin step below gives the same value as a checked add.
    t +%= (a >> bits_in_word_2) *% (b & lower_mask);
    r.s.low +%= (t & lower_mask) << bits_in_word_2;
    r.s.high = t >> bits_in_word_2;

    // upper(b) * lower(a), added on top of the current upper part of `low`.
    t = r.s.low >> bits_in_word_2;
    r.s.low &= lower_mask;
    t +%= (b >> bits_in_word_2) *% (a & lower_mask);
    r.s.low +%= (t & lower_mask) << bits_in_word_2;
    r.s.high +%= t >> bits_in_word_2;

    // upper(a) * upper(b) only contributes to the high word.
    r.s.high +%= (a >> bits_in_word_2) *% (b >> bits_in_word_2);
    return r.all;
}
// Reference the companion test file so that its tests are included when this
// file is tested.
test {
_ = @import("muldi3_test.zig");
}

View file

@ -1,51 +0,0 @@
const __muldi3 = @import("muldi3.zig").__muldi3;
const testing = @import("std").testing;
/// Checks that `__muldi3(a, b)` yields `expected`.
fn test__muldi3(a: i64, b: i64, expected: i64) !void {
    try testing.expect(__muldi3(a, b) == expected);
}
test "muldi3" {
    // Each row is { a, b, expected product }.
    const cases = [_][3]i64{
        .{ 0, 0, 0 },
        .{ 0, 1, 0 },
        .{ 1, 0, 0 },
        .{ 0, 10, 0 },
        .{ 10, 0, 0 },
        .{ 0, 81985529216486895, 0 },
        .{ 81985529216486895, 0, 0 },
        .{ 0, -1, 0 },
        .{ -1, 0, 0 },
        .{ 0, -10, 0 },
        .{ -10, 0, 0 },
        .{ 0, -81985529216486895, 0 },
        .{ -81985529216486895, 0, 0 },
        .{ 1, 1, 1 },
        .{ 1, 10, 10 },
        .{ 10, 1, 10 },
        .{ 1, 81985529216486895, 81985529216486895 },
        .{ 81985529216486895, 1, 81985529216486895 },
        .{ 1, -1, -1 },
        .{ 1, -10, -10 },
        .{ -10, 1, -10 },
        .{ 1, -81985529216486895, -81985529216486895 },
        .{ -81985529216486895, 1, -81985529216486895 },
        .{ 3037000499, 3037000499, 9223372030926249001 },
        .{ -3037000499, 3037000499, -9223372030926249001 },
        .{ 3037000499, -3037000499, -9223372030926249001 },
        .{ -3037000499, -3037000499, 9223372030926249001 },
        .{ 4398046511103, 2097152, 9223372036852678656 },
        .{ -4398046511103, 2097152, -9223372036852678656 },
        .{ 4398046511103, -2097152, -9223372036852678656 },
        .{ -4398046511103, -2097152, 9223372036852678656 },
        .{ 2097152, 4398046511103, 9223372036852678656 },
        .{ -2097152, 4398046511103, -9223372036852678656 },
        .{ 2097152, -4398046511103, -9223372036852678656 },
        .{ -2097152, -4398046511103, 9223372036852678656 },
    };
    for (cases) |case| {
        try test__muldi3(case[0], case[1], case[2]);
    }
}

View file

@ -1,75 +0,0 @@
//! Ported from git@github.com:llvm-project/llvm-project-20170507.git
//! ae684fad6d34858c014c94da69c15e7774a633c3
//! 2018-08-13
const std = @import("std");
const builtin = @import("builtin");
const native_endian = builtin.cpu.arch.endian();
const common = @import("common.zig");
pub const panic = common.panic;
comptime {
    const export_linkage = common.linkage;
    const export_visibility = common.visibility;

    // Both variants are exported under the symbol name `__multi3`;
    // `common.want_windows_v2u64_abi` selects the one whose signature uses
    // `@Vector(2, u64)` instead of `i128`.
    if (common.want_windows_v2u64_abi) {
        @export(__multi3_windows_x86_64, .{ .name = "__multi3", .linkage = export_linkage, .visibility = export_visibility });
    } else {
        @export(__multi3, .{ .name = "__multi3", .linkage = export_linkage, .visibility = export_visibility });
    }
}
/// Multiplies two `i128` values by delegating to `mul`.
pub fn __multi3(a: i128, b: i128) callconv(.C) i128 {
    const product: i128 = mul(a, b);
    return product;
}
const v2u64 = @Vector(2, u64);

/// Variant of `__multi3` whose operands and result are passed as
/// `@Vector(2, u64)`; the bits are reinterpreted as `i128` for the multiply.
fn __multi3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    const lhs = @bitCast(i128, a);
    const rhs = @bitCast(i128, b);
    const product = mul(lhs, rhs);
    return @bitCast(v2u64, product);
}
/// Wrapping 128-bit multiply built from 64-bit words: the low words are
/// multiplied into a full 128-bit value by `mulddi3`, then the two cross
/// products are added to the high word.
inline fn mul(a: i128, b: i128) i128 {
    const lhs = twords{ .all = a };
    const rhs = twords{ .all = b };
    var product = twords{ .all = mulddi3(lhs.s.low, rhs.s.low) };
    const cross_terms = lhs.s.high *% rhs.s.low +% lhs.s.low *% rhs.s.high;
    product.s.high +%= cross_terms;
    return product.all;
}
/// Multiplies two `u64` values and returns the result as an `i128`.
/// Each operand is split into an upper and a lower part of `bits_in_dword_2`
/// bits; the four partial products are accumulated into the `low` and `high`
/// words of a `twords` value, with `t` carrying the overflow of each step
/// upwards.
fn mulddi3(a: u64, b: u64) i128 {
const bits_in_dword_2 = (@sizeOf(i64) * 8) / 2;
const lower_mask = ~@as(u64, 0) >> bits_in_dword_2;
// Both fields of `r` are assigned below before `r.all` is read.
var r: twords = undefined;
// lower(a) * lower(b)
r.s.low = (a & lower_mask) *% (b & lower_mask);
var t: u64 = r.s.low >> bits_in_dword_2;
r.s.low &= lower_mask;
// upper(a) * lower(b), plus the carry from the previous step.
t +%= (a >> bits_in_dword_2) *% (b & lower_mask);
r.s.low +%= (t & lower_mask) << bits_in_dword_2;
r.s.high = t >> bits_in_dword_2;
// upper(b) * lower(a), added on top of the current upper part of `low`.
t = r.s.low >> bits_in_dword_2;
r.s.low &= lower_mask;
t +%= (b >> bits_in_dword_2) *% (a & lower_mask);
r.s.low +%= (t & lower_mask) << bits_in_dword_2;
r.s.high +%= t >> bits_in_dword_2;
// upper(a) * upper(b) only contributes to the high word.
r.s.high +%= (a >> bits_in_dword_2) *% (b >> bits_in_dword_2);
return r.all;
}
/// Overlays an `i128` with its two `u64` words. The field order of `S`
/// follows the native byte order, so `low` always overlays the least
/// significant word.
const twords = extern union {
    all: i128,
    s: S,

    const S = switch (native_endian) {
        .Little => extern struct { low: u64, high: u64 },
        .Big => extern struct { high: u64, low: u64 },
    };
};
// Reference the companion test file so that its tests are included when this
// file is tested.
test {
_ = @import("multi3_test.zig");
}

View file

@ -1,53 +0,0 @@
const __multi3 = @import("multi3.zig").__multi3;
const testing = @import("std").testing;
/// Checks that `__multi3(a, b)` yields `expected`.
fn test__multi3(a: i128, b: i128, expected: i128) !void {
    try testing.expect(__multi3(a, b) == expected);
}
test "multi3" {
    // Each row is { a, b, expected product }.
    const cases = [_][3]i128{
        .{ 0, 0, 0 },
        .{ 0, 1, 0 },
        .{ 1, 0, 0 },
        .{ 0, 10, 0 },
        .{ 10, 0, 0 },
        .{ 0, 81985529216486895, 0 },
        .{ 81985529216486895, 0, 0 },
        .{ 0, -1, 0 },
        .{ -1, 0, 0 },
        .{ 0, -10, 0 },
        .{ -10, 0, 0 },
        .{ 0, -81985529216486895, 0 },
        .{ -81985529216486895, 0, 0 },
        .{ 1, 1, 1 },
        .{ 1, 10, 10 },
        .{ 10, 1, 10 },
        .{ 1, 81985529216486895, 81985529216486895 },
        .{ 81985529216486895, 1, 81985529216486895 },
        .{ 1, -1, -1 },
        .{ 1, -10, -10 },
        .{ -10, 1, -10 },
        .{ 1, -81985529216486895, -81985529216486895 },
        .{ -81985529216486895, 1, -81985529216486895 },
        .{ 3037000499, 3037000499, 9223372030926249001 },
        .{ -3037000499, 3037000499, -9223372030926249001 },
        .{ 3037000499, -3037000499, -9223372030926249001 },
        .{ -3037000499, -3037000499, 9223372030926249001 },
        .{ 4398046511103, 2097152, 9223372036852678656 },
        .{ -4398046511103, 2097152, -9223372036852678656 },
        .{ 4398046511103, -2097152, -9223372036852678656 },
        .{ -4398046511103, -2097152, 9223372036852678656 },
        .{ 2097152, 4398046511103, 9223372036852678656 },
        .{ -2097152, 4398046511103, -9223372036852678656 },
        .{ 2097152, -4398046511103, -9223372036852678656 },
        .{ -2097152, -4398046511103, 9223372036852678656 },
        .{ 0x00000000000000B504F333F9DE5BE000, 0x000000000000000000B504F333F9DE5B, 0x7FFFFFFFFFFFF328DF915DA296E8A000 },
    };
    for (cases) |case| {
        try test__multi3(case[0], case[1], case[2]);
    }
}

View file

@ -1,7 +1,6 @@
const std = @import("std");
const builtin = @import("builtin");
const Log2Int = std.math.Log2Int;
const native_endian = builtin.cpu.arch.endian();
const common = @import("common.zig");
pub const panic = common.panic;
@ -27,39 +26,24 @@ comptime {
}
}
/// Builds an extern union that overlays an integer of type `T` with its two
/// equally sized halves (`s.low` and `s.high`). The halves are signed when
/// `signed_half` is true and unsigned otherwise; their field order follows
/// the native byte order.
fn Dwords(comptime T: type, comptime signed_half: bool) type {
    return extern union {
        /// Bit width of one half; `@divExact` requires `T` to have an even bit count.
        const bits = @divExact(@typeInfo(T).Int.bits, 2);
        const HalfTU = std.meta.Int(.unsigned, bits);
        const HalfTS = std.meta.Int(.signed, bits);
        const HalfT = if (signed_half) HalfTS else HalfTU;

        all: T,
        s: switch (native_endian) {
            .Little => extern struct { low: HalfT, high: HalfT },
            .Big => extern struct { high: HalfT, low: HalfT },
        },
    };
}
// Arithmetic shift left: shift in 0 from right to left
// Precondition: 0 <= b < bits_in_dword
inline fn ashlXi3(comptime T: type, a: T, b: i32) T {
const dwords = Dwords(T, false);
const S = Log2Int(dwords.HalfT);
const word_t = common.HalveInt(T, false);
const S = Log2Int(word_t.HalfT);
const input = dwords{ .all = a };
var output: dwords = undefined;
const input = word_t{ .all = a };
var output: word_t = undefined;
if (b >= dwords.bits) {
if (b >= word_t.bits) {
output.s.low = 0;
output.s.high = input.s.low << @intCast(S, b - dwords.bits);
output.s.high = input.s.low << @intCast(S, b - word_t.bits);
} else if (b == 0) {
return a;
} else {
output.s.low = input.s.low << @intCast(S, b);
output.s.high = input.s.high << @intCast(S, b);
output.s.high |= input.s.low >> @intCast(S, dwords.bits - b);
output.s.high |= input.s.low >> @intCast(S, word_t.bits - b);
}
return output.all;
@ -68,24 +52,24 @@ inline fn ashlXi3(comptime T: type, a: T, b: i32) T {
// Arithmetic shift right: shift in copies of the sign bit from left to right
// Precondition: 0 <= b < T.bit_count
inline fn ashrXi3(comptime T: type, a: T, b: i32) T {
const dwords = Dwords(T, true);
const S = Log2Int(dwords.HalfT);
const word_t = common.HalveInt(T, true);
const S = Log2Int(word_t.HalfT);
const input = dwords{ .all = a };
var output: dwords = undefined;
const input = word_t{ .all = a };
var output: word_t = undefined;
if (b >= dwords.bits) {
output.s.high = input.s.high >> (dwords.bits - 1);
output.s.low = input.s.high >> @intCast(S, b - dwords.bits);
if (b >= word_t.bits) {
output.s.high = input.s.high >> (word_t.bits - 1);
output.s.low = input.s.high >> @intCast(S, b - word_t.bits);
} else if (b == 0) {
return a;
} else {
output.s.high = input.s.high >> @intCast(S, b);
output.s.low = input.s.high << @intCast(S, dwords.bits - b);
output.s.low = input.s.high << @intCast(S, word_t.bits - b);
// Avoid sign-extension here
output.s.low |= @bitCast(
dwords.HalfT,
@bitCast(dwords.HalfTU, input.s.low) >> @intCast(S, b),
word_t.HalfT,
@bitCast(word_t.HalfTU, input.s.low) >> @intCast(S, b),
);
}
@ -95,20 +79,20 @@ inline fn ashrXi3(comptime T: type, a: T, b: i32) T {
// Logical shift right: shift in 0 from left to right
// Precondition: 0 <= b < T.bit_count
inline fn lshrXi3(comptime T: type, a: T, b: i32) T {
const dwords = Dwords(T, false);
const S = Log2Int(dwords.HalfT);
const word_t = common.HalveInt(T, false);
const S = Log2Int(word_t.HalfT);
const input = dwords{ .all = a };
var output: dwords = undefined;
const input = word_t{ .all = a };
var output: word_t = undefined;
if (b >= dwords.bits) {
if (b >= word_t.bits) {
output.s.high = 0;
output.s.low = input.s.high >> @intCast(S, b - dwords.bits);
output.s.low = input.s.high >> @intCast(S, b - word_t.bits);
} else if (b == 0) {
return a;
} else {
output.s.high = input.s.high >> @intCast(S, b);
output.s.low = input.s.high << @intCast(S, dwords.bits - b);
output.s.low = input.s.high << @intCast(S, word_t.bits - b);
output.s.low |= input.s.low >> @intCast(S, b);
}