mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 13:54:21 +00:00
update libcxx, libcxxabi, and C headers to release/13.x branch
upstream commit 9c49fee5e7ac0ca8bc4ec1c3738ca0d83df65852
This commit is contained in:
parent
6aeab0f323
commit
db4fea6689
12 changed files with 476 additions and 190 deletions
192
lib/include/__clang_cuda_device_functions.h
vendored
192
lib/include/__clang_cuda_device_functions.h
vendored
|
|
@ -34,10 +34,12 @@ __DEVICE__ unsigned long long __brevll(unsigned long long __a) {
|
||||||
return __nv_brevll(__a);
|
return __nv_brevll(__a);
|
||||||
}
|
}
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
|
__DEVICE__ void __brkpt() { __asm__ __volatile__("brkpt;"); }
|
||||||
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
|
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
|
||||||
#else
|
#else
|
||||||
__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
|
__DEVICE__ void __attribute__((overloadable)) __brkpt(void) {
|
||||||
|
__asm__ __volatile__("brkpt;");
|
||||||
|
}
|
||||||
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
|
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
|
||||||
#endif
|
#endif
|
||||||
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
|
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
|
||||||
|
|
@ -507,7 +509,7 @@ __DEVICE__ float __powf(float __a, float __b) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parameter must have a known integer value.
|
// Parameter must have a known integer value.
|
||||||
#define __prof_trigger(__a) asm __volatile__("pmevent \t%0;" ::"i"(__a))
|
#define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;" ::"i"(__a))
|
||||||
__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }
|
__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }
|
||||||
__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
|
__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
|
||||||
return __nv_sad(__a, __b, __c);
|
return __nv_sad(__a, __b, __c);
|
||||||
|
|
@ -526,7 +528,7 @@ __DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }
|
||||||
__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }
|
__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }
|
||||||
__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };
|
__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };
|
||||||
__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };
|
__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };
|
||||||
__DEVICE__ void __trap(void) { asm volatile("trap;"); }
|
__DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); }
|
||||||
__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {
|
__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {
|
||||||
return __nvvm_atom_add_gen_i((int *)__p, __v);
|
return __nvvm_atom_add_gen_i((int *)__p, __v);
|
||||||
}
|
}
|
||||||
|
|
@ -1051,21 +1053,21 @@ __DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vabs2(unsigned int __a) {
|
__DEVICE__ unsigned int __vabs2(unsigned int __a) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
|
__asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(0), "r"(0));
|
: "r"(__a), "r"(0), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vabs4(unsigned int __a) {
|
__DEVICE__ unsigned int __vabs4(unsigned int __a) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
|
__asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(0), "r"(0));
|
: "r"(__a), "r"(0), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
|
__asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
|
|
@ -1073,100 +1075,114 @@ __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
||||||
__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
|
__asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
|
__asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
|
__asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
|
__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
|
__asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(0), "r"(0));
|
: "r"(__a), "r"(0), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
|
__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
|
__asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(0), "r"(0));
|
: "r"(__a), "r"(0), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
|
__asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
|
__asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
|
__asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
|
__asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.u32.u32.eq %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1174,7 +1190,9 @@ __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.u32.u32.eq %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1182,7 +1200,9 @@ __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.s32.s32.ge %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1190,7 +1210,9 @@ __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.s32.s32.ge %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1198,7 +1220,9 @@ __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.u32.u32.ge %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1206,7 +1230,9 @@ __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.u32.u32.ge %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1214,7 +1240,9 @@ __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.s32.s32.gt %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1222,7 +1250,9 @@ __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.s32.s32.gt %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1230,7 +1260,9 @@ __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.u32.u32.gt %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1238,7 +1270,9 @@ __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.u32.u32.gt %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1246,7 +1280,9 @@ __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.s32.s32.le %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1254,7 +1290,9 @@ __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.s32.s32.le %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1262,7 +1300,9 @@ __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.u32.u32.le %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1270,7 +1310,9 @@ __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.u32.u32.le %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1278,7 +1320,9 @@ __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.s32.s32.lt %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1286,7 +1330,9 @@ __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.s32.s32.lt %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1294,7 +1340,9 @@ __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.u32.u32.lt %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1302,7 +1350,9 @@ __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.u32.u32.lt %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1310,7 +1360,9 @@ __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset2.u32.u32.ne %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1318,7 +1370,9 @@ __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vset4.u32.u32.ne %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
@ -1345,7 +1399,7 @@ __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned mask = __vcmpgts2(__a, __b);
|
unsigned mask = __vcmpgts2(__a, __b);
|
||||||
r = (__a & mask) | (__b & ~mask);
|
r = (__a & mask) | (__b & ~mask);
|
||||||
} else {
|
} else {
|
||||||
asm("vmax2.s32.s32.s32 %0,%1,%2,%3;"
|
__asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
}
|
}
|
||||||
|
|
@ -1353,63 +1407,77 @@ __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
|
__asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
|
__asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
|
__asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
|
__asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
|
|
@ -1417,20 +1485,24 @@ __DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
|
||||||
|
|
||||||
__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }
|
__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }
|
||||||
|
|
||||||
__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
__asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;"
|
||||||
|
: "=r"(r)
|
||||||
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }
|
__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }
|
||||||
__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
|
__asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
|
|
@ -1440,7 +1512,7 @@ __DEVICE__ unsigned int __vnegss2(unsigned int __a) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
|
__asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
|
|
@ -1450,14 +1522,14 @@ __DEVICE__ unsigned int __vnegss4(unsigned int __a) {
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
|
__asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
|
__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
|
||||||
unsigned int r;
|
unsigned int r;
|
||||||
asm("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
|
__asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||||
: "=r"(r)
|
: "=r"(r)
|
||||||
: "r"(__a), "r"(__b), "r"(0));
|
: "r"(__a), "r"(__b), "r"(0));
|
||||||
return r;
|
return r;
|
||||||
|
|
|
||||||
172
lib/include/__clang_hip_cmath.h
vendored
172
lib/include/__clang_hip_cmath.h
vendored
|
|
@ -10,7 +10,7 @@
|
||||||
#ifndef __CLANG_HIP_CMATH_H__
|
#ifndef __CLANG_HIP_CMATH_H__
|
||||||
#define __CLANG_HIP_CMATH_H__
|
#define __CLANG_HIP_CMATH_H__
|
||||||
|
|
||||||
#if !defined(__HIP__)
|
#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
|
||||||
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
|
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -25,31 +25,43 @@
|
||||||
#endif // !defined(__HIPCC_RTC__)
|
#endif // !defined(__HIPCC_RTC__)
|
||||||
|
|
||||||
#pragma push_macro("__DEVICE__")
|
#pragma push_macro("__DEVICE__")
|
||||||
|
#pragma push_macro("__CONSTEXPR__")
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#define __DEVICE__ static __attribute__((always_inline, nothrow))
|
||||||
|
#define __CONSTEXPR__ constexpr
|
||||||
|
#else
|
||||||
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
|
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
|
||||||
|
#define __CONSTEXPR__
|
||||||
|
#endif // __OPENMP_AMDGCN__
|
||||||
|
|
||||||
// Start with functions that cannot be defined by DEF macros below.
|
// Start with functions that cannot be defined by DEF macros below.
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
|
#if defined __OPENMP_AMDGCN__
|
||||||
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
|
__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); }
|
||||||
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
|
__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); }
|
||||||
__DEVICE__ long abs(long __n) { return ::labs(__n); }
|
__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); }
|
||||||
__DEVICE__ float fma(float __x, float __y, float __z) {
|
#endif
|
||||||
|
__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); }
|
||||||
|
__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); }
|
||||||
|
__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); }
|
||||||
|
__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); }
|
||||||
|
__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) {
|
||||||
return ::fmaf(__x, __y, __z);
|
return ::fmaf(__x, __y, __z);
|
||||||
}
|
}
|
||||||
#if !defined(__HIPCC_RTC__)
|
#if !defined(__HIPCC_RTC__)
|
||||||
// The value returned by fpclassify is platform dependent, therefore it is not
|
// The value returned by fpclassify is platform dependent, therefore it is not
|
||||||
// supported by hipRTC.
|
// supported by hipRTC.
|
||||||
__DEVICE__ int fpclassify(float __x) {
|
__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) {
|
||||||
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
|
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
|
||||||
FP_ZERO, __x);
|
FP_ZERO, __x);
|
||||||
}
|
}
|
||||||
__DEVICE__ int fpclassify(double __x) {
|
__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) {
|
||||||
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
|
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
|
||||||
FP_ZERO, __x);
|
FP_ZERO, __x);
|
||||||
}
|
}
|
||||||
#endif // !defined(__HIPCC_RTC__)
|
#endif // !defined(__HIPCC_RTC__)
|
||||||
|
|
||||||
__DEVICE__ float frexp(float __arg, int *__exp) {
|
__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) {
|
||||||
return ::frexpf(__arg, __exp);
|
return ::frexpf(__arg, __exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -71,93 +83,101 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
|
||||||
// of the variants inside the inner region and avoid the clash.
|
// of the variants inside the inner region and avoid the clash.
|
||||||
#pragma omp begin declare variant match(implementation = {vendor(llvm)})
|
#pragma omp begin declare variant match(implementation = {vendor(llvm)})
|
||||||
|
|
||||||
__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
|
__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); }
|
||||||
__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
|
__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); }
|
||||||
__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
|
__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); }
|
||||||
__DEVICE__ int isfinite(double __x) { return ::__finite(__x); }
|
__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); }
|
||||||
__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
|
__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); }
|
||||||
__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
|
__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); }
|
||||||
|
|
||||||
#pragma omp end declare variant
|
#pragma omp end declare variant
|
||||||
#endif // defined(__OPENMP_AMDGCN__)
|
#endif // defined(__OPENMP_AMDGCN__)
|
||||||
|
|
||||||
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
|
__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); }
|
||||||
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
|
__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); }
|
||||||
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
|
__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); }
|
||||||
__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
|
__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); }
|
||||||
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
|
__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
|
||||||
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
|
__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }
|
||||||
|
|
||||||
#if defined(__OPENMP_AMDGCN__)
|
#if defined(__OPENMP_AMDGCN__)
|
||||||
#pragma omp end declare variant
|
#pragma omp end declare variant
|
||||||
#endif // defined(__OPENMP_AMDGCN__)
|
#endif // defined(__OPENMP_AMDGCN__)
|
||||||
|
|
||||||
__DEVICE__ bool isgreater(float __x, float __y) {
|
__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) {
|
||||||
return __builtin_isgreater(__x, __y);
|
return __builtin_isgreater(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool isgreater(double __x, double __y) {
|
__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) {
|
||||||
return __builtin_isgreater(__x, __y);
|
return __builtin_isgreater(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool isgreaterequal(float __x, float __y) {
|
__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) {
|
||||||
return __builtin_isgreaterequal(__x, __y);
|
return __builtin_isgreaterequal(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool isgreaterequal(double __x, double __y) {
|
__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) {
|
||||||
return __builtin_isgreaterequal(__x, __y);
|
return __builtin_isgreaterequal(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool isless(float __x, float __y) {
|
__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) {
|
||||||
return __builtin_isless(__x, __y);
|
return __builtin_isless(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool isless(double __x, double __y) {
|
__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) {
|
||||||
return __builtin_isless(__x, __y);
|
return __builtin_isless(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool islessequal(float __x, float __y) {
|
__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) {
|
||||||
return __builtin_islessequal(__x, __y);
|
return __builtin_islessequal(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool islessequal(double __x, double __y) {
|
__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) {
|
||||||
return __builtin_islessequal(__x, __y);
|
return __builtin_islessequal(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool islessgreater(float __x, float __y) {
|
__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) {
|
||||||
return __builtin_islessgreater(__x, __y);
|
return __builtin_islessgreater(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool islessgreater(double __x, double __y) {
|
__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) {
|
||||||
return __builtin_islessgreater(__x, __y);
|
return __builtin_islessgreater(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
|
__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) {
|
||||||
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
|
return __builtin_isnormal(__x);
|
||||||
__DEVICE__ bool isunordered(float __x, float __y) {
|
}
|
||||||
|
__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) {
|
||||||
|
return __builtin_isnormal(__x);
|
||||||
|
}
|
||||||
|
__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) {
|
||||||
return __builtin_isunordered(__x, __y);
|
return __builtin_isunordered(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool isunordered(double __x, double __y) {
|
__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) {
|
||||||
return __builtin_isunordered(__x, __y);
|
return __builtin_isunordered(__x, __y);
|
||||||
}
|
}
|
||||||
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
|
__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) {
|
||||||
__DEVICE__ float pow(float __base, int __iexp) {
|
return ::modff(__x, __iptr);
|
||||||
|
}
|
||||||
|
__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) {
|
||||||
return ::powif(__base, __iexp);
|
return ::powif(__base, __iexp);
|
||||||
}
|
}
|
||||||
__DEVICE__ double pow(double __base, int __iexp) {
|
__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) {
|
||||||
return ::powi(__base, __iexp);
|
return ::powi(__base, __iexp);
|
||||||
}
|
}
|
||||||
__DEVICE__ float remquo(float __x, float __y, int *__quo) {
|
__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) {
|
||||||
return ::remquof(__x, __y, __quo);
|
return ::remquof(__x, __y, __quo);
|
||||||
}
|
}
|
||||||
__DEVICE__ float scalbln(float __x, long int __n) {
|
__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) {
|
||||||
return ::scalblnf(__x, __n);
|
return ::scalblnf(__x, __n);
|
||||||
}
|
}
|
||||||
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
|
__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); }
|
||||||
__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
|
__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); }
|
||||||
|
|
||||||
// Notably missing above is nexttoward. We omit it because
|
// Notably missing above is nexttoward. We omit it because
|
||||||
// ocml doesn't provide an implementation, and we don't want to be in the
|
// ocml doesn't provide an implementation, and we don't want to be in the
|
||||||
// business of implementing tricky libm functions in this header.
|
// business of implementing tricky libm functions in this header.
|
||||||
|
|
||||||
// Other functions.
|
// Other functions.
|
||||||
__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
|
__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y,
|
||||||
|
_Float16 __z) {
|
||||||
return __ocml_fma_f16(__x, __y, __z);
|
return __ocml_fma_f16(__x, __y, __z);
|
||||||
}
|
}
|
||||||
__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
|
__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) {
|
||||||
return __ocml_pown_f16(__base, __iexp);
|
return __ocml_pown_f16(__base, __iexp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef __OPENMP_AMDGCN__
|
||||||
// BEGIN DEF_FUN and HIP_OVERLOAD
|
// BEGIN DEF_FUN and HIP_OVERLOAD
|
||||||
|
|
||||||
// BEGIN DEF_FUN
|
// BEGIN DEF_FUN
|
||||||
|
|
@ -168,18 +188,19 @@ __DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
|
||||||
|
|
||||||
// Define cmath functions with float argument and returns __retty.
|
// Define cmath functions with float argument and returns __retty.
|
||||||
#define __DEF_FUN1(__retty, __func) \
|
#define __DEF_FUN1(__retty, __func) \
|
||||||
__DEVICE__ \
|
__DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); }
|
||||||
__retty __func(float __x) { return __func##f(__x); }
|
|
||||||
|
|
||||||
// Define cmath functions with two float arguments and returns __retty.
|
// Define cmath functions with two float arguments and returns __retty.
|
||||||
#define __DEF_FUN2(__retty, __func) \
|
#define __DEF_FUN2(__retty, __func) \
|
||||||
__DEVICE__ \
|
__DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \
|
||||||
__retty __func(float __x, float __y) { return __func##f(__x, __y); }
|
return __func##f(__x, __y); \
|
||||||
|
}
|
||||||
|
|
||||||
// Define cmath functions with a float and an int argument and returns __retty.
|
// Define cmath functions with a float and an int argument and returns __retty.
|
||||||
#define __DEF_FUN2_FI(__retty, __func) \
|
#define __DEF_FUN2_FI(__retty, __func) \
|
||||||
__DEVICE__ \
|
__DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \
|
||||||
__retty __func(float __x, int __y) { return __func##f(__x, __y); }
|
return __func##f(__x, __y); \
|
||||||
|
}
|
||||||
|
|
||||||
__DEF_FUN1(float, acos)
|
__DEF_FUN1(float, acos)
|
||||||
__DEF_FUN1(float, acosh)
|
__DEF_FUN1(float, acosh)
|
||||||
|
|
@ -426,7 +447,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
|
||||||
// floor(double).
|
// floor(double).
|
||||||
#define __HIP_OVERLOAD1(__retty, __fn) \
|
#define __HIP_OVERLOAD1(__retty, __fn) \
|
||||||
template <typename __T> \
|
template <typename __T> \
|
||||||
__DEVICE__ \
|
__DEVICE__ __CONSTEXPR__ \
|
||||||
typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \
|
typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \
|
||||||
__fn(__T __x) { \
|
__fn(__T __x) { \
|
||||||
return ::__fn((double)__x); \
|
return ::__fn((double)__x); \
|
||||||
|
|
@ -438,7 +459,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
|
||||||
#if __cplusplus >= 201103L
|
#if __cplusplus >= 201103L
|
||||||
#define __HIP_OVERLOAD2(__retty, __fn) \
|
#define __HIP_OVERLOAD2(__retty, __fn) \
|
||||||
template <typename __T1, typename __T2> \
|
template <typename __T1, typename __T2> \
|
||||||
__DEVICE__ typename __hip_enable_if< \
|
__DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \
|
||||||
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \
|
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \
|
||||||
typename __hip::__promote<__T1, __T2>::type>::type \
|
typename __hip::__promote<__T1, __T2>::type>::type \
|
||||||
__fn(__T1 __x, __T2 __y) { \
|
__fn(__T1 __x, __T2 __y) { \
|
||||||
|
|
@ -448,7 +469,8 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
|
||||||
#else
|
#else
|
||||||
#define __HIP_OVERLOAD2(__retty, __fn) \
|
#define __HIP_OVERLOAD2(__retty, __fn) \
|
||||||
template <typename __T1, typename __T2> \
|
template <typename __T1, typename __T2> \
|
||||||
__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
|
__DEVICE__ __CONSTEXPR__ \
|
||||||
|
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
|
||||||
__hip::is_arithmetic<__T2>::value, \
|
__hip::is_arithmetic<__T2>::value, \
|
||||||
__retty>::type \
|
__retty>::type \
|
||||||
__fn(__T1 __x, __T2 __y) { \
|
__fn(__T1 __x, __T2 __y) { \
|
||||||
|
|
@ -526,7 +548,7 @@ __HIP_OVERLOAD2(double, min)
|
||||||
// Additional Overloads that don't quite match HIP_OVERLOAD.
|
// Additional Overloads that don't quite match HIP_OVERLOAD.
|
||||||
#if __cplusplus >= 201103L
|
#if __cplusplus >= 201103L
|
||||||
template <typename __T1, typename __T2, typename __T3>
|
template <typename __T1, typename __T2, typename __T3>
|
||||||
__DEVICE__ typename __hip_enable_if<
|
__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<
|
||||||
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
|
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
|
||||||
__hip::is_arithmetic<__T3>::value,
|
__hip::is_arithmetic<__T3>::value,
|
||||||
typename __hip::__promote<__T1, __T2, __T3>::type>::type
|
typename __hip::__promote<__T1, __T2, __T3>::type>::type
|
||||||
|
|
@ -536,31 +558,32 @@ fma(__T1 __x, __T2 __y, __T3 __z) {
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
template <typename __T1, typename __T2, typename __T3>
|
template <typename __T1, typename __T2, typename __T3>
|
||||||
__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
__DEVICE__ __CONSTEXPR__
|
||||||
|
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
||||||
__hip::is_arithmetic<__T2>::value &&
|
__hip::is_arithmetic<__T2>::value &&
|
||||||
__hip::is_arithmetic<__T3>::value,
|
__hip::is_arithmetic<__T3>::value,
|
||||||
double>::type
|
double>::type
|
||||||
fma(__T1 __x, __T2 __y, __T3 __z) {
|
fma(__T1 __x, __T2 __y, __T3 __z) {
|
||||||
return ::fma((double)__x, (double)__y, (double)__z);
|
return ::fma((double)__x, (double)__y, (double)__z);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template <typename __T>
|
template <typename __T>
|
||||||
__DEVICE__
|
__DEVICE__ __CONSTEXPR__
|
||||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||||
frexp(__T __x, int *__exp) {
|
frexp(__T __x, int *__exp) {
|
||||||
return ::frexp((double)__x, __exp);
|
return ::frexp((double)__x, __exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename __T>
|
template <typename __T>
|
||||||
__DEVICE__
|
__DEVICE__ __CONSTEXPR__
|
||||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||||
ldexp(__T __x, int __exp) {
|
ldexp(__T __x, int __exp) {
|
||||||
return ::ldexp((double)__x, __exp);
|
return ::ldexp((double)__x, __exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename __T>
|
template <typename __T>
|
||||||
__DEVICE__
|
__DEVICE__ __CONSTEXPR__
|
||||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||||
modf(__T __x, double *__exp) {
|
modf(__T __x, double *__exp) {
|
||||||
return ::modf((double)__x, __exp);
|
return ::modf((double)__x, __exp);
|
||||||
|
|
@ -568,7 +591,7 @@ __DEVICE__
|
||||||
|
|
||||||
#if __cplusplus >= 201103L
|
#if __cplusplus >= 201103L
|
||||||
template <typename __T1, typename __T2>
|
template <typename __T1, typename __T2>
|
||||||
__DEVICE__
|
__DEVICE__ __CONSTEXPR__
|
||||||
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
||||||
__hip::is_arithmetic<__T2>::value,
|
__hip::is_arithmetic<__T2>::value,
|
||||||
typename __hip::__promote<__T1, __T2>::type>::type
|
typename __hip::__promote<__T1, __T2>::type>::type
|
||||||
|
|
@ -578,23 +601,24 @@ __DEVICE__
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
template <typename __T1, typename __T2>
|
template <typename __T1, typename __T2>
|
||||||
__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
__DEVICE__ __CONSTEXPR__
|
||||||
|
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
||||||
__hip::is_arithmetic<__T2>::value,
|
__hip::is_arithmetic<__T2>::value,
|
||||||
double>::type
|
double>::type
|
||||||
remquo(__T1 __x, __T2 __y, int *__quo) {
|
remquo(__T1 __x, __T2 __y, int *__quo) {
|
||||||
return ::remquo((double)__x, (double)__y, __quo);
|
return ::remquo((double)__x, (double)__y, __quo);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template <typename __T>
|
template <typename __T>
|
||||||
__DEVICE__
|
__DEVICE__ __CONSTEXPR__
|
||||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||||
scalbln(__T __x, long int __exp) {
|
scalbln(__T __x, long int __exp) {
|
||||||
return ::scalbln((double)__x, __exp);
|
return ::scalbln((double)__x, __exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename __T>
|
template <typename __T>
|
||||||
__DEVICE__
|
__DEVICE__ __CONSTEXPR__
|
||||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||||
scalbn(__T __x, int __exp) {
|
scalbn(__T __x, int __exp) {
|
||||||
return ::scalbn((double)__x, __exp);
|
return ::scalbn((double)__x, __exp);
|
||||||
|
|
@ -607,8 +631,10 @@ __DEVICE__
|
||||||
|
|
||||||
// END DEF_FUN and HIP_OVERLOAD
|
// END DEF_FUN and HIP_OVERLOAD
|
||||||
|
|
||||||
|
#endif // ifndef __OPENMP_AMDGCN__
|
||||||
#endif // defined(__cplusplus)
|
#endif // defined(__cplusplus)
|
||||||
|
|
||||||
|
#ifndef __OPENMP_AMDGCN__
|
||||||
// Define these overloads inside the namespace our standard library uses.
|
// Define these overloads inside the namespace our standard library uses.
|
||||||
#if !defined(__HIPCC_RTC__)
|
#if !defined(__HIPCC_RTC__)
|
||||||
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
|
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
|
||||||
|
|
@ -781,22 +807,26 @@ _GLIBCXX_END_NAMESPACE_VERSION
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif // defined(__cplusplus)
|
#endif // defined(__cplusplus)
|
||||||
__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) {
|
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x,
|
||||||
|
double y) {
|
||||||
return cosh(x) * y;
|
return cosh(x) * y;
|
||||||
}
|
}
|
||||||
__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) {
|
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x,
|
||||||
|
float y) {
|
||||||
return coshf(x) * y;
|
return coshf(x) * y;
|
||||||
}
|
}
|
||||||
__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) {
|
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) {
|
||||||
return fpclassify(*p);
|
return fpclassify(*p);
|
||||||
}
|
}
|
||||||
__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) {
|
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) {
|
||||||
return fpclassify(*p);
|
return fpclassify(*p);
|
||||||
}
|
}
|
||||||
__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) {
|
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x,
|
||||||
|
double y) {
|
||||||
return sinh(x) * y;
|
return sinh(x) * y;
|
||||||
}
|
}
|
||||||
__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
|
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x,
|
||||||
|
float y) {
|
||||||
return sinhf(x) * y;
|
return sinhf(x) * y;
|
||||||
}
|
}
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
|
|
@ -804,7 +834,9 @@ __DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
|
||||||
#endif // defined(__cplusplus)
|
#endif // defined(__cplusplus)
|
||||||
#endif // defined(_MSC_VER)
|
#endif // defined(_MSC_VER)
|
||||||
#endif // !defined(__HIPCC_RTC__)
|
#endif // !defined(__HIPCC_RTC__)
|
||||||
|
#endif // ifndef __OPENMP_AMDGCN__
|
||||||
|
|
||||||
#pragma pop_macro("__DEVICE__")
|
#pragma pop_macro("__DEVICE__")
|
||||||
|
#pragma pop_macro("__CONSTEXPR__")
|
||||||
|
|
||||||
#endif // __CLANG_HIP_CMATH_H__
|
#endif // __CLANG_HIP_CMATH_H__
|
||||||
|
|
|
||||||
50
lib/include/__clang_hip_math.h
vendored
50
lib/include/__clang_hip_math.h
vendored
|
|
@ -9,7 +9,7 @@
|
||||||
#ifndef __CLANG_HIP_MATH_H__
|
#ifndef __CLANG_HIP_MATH_H__
|
||||||
#define __CLANG_HIP_MATH_H__
|
#define __CLANG_HIP_MATH_H__
|
||||||
|
|
||||||
#if !defined(__HIP__)
|
#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
|
||||||
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
|
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -19,18 +19,30 @@
|
||||||
#endif
|
#endif
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#endif // __HIPCC_RTC__
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
#endif // !defined(__HIPCC_RTC__)
|
||||||
|
|
||||||
#pragma push_macro("__DEVICE__")
|
#pragma push_macro("__DEVICE__")
|
||||||
|
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
|
||||||
|
#else
|
||||||
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
|
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
|
||||||
|
#endif
|
||||||
|
|
||||||
// A few functions return bool type starting only in C++11.
|
// A few functions return bool type starting only in C++11.
|
||||||
#pragma push_macro("__RETURN_TYPE")
|
#pragma push_macro("__RETURN_TYPE")
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#define __RETURN_TYPE int
|
||||||
|
#else
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
#define __RETURN_TYPE bool
|
#define __RETURN_TYPE bool
|
||||||
#else
|
#else
|
||||||
#define __RETURN_TYPE int
|
#define __RETURN_TYPE int
|
||||||
#endif
|
#endif
|
||||||
|
#endif // __OPENMP_AMDGCN__
|
||||||
|
|
||||||
#if defined (__cplusplus) && __cplusplus < 201103L
|
#if defined (__cplusplus) && __cplusplus < 201103L
|
||||||
// emulate static_assert on type sizes
|
// emulate static_assert on type sizes
|
||||||
|
|
@ -249,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
float frexpf(float __x, int *__nptr) {
|
float frexpf(float __x, int *__nptr) {
|
||||||
int __tmp;
|
int __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
float __r =
|
float __r =
|
||||||
__ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
|
__ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
|
||||||
*__nptr = __tmp;
|
*__nptr = __tmp;
|
||||||
|
|
@ -334,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); }
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
float modff(float __x, float *__iptr) {
|
float modff(float __x, float *__iptr) {
|
||||||
float __tmp;
|
float __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
float __r =
|
float __r =
|
||||||
__ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
__ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||||
*__iptr = __tmp;
|
*__iptr = __tmp;
|
||||||
|
|
@ -414,6 +432,9 @@ float remainderf(float __x, float __y) {
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
float remquof(float __x, float __y, int *__quo) {
|
float remquof(float __x, float __y, int *__quo) {
|
||||||
int __tmp;
|
int __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
float __r = __ocml_remquo_f32(
|
float __r = __ocml_remquo_f32(
|
||||||
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
|
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
|
||||||
*__quo = __tmp;
|
*__quo = __tmp;
|
||||||
|
|
@ -470,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||||
float __tmp;
|
float __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
*__sinptr =
|
*__sinptr =
|
||||||
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||||
*__cosptr = __tmp;
|
*__cosptr = __tmp;
|
||||||
|
|
@ -478,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
void sincospif(float __x, float *__sinptr, float *__cosptr) {
|
void sincospif(float __x, float *__sinptr, float *__cosptr) {
|
||||||
float __tmp;
|
float __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
*__sinptr = __ocml_sincospi_f32(
|
*__sinptr = __ocml_sincospi_f32(
|
||||||
__x, (__attribute__((address_space(5))) float *)&__tmp);
|
__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||||
*__cosptr = __tmp;
|
*__cosptr = __tmp;
|
||||||
|
|
@ -790,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
double frexp(double __x, int *__nptr) {
|
double frexp(double __x, int *__nptr) {
|
||||||
int __tmp;
|
int __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
double __r =
|
double __r =
|
||||||
__ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
|
__ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
|
||||||
*__nptr = __tmp;
|
*__nptr = __tmp;
|
||||||
|
|
@ -874,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); }
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
double modf(double __x, double *__iptr) {
|
double modf(double __x, double *__iptr) {
|
||||||
double __tmp;
|
double __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
double __r =
|
double __r =
|
||||||
__ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
|
__ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||||
*__iptr = __tmp;
|
*__iptr = __tmp;
|
||||||
|
|
@ -962,6 +995,9 @@ double remainder(double __x, double __y) {
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
double remquo(double __x, double __y, int *__quo) {
|
double remquo(double __x, double __y, int *__quo) {
|
||||||
int __tmp;
|
int __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
double __r = __ocml_remquo_f64(
|
double __r = __ocml_remquo_f64(
|
||||||
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
|
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
|
||||||
*__quo = __tmp;
|
*__quo = __tmp;
|
||||||
|
|
@ -1020,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); }
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
void sincos(double __x, double *__sinptr, double *__cosptr) {
|
void sincos(double __x, double *__sinptr, double *__cosptr) {
|
||||||
double __tmp;
|
double __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
*__sinptr = __ocml_sincos_f64(
|
*__sinptr = __ocml_sincos_f64(
|
||||||
__x, (__attribute__((address_space(5))) double *)&__tmp);
|
__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||||
*__cosptr = __tmp;
|
*__cosptr = __tmp;
|
||||||
|
|
@ -1028,6 +1067,9 @@ void sincos(double __x, double *__sinptr, double *__cosptr) {
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
void sincospi(double __x, double *__sinptr, double *__cosptr) {
|
void sincospi(double __x, double *__sinptr, double *__cosptr) {
|
||||||
double __tmp;
|
double __tmp;
|
||||||
|
#ifdef __OPENMP_AMDGCN__
|
||||||
|
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||||
|
#endif
|
||||||
*__sinptr = __ocml_sincospi_f64(
|
*__sinptr = __ocml_sincospi_f64(
|
||||||
__x, (__attribute__((address_space(5))) double *)&__tmp);
|
__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||||
*__cosptr = __tmp;
|
*__cosptr = __tmp;
|
||||||
|
|
@ -1262,7 +1304,7 @@ float min(float __x, float __y) { return fminf(__x, __y); }
|
||||||
__DEVICE__
|
__DEVICE__
|
||||||
double min(double __x, double __y) { return fmin(__x, __y); }
|
double min(double __x, double __y) { return fmin(__x, __y); }
|
||||||
|
|
||||||
#if !defined(__HIPCC_RTC__)
|
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
|
||||||
__host__ inline static int min(int __arg1, int __arg2) {
|
__host__ inline static int min(int __arg1, int __arg2) {
|
||||||
return std::min(__arg1, __arg2);
|
return std::min(__arg1, __arg2);
|
||||||
}
|
}
|
||||||
|
|
@ -1270,7 +1312,7 @@ __host__ inline static int min(int __arg1, int __arg2) {
|
||||||
__host__ inline static int max(int __arg1, int __arg2) {
|
__host__ inline static int max(int __arg1, int __arg2) {
|
||||||
return std::max(__arg1, __arg2);
|
return std::max(__arg1, __arg2);
|
||||||
}
|
}
|
||||||
#endif // __HIPCC_RTC__
|
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#pragma pop_macro("__DEVICE__")
|
#pragma pop_macro("__DEVICE__")
|
||||||
|
|
|
||||||
3
lib/include/intrin.h
vendored
3
lib/include/intrin.h
vendored
|
|
@ -574,6 +574,9 @@ void _WriteStatusReg(int, __int64);
|
||||||
unsigned short __cdecl _byteswap_ushort(unsigned short val);
|
unsigned short __cdecl _byteswap_ushort(unsigned short val);
|
||||||
unsigned long __cdecl _byteswap_ulong (unsigned long val);
|
unsigned long __cdecl _byteswap_ulong (unsigned long val);
|
||||||
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);
|
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);
|
||||||
|
|
||||||
|
__int64 __mulh(__int64 __a, __int64 __b);
|
||||||
|
unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------*\
|
/*----------------------------------------------------------------------------*\
|
||||||
|
|
|
||||||
|
|
@ -14,13 +14,13 @@
|
||||||
#error "This file is for OpenMP compilation only."
|
#error "This file is for OpenMP compilation only."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#pragma omp begin declare variant match( \
|
|
||||||
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#pragma omp begin declare variant match( \
|
||||||
|
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
|
||||||
|
|
||||||
#define __CUDA__
|
#define __CUDA__
|
||||||
#define __OPENMP_NVPTX__
|
#define __OPENMP_NVPTX__
|
||||||
|
|
||||||
|
|
@ -33,12 +33,34 @@ extern "C" {
|
||||||
#undef __OPENMP_NVPTX__
|
#undef __OPENMP_NVPTX__
|
||||||
#undef __CUDA__
|
#undef __CUDA__
|
||||||
|
|
||||||
|
#pragma omp end declare variant
|
||||||
|
|
||||||
|
#ifdef __AMDGCN__
|
||||||
|
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||||
|
|
||||||
|
// Import types which will be used by __clang_hip_libdevice_declares.h
|
||||||
|
#ifndef __cplusplus
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define __OPENMP_AMDGCN__
|
||||||
|
#pragma push_macro("__device__")
|
||||||
|
#define __device__
|
||||||
|
|
||||||
|
/// Include declarations for libdevice functions.
|
||||||
|
#include <__clang_hip_libdevice_declares.h>
|
||||||
|
|
||||||
|
#pragma pop_macro("__device__")
|
||||||
|
#undef __OPENMP_AMDGCN__
|
||||||
|
|
||||||
|
#pragma omp end declare variant
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#pragma omp end declare variant
|
|
||||||
|
|
||||||
// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
|
// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
|
||||||
// need to `include <new>` in C++ mode.
|
// need to `include <new>` in C++ mode.
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
54
lib/include/openmp_wrappers/cmath
vendored
54
lib/include/openmp_wrappers/cmath
vendored
|
|
@ -75,4 +75,58 @@ __DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
|
||||||
|
|
||||||
#pragma omp end declare variant
|
#pragma omp end declare variant
|
||||||
|
|
||||||
|
#ifdef __AMDGCN__
|
||||||
|
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||||
|
|
||||||
|
#pragma push_macro("__constant__")
|
||||||
|
#define __constant__ __attribute__((constant))
|
||||||
|
#define __OPENMP_AMDGCN__
|
||||||
|
|
||||||
|
#include <__clang_hip_cmath.h>
|
||||||
|
|
||||||
|
#pragma pop_macro("__constant__")
|
||||||
|
#undef __OPENMP_AMDGCN__
|
||||||
|
|
||||||
|
// Define overloads otherwise which are absent
|
||||||
|
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
|
||||||
|
|
||||||
|
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
|
||||||
|
__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
|
||||||
|
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
|
||||||
|
__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
|
||||||
|
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
|
||||||
|
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
|
||||||
|
__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
|
||||||
|
__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
|
||||||
|
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
|
||||||
|
__DEVICE__ float erf(float __x) { return ::erff(__x); }
|
||||||
|
__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
|
||||||
|
__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
|
||||||
|
__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
|
||||||
|
__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
|
||||||
|
__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
|
||||||
|
__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
|
||||||
|
__DEVICE__ float ldexp(float __arg, int __exp) {
|
||||||
|
return ::ldexpf(__arg, __exp);
|
||||||
|
}
|
||||||
|
__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
|
||||||
|
__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
|
||||||
|
__DEVICE__ float logb(float __x) { return ::logbf(__x); }
|
||||||
|
__DEVICE__ float nextafter(float __x, float __y) {
|
||||||
|
return ::nextafterf(__x, __y);
|
||||||
|
}
|
||||||
|
__DEVICE__ float remainder(float __x, float __y) {
|
||||||
|
return ::remainderf(__x, __y);
|
||||||
|
}
|
||||||
|
__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
|
||||||
|
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
|
||||||
|
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
|
||||||
|
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
|
||||||
|
__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
|
||||||
|
|
||||||
|
#undef __DEVICE__
|
||||||
|
|
||||||
|
#pragma omp end declare variant
|
||||||
|
#endif // __AMDGCN__
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
10
lib/include/openmp_wrappers/math.h
vendored
10
lib/include/openmp_wrappers/math.h
vendored
|
|
@ -48,4 +48,14 @@
|
||||||
|
|
||||||
#pragma omp end declare variant
|
#pragma omp end declare variant
|
||||||
|
|
||||||
|
#ifdef __AMDGCN__
|
||||||
|
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||||
|
|
||||||
|
#define __OPENMP_AMDGCN__
|
||||||
|
#include <__clang_hip_math.h>
|
||||||
|
#undef __OPENMP_AMDGCN__
|
||||||
|
|
||||||
|
#pragma omp end declare variant
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
2
lib/libcxx/include/cwctype
vendored
2
lib/libcxx/include/cwctype
vendored
|
|
@ -59,6 +59,7 @@ wctrans_t wctrans(const char* property);
|
||||||
|
|
||||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||||
|
|
||||||
|
#if defined(_LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H)
|
||||||
using ::wint_t _LIBCPP_USING_IF_EXISTS;
|
using ::wint_t _LIBCPP_USING_IF_EXISTS;
|
||||||
using ::wctrans_t _LIBCPP_USING_IF_EXISTS;
|
using ::wctrans_t _LIBCPP_USING_IF_EXISTS;
|
||||||
using ::wctype_t _LIBCPP_USING_IF_EXISTS;
|
using ::wctype_t _LIBCPP_USING_IF_EXISTS;
|
||||||
|
|
@ -80,6 +81,7 @@ using ::towlower _LIBCPP_USING_IF_EXISTS;
|
||||||
using ::towupper _LIBCPP_USING_IF_EXISTS;
|
using ::towupper _LIBCPP_USING_IF_EXISTS;
|
||||||
using ::towctrans _LIBCPP_USING_IF_EXISTS;
|
using ::towctrans _LIBCPP_USING_IF_EXISTS;
|
||||||
using ::wctrans _LIBCPP_USING_IF_EXISTS;
|
using ::wctrans _LIBCPP_USING_IF_EXISTS;
|
||||||
|
#endif // _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
|
||||||
|
|
||||||
_LIBCPP_END_NAMESPACE_STD
|
_LIBCPP_END_NAMESPACE_STD
|
||||||
|
|
||||||
|
|
|
||||||
19
lib/libcxx/include/string
vendored
19
lib/libcxx/include/string
vendored
|
|
@ -522,6 +522,7 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <compare>
|
#include <compare>
|
||||||
#include <cstdio> // EOF
|
#include <cstdio> // EOF
|
||||||
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <cwchar>
|
#include <cwchar>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
|
|
@ -1714,6 +1715,24 @@ private:
|
||||||
return data() <= __p && __p <= data() + size();
|
return data() <= __p && __p <= data() + size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
|
||||||
|
void __throw_length_error() const {
|
||||||
|
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||||
|
__basic_string_common<true>::__throw_length_error();
|
||||||
|
#else
|
||||||
|
_VSTD::abort();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
|
||||||
|
void __throw_out_of_range() const {
|
||||||
|
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||||
|
__basic_string_common<true>::__throw_out_of_range();
|
||||||
|
#else
|
||||||
|
_VSTD::abort();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
friend basic_string operator+<>(const basic_string&, const basic_string&);
|
friend basic_string operator+<>(const basic_string&, const basic_string&);
|
||||||
friend basic_string operator+<>(const value_type*, const basic_string&);
|
friend basic_string operator+<>(const value_type*, const basic_string&);
|
||||||
friend basic_string operator+<>(value_type, const basic_string&);
|
friend basic_string operator+<>(value_type, const basic_string&);
|
||||||
|
|
|
||||||
20
lib/libcxx/include/vector
vendored
20
lib/libcxx/include/vector
vendored
|
|
@ -281,6 +281,7 @@ erase_if(vector<T, Allocator>& c, Predicate pred); // C++20
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <climits>
|
#include <climits>
|
||||||
#include <compare>
|
#include <compare>
|
||||||
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
#include <iosfwd> // for forward declaration of vector
|
#include <iosfwd> // for forward declaration of vector
|
||||||
|
|
@ -390,6 +391,25 @@ protected:
|
||||||
is_nothrow_move_assignable<allocator_type>::value)
|
is_nothrow_move_assignable<allocator_type>::value)
|
||||||
{__move_assign_alloc(__c, integral_constant<bool,
|
{__move_assign_alloc(__c, integral_constant<bool,
|
||||||
__alloc_traits::propagate_on_container_move_assignment::value>());}
|
__alloc_traits::propagate_on_container_move_assignment::value>());}
|
||||||
|
|
||||||
|
_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
|
||||||
|
void __throw_length_error() const {
|
||||||
|
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||||
|
__vector_base_common<true>::__throw_length_error();
|
||||||
|
#else
|
||||||
|
_VSTD::abort();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
|
||||||
|
void __throw_out_of_range() const {
|
||||||
|
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||||
|
__vector_base_common<true>::__throw_out_of_range();
|
||||||
|
#else
|
||||||
|
_VSTD::abort();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
_LIBCPP_INLINE_VISIBILITY
|
||||||
void __copy_assign_alloc(const __vector_base& __c, true_type)
|
void __copy_assign_alloc(const __vector_base& __c, true_type)
|
||||||
|
|
|
||||||
10
lib/libcxx/include/wctype.h
vendored
10
lib/libcxx/include/wctype.h
vendored
|
|
@ -50,8 +50,18 @@ wctrans_t wctrans(const char* property);
|
||||||
#pragma GCC system_header
|
#pragma GCC system_header
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// TODO:
|
||||||
|
// In the future, we should unconditionally include_next <wctype.h> here and instead
|
||||||
|
// have a mode under which the library does not need libc++'s <wctype.h> or <cwctype>
|
||||||
|
// at all (i.e. a mode without wchar_t). As it stands, we need to do that to completely
|
||||||
|
// bypass the using declarations in <cwctype> when we did not include <wctype.h>.
|
||||||
|
// Otherwise, a using declaration like `using ::wint_t` in <cwctype> will refer to
|
||||||
|
// nothing (with using_if_exists), and if we include another header that defines one
|
||||||
|
// of these declarations (e.g. <wchar.h>), the second `using ::wint_t` with using_if_exists
|
||||||
|
// will fail because it does not refer to the same declaration.
|
||||||
#if __has_include_next(<wctype.h>)
|
#if __has_include_next(<wctype.h>)
|
||||||
# include_next <wctype.h>
|
# include_next <wctype.h>
|
||||||
|
# define _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
2
lib/libcxxabi/src/cxa_personality.cpp
vendored
2
lib/libcxxabi/src/cxa_personality.cpp
vendored
|
|
@ -702,10 +702,10 @@ static void scan_eh_tab(scan_results &results, _Unwind_Action actions,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
landingPad = (uintptr_t)lpStart + landingPad;
|
landingPad = (uintptr_t)lpStart + landingPad;
|
||||||
results.landingPad = landingPad;
|
|
||||||
#else // __USING_SJLJ_EXCEPTIONS__
|
#else // __USING_SJLJ_EXCEPTIONS__
|
||||||
++landingPad;
|
++landingPad;
|
||||||
#endif // __USING_SJLJ_EXCEPTIONS__
|
#endif // __USING_SJLJ_EXCEPTIONS__
|
||||||
|
results.landingPad = landingPad;
|
||||||
if (actionEntry == 0)
|
if (actionEntry == 0)
|
||||||
{
|
{
|
||||||
// Found a cleanup
|
// Found a cleanup
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue