Added math.h, stdlib.h, float.h, and stbool.h to std folder (and all their implementations). Opted to NOT implement malloc/free/etc. in favor of having our own functions init_mem, grow_mem, and get_mem.

This commit is contained in:
Taylor Robbins (Piggybank Studios)
2025-08-28 13:20:08 -07:00
parent deae3ccd12
commit d0aa7a1d0e
13 changed files with 4868 additions and 2 deletions

View File

@@ -10,8 +10,13 @@ Description:
#include "std/src/std_main.c"
WASM_EXPORT(HelloFromWasm) int HelloFromWasm(int value)
WASM_EXPORT(HelloFromWasm) float HelloFromWasm(float value, float value2)
{
printf("Called HelloFromWasm(%d)!\n", value);
printf("Called HelloFromWasm(%g)!\n", fmodf(value, value2));
for (int iIndex = 0; iIndex < 1024; iIndex++)
{
void* newMem = grow_mem(1024);
printf("mem[%d] = %p\n", iIndex, newMem);
}
return value*(value+1)*2;
}

12
cwasm.h
View File

@@ -31,6 +31,9 @@ Description:
#error This standard library implementation assumes little-endian byte order
#endif
// +--------------------------------------------------------------+
// | Macros |
// +--------------------------------------------------------------+
#ifdef __cplusplus
#define LANGUAGE_IS_C 0
#define LANGUAGE_IS_CPP 1
@@ -66,8 +69,17 @@ Description:
// | Standard Includes |
// +--------------------------------------------------------------+
// NOTE: These headers are all contained in CWasm's "std" folder, they are not "real" C standard library headers
#include <limits.h>
#include <stdint.h>
#include <uchar.h>
#include <float.h>
#include <stdbool.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <stddef.h>
#include <assert.h>
#include <math.h>
// +--------------------------------------------------------------+
// | Basic Type Names |

67
std/float.h Normal file
View File

@@ -0,0 +1,67 @@
/*
File: float.h
Author: Taylor Robbins
Date: 08\28\2025
*/
#ifndef _FLOAT_H
#define _FLOAT_H
//TODO: Can we comment all of these defines to describe in detail what the values are?
// inline int __flt_rounds() { return FE_TONEAREST; }
// #define FLT_ROUNDS (__flt_rounds())
#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
#define FLT_RADIX 2
#define FLT_TRUE_MIN 1.40129846432481707092e-45F
#define FLT_MIN 1.17549435082228750797e-38F
#define FLT_MAX 3.40282346638528859812e+38F
#define FLT_EPSILON 1.1920928955078125e-07F
#define FLT_MANT_DIG 24
#define FLT_MIN_EXP (-125)
#define FLT_MAX_EXP 128
#define FLT_HAS_SUBNORM 1
#define FLT_DIG 6
#define FLT_DECIMAL_DIG 9
#define FLT_MIN_10_EXP (-37)
#define FLT_MAX_10_EXP 38
#define DBL_TRUE_MIN 4.94065645841246544177e-324
#define DBL_MIN 2.22507385850720138309e-308
#define DBL_MAX 1.79769313486231570815e+308
#define DBL_EPSILON 2.22044604925031308085e-16
#define DBL_MANT_DIG 53
#define DBL_MIN_EXP (-1021)
#define DBL_MAX_EXP 1024
#define DBL_HAS_SUBNORM 1
#define DBL_DIG 15
#define DBL_DECIMAL_DIG 17
#define DBL_MIN_10_EXP (-307)
#define DBL_MAX_10_EXP 308
//NOTE: long double is an allowed type in WASM32 but casting that long double to double results in an import for __trunctfdf2 being generated. We're just not going to use long doubles for now...
// #define LDBL_HAS_SUBNORM 1
// #define LDBL_DECIMAL_DIG DECIMAL_DIG
// #define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
// #define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
// #define LDBL_MAX 1.18973149535723176508575932662800702e+4932L
// #define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L
// #define LDBL_MANT_DIG 113
// #define LDBL_MIN_EXP (-16381)
// #define LDBL_MAX_EXP 16384
// #define LDBL_DIG 33
// #define LDBL_MIN_10_EXP (-4931)
// #define LDBL_MAX_10_EXP 4932
#define DECIMAL_DIG 36
#endif // _FLOAT_H

210
std/math.h Normal file
View File

@@ -0,0 +1,210 @@
/*
File: math.h
Author: Taylor Robbins
Date: 08\28\2025
*/
#ifndef _MATH_H
#define _MATH_H
//NOTE: clang was shadowing our intrinsics implementations (like floor, ceil, scalbnf, sqrt, etc.) with it's builtin ones,
// we've decided to route through our functions using macros and functions prefixed with _.
// Even if they route to builtins at the end of the day, this gives us the ability
// to test our implementation and the control to route these however we want.
// +--------------------------------------------------------------+
// | Defines |
// +--------------------------------------------------------------+
#define NAN __builtin_nanf("")
#define INFINITY __builtin_inff()
#define FP_NAN 0
#define FP_INFINITE 1
#define FP_ZERO 2
#define FP_SUBNORMAL 3
#define FP_NORMAL 4
#define M_E 2.7182818284590452354 // e
#define M_LOG2E 1.4426950408889634074 // log_2 e
#define M_LOG10E 0.43429448190325182765 // log_10 e
#define M_LN2 0.69314718055994530942 // log_e 2
#define M_LN10 2.30258509299404568402 // log_e 10
#define M_PI 3.14159265358979323846 // pi
#define M_PI_2 1.57079632679489661923 // pi/2
#define M_PI_4 0.78539816339744830962 // pi/4
#define M_1_PI 0.31830988618379067154 // 1/pi
#define M_2_PI 0.63661977236758134308 // 2/pi
#define M_2_SQRTPI 1.12837916709551257390 // 2/sqrt(pi)
#define M_SQRT2 1.41421356237309504880 // sqrt(2)
#define M_SQRT1_2 0.70710678118654752440 // 1/sqrt(2)
// +--------------------------------------------------------------+
// | Macros |
// +--------------------------------------------------------------+
#define isinf(value) (sizeof(value) == sizeof(float) \
? (__FLOAT_BITS((float)value) & 0x7fffffff) == 0x7f800000 \
: (__DOUBLE_BITS((double)value) & -1ULL>>1) == 0x7ffULL<<52 \
)
#define isnan(value) (sizeof(value) == sizeof(float) \
? (__FLOAT_BITS((float)value) & 0x7fffffff) > 0x7f800000 \
: (__DOUBLE_BITS((double)value) & -1ULL>>1) > 0x7ffULL<<52 \
)
#define signbit(value) (sizeof(value) == sizeof(float) \
? (int)(__FLOAT_BITS((float)value)>>31) \
: (int)(__DOUBLE_BITS((double)value)>>63) \
)
#define isnormal(value) (sizeof(value) == sizeof(float) \
? ((__FLOAT_BITS(value)+0x00800000) & 0x7fffffff) >= 0x01000000 \
: ((__DOUBLE_BITS(value)+(1ULL<<52)) & -1ULL>>1) >= 1ULL<<53 \
)
#define fpclassify(value) (sizeof(value) == sizeof(float) \
? __fpclassifyf(value) \
: __fpclassify(value) \
)
#define FORCE_EVAL(value) do { \
if (sizeof(value) == sizeof(float)) { fp_force_evalf(value); } \
else { fp_force_eval(value); } \
} while(0)
#define asuint(value) ((union { float _value; uint32_t _integer; }){ value })._integer
#define asfloat(value) ((union { uint32_t _value; float _float; }){ value })._float
#define asuint64(value) ((union { double _value; uint64_t _integer; }){ value })._integer
#define asdouble(value) ((union { uint64_t _value; double _double; }){ value })._double
#define EXTRACT_WORDS(highWord, lowWord, value) do { uint64_t __u = asuint64(value); (highWord) = (__u >> 32); (lowWord) = (uint32_t)__u; } while (0)
#define INSERT_WORDS(doubleVar, highWord, lowWord) do { (doubleVar) = asdouble(((uint64_t)(highWord)<<32) | (uint32_t)(lowWord)); } while (0)
#define GET_HIGH_WORD(wordVar, value) do { (wordVar) = asuint64(value) >> 32; } while (0)
#define GET_LOW_WORD(wordVar, value) do { (wordVar) = (uint32_t)asuint64(value); } while (0)
#define SET_HIGH_WORD(doubleVar, wordValue) INSERT_WORDS(doubleVar, wordValue, (uint32_t)asuint64(doubleVar))
#define SET_LOW_WORD(doubleVar, wordValue) INSERT_WORDS(doubleVar, asuint64(doubleVar)>>32, wordValue)
#define GET_FLOAT_WORD(wordVar, value) do { (wordVar) = asuint(value); } while (0)
#define SET_FLOAT_WORD(floatVar, value) do { (floatVar) = asfloat(value); } while (0)
// Helps static branch prediction so hot path can be better optimized.
#define predict_true(condition) __builtin_expect(!!(condition), 1)
#define predict_false(condition) __builtin_expect(condition, 0)
MAYBE_START_EXTERN_C
// +--------------------------------------------------------------+
// | Builtin Functions |
// +--------------------------------------------------------------+
#define copysignf(magnitude, sign) __builtin_copysignf((magnitude), (sign)) //results in f32.copysign instruction
#define copysign(magnitude, sign) __builtin_copysign((magnitude), (sign)) //results in f64.copysign instruction
#define fabsf(value) __builtin_fabsf(value) //results in f32.abs instruction
#define fabs(value) __builtin_fabs(value) //results in f64.abs instruction
#define floorf(value) __builtin_floorf(value) //results in f32.floor instruction
#define floor(value) __builtin_floor(value) //results in f64.floor instruction
#define ceilf(value) __builtin_ceilf(value) //results in f32.ceil instruction
#define ceil(value) __builtin_ceil(value) //results in f64.ceil instruction
// +--------------------------------------------------------------+
// | Helper Functions |
// +--------------------------------------------------------------+
int __fpclassifyf(float value);
int __fpclassify(double value);
unsigned __FLOAT_BITS(float value);
unsigned long long __DOUBLE_BITS(double value);
void fp_force_evalf(float value);
void fp_force_eval(double value);
float __math_invalidf(float value);
double __math_invalid(double value);
float eval_as_float(float x);
double eval_as_double(double x);
float __math_divzerof(uint32_t sign);
double __math_divzero(uint32_t sign);
// Top 16 bits of a double.
uint32_t top16(double x);
// Top 12 bits of a double (sign and exponent bits).
uint32_t top12(double value);
uint32_t top12f(float x);
float fp_barrierf(float x);
double fp_barrier(double x);
// +--------------------------------------------------------------+
// | Functions |
// +--------------------------------------------------------------+
// TODO: long double fabsl(long double value);
// TODO: long double fmodl(long double numer, long double denom);
float fminf(float value1, float value2);
double fmin(double value1, double value2);
float fmaxf(float value1, float value2);
double fmax(double value1, double value2);
float fmodf(float numer, float denom);
double fmod(double numer, double denom);
float roundf(float value);
double round(double value);
float _scalbnf(float value, int power);
double _scalbn(double value, int power);
// TODO: long double _scalbnl(long double value, int power);
#define scalbnf(value, power) _scalbnf(value, power)
#define scalbn(value, power) _scalbn(value, power)
float sqrtf(float value);
double sqrt(double value);
float _cbrtf(float value);
double _cbrt(double value);
#define cbrtf(value) _cbrtf(value)
#define cbrt(value) _cbrt(value)
float sinf(float value);
double sin(double value);
float cosf(float value);
double cos(double value);
float tanf(float value);
double tan(double value);
float asinf(float value);
double asin(double value);
float acosf(float value);
double acos(double value);
float atanf(float value);
double atan(double value);
float atan2f(float numer, float denom);
double atan2(double numer, double denom);
float powf(float value, float exponent);
double pow(double value, double exponent);
float logf(float value);
double log(double value);
float log2f(float value);
double log2(double value);
float log10f(float value);
double log10(double x);
float ldexpf(float value, int exponent);
double ldexp(double value, int exponent);
float expf(float value);
double exp(double value);
// TODO: long double copysignl(long double magnitude, long double sign);
MAYBE_END_EXTERN_C
#endif // _MATH_H

View File

@@ -9,13 +9,26 @@ Description:
#include <limits.h>
#include <stdint.h>
#include <uchar.h>
#include <float.h>
#include <stdbool.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <stddef.h>
#include <assert.h>
#include <math.h>
#include "std_js_imports.h"
MAYBE_START_EXTERN_C
#include "std_memset.c"
#include "std_printf.c"
#include "std_math_helpers.c"
#include "std_math_basic.c"
#include "std_misc.c"
#include "std_math_pow.c"
#include "std_math_trig.c"
#include "std_mem.c"
MAYBE_END_EXTERN_C

271
std/src/std_math_basic.c Normal file
View File

@@ -0,0 +1,271 @@
/*
File: std_math_basic.c
Author: Taylor Robbins
Date: 08\28\2025
Description:
** Holds implementations for "basic" math functions exposed in math.h like round()
** And for various float related functions like __FLOAT_BITS, eval_as_float, __fpclassifyf, etc.
*/
float fminf(float value1, float value2) { return (value1 <= value2) ? value1 : value2; }
double fmin(double value1, double value2) { return (value1 <= value2) ? value1 : value2; }
float fmaxf(float value1, float value2) { return (value1 >= value2) ? value1 : value2; }
double fmax(double value1, double value2) { return (value1 >= value2) ? value1 : value2; }
float fmodf(float numer, float denom)
{
union { float value; uint32_t integer; } numerUnion = {numer}, denomUnion = {denom};
int numerExponent = (numerUnion.integer >> 23) & 0xFF;
int denomExponent = (denomUnion.integer >> 23) & 0xFF;
uint32_t numerSign = numerUnion.integer & 0x80000000;
uint32_t index;
uint32_t numerInteger = numerUnion.integer;
if ((denomUnion.integer << 1) == 0 || isnan(denom) || numerExponent == 0xFF)
{
return (numer*denom) / (numer*denom);
}
if ((numerInteger << 1) <= (denomUnion.integer << 1))
{
if ((numerInteger << 1) == (denomUnion.integer << 1)) { return 0*numer; }
return numer;
}
// normalize numerator and denominator
if (!numerExponent)
{
for (index = (numerInteger << 9); (index >> 31) == 0; numerExponent--, index <<= 1);
numerInteger <<= -numerExponent + 1;
}
else
{
numerInteger &= -1U >> 9;
numerInteger |= 1U << 23;
}
if (!denomExponent)
{
for (index = (denomUnion.integer << 9); (index >> 31) == 0; denomExponent--, index <<= 1);
denomUnion.integer <<= -denomExponent + 1;
}
else
{
denomUnion.integer &= -1U >> 9;
denomUnion.integer |= 1U << 23;
}
// numerator mod denominator
for (; numerExponent > denomExponent; numerExponent--)
{
index = numerInteger - denomUnion.integer;
if (index >> 31 == 0)
{
if (index == 0) { return 0*numer; }
numerInteger = index;
}
numerInteger <<= 1;
}
index = numerInteger - denomUnion.integer;
if (index >> 31 == 0)
{
if (index == 0) { return 0*numer; }
numerInteger = index;
}
for (; (numerInteger >> 23) == 0; numerInteger <<= 1, numerExponent--);
// scale result up
if (numerExponent > 0)
{
numerInteger -= 1U << 23;
numerInteger |= (uint32_t)numerExponent << 23;
}
else
{
numerInteger >>= -numerExponent + 1;
}
numerInteger |= numerSign;
numerUnion.integer = numerInteger;
return numerUnion.value;
}
double fmod(double numer, double denom)
{
union { double value; uint64_t integer; } numerUnion = {numer}, denomUnion = {denom};
int numerExponent = (numerUnion.integer >> 52) & 0x7FF;
int denomExponent = (denomUnion.integer >> 52) & 0x7FF;
int numerSign = (numerUnion.integer >> 63);
uint64_t index;
// in the followings numerInteger should be numerUnion.integer, but then gcc wrongly adds
// float load/store to inner loops ruining performance and code size
uint64_t numerInteger = numerUnion.integer;
if ((denomUnion.integer << 1) == 0 || isnan(denom) || numerExponent == 0x7FF)
{
return (numer*denom)/(numer*denom);
}
if ((numerInteger << 1) <= (denomUnion.integer << 1))
{
if ((numerInteger << 1) == (denomUnion.integer << 1)) { return 0*numer; }
return numer;
}
// normalize numerator and denominator
if (!numerExponent)
{
for (index = (numerInteger << 12); (index >> 63) == 0; numerExponent--, index <<= 1);
numerInteger <<= -numerExponent + 1;
}
else
{
numerInteger &= -1ULL >> 12;
numerInteger |= 1ULL << 52;
}
if (!denomExponent)
{
for (index = (denomUnion.integer << 12); (index >> 63) == 0; denomExponent--, index <<= 1);
denomUnion.integer <<= -denomExponent + 1;
}
else
{
denomUnion.integer &= -1ULL >> 12;
denomUnion.integer |= 1ULL << 52;
}
// numerator mod denominator
for (; numerExponent > denomExponent; numerExponent--)
{
index = numerInteger - denomUnion.integer;
if (index >> 63 == 0)
{
if (index == 0) { return 0*numer; }
numerInteger = index;
}
numerInteger <<= 1;
}
index = numerInteger - denomUnion.integer;
if (index >> 63 == 0)
{
if (index == 0) { return 0*numer; }
numerInteger = index;
}
for (; (numerInteger >> 52) == 0; numerInteger <<= 1, numerExponent--);
// scale result
if (numerExponent > 0)
{
numerInteger -= 1ULL << 52;
numerInteger |= (uint64_t)numerExponent << 52;
}
else
{
numerInteger >>= -numerExponent + 1;
}
numerInteger |= (uint64_t)numerSign << 63;
numerUnion.integer = numerInteger;
return numerUnion.value;
}
float roundf(float value)
{
union { float value; uint32_t integer; } valueUnion = { value };
int valueExponent = ((valueUnion.integer >> 23) & 0xFF);
float_t result;
if (valueExponent >= 0x7F+23) { return value; }
if (valueUnion.integer >> 31) { value = -value; }
if (valueExponent < 0x7F-1)
{
//raise inexact if value!=0
FORCE_EVAL(value + tointf);
return 0*valueUnion.value;
}
result = value + tointf - tointf - value;
if (result > 0.5f) { result = result + value - 1; }
else if (result <= -0.5f) { result = result + value + 1; }
else { result = result + value; }
if (valueUnion.integer >> 31) { result = -result; }
return result;
}
double round(double value)
{
union { double value; uint64_t integer; } valueUnion = { value };
int valueExponent = ((valueUnion.integer >> 52) & 0x7FF);
double_t result;
if (valueExponent >= 0x3FF+52) { return value; }
if (valueUnion.integer >> 63) { value = -value; }
if (valueExponent < 0x3FF-1)
{
//raise inexact if value!=0
FORCE_EVAL(value + tointd);
return 0 * valueUnion.value;
}
result = value + tointd - tointd - value;
if (result > 0.5) { result = result + value - 1; }
else if (result <= -0.5) { result = result + value + 1; }
else { result = result + value; }
if (valueUnion.integer >> 63) { result = -result; }
return result;
}
int __fpclassifyf(float value)
{
union { float value; uint32_t integer; } valueUnion = { value };
int exponent = (valueUnion.integer >> 23) & 0xFF;
if (exponent == 0) { return (valueUnion.integer << 1) ? FP_SUBNORMAL : FP_ZERO; }
if (exponent == 0xFF) { return (valueUnion.integer << 9) ? FP_NAN : FP_INFINITE; }
return FP_NORMAL;
}
int __fpclassify(double value)
{
union { double value; uint64_t integer; } valueUnion = { value };
int e = (valueUnion.integer >> 52) & 0x7FF;
if (!e) { return (valueUnion.integer << 1) ? FP_SUBNORMAL : FP_ZERO; }
if (e==0x7FF) { return (valueUnion.integer << 12) ? FP_NAN : FP_INFINITE; }
return FP_NORMAL;
}
unsigned __FLOAT_BITS(float value)
{
union { float value; unsigned integer; } valueUnion = { value };
return valueUnion.integer;
}
unsigned long long __DOUBLE_BITS(double value)
{
union { double value; unsigned long long integer; } valueUnion = { value };
return valueUnion.integer;
}
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-but-set-variable"
void fp_force_evalf(float value)
{
volatile float volatileValue;
volatileValue = value;
}
void fp_force_eval(double value)
{
volatile double volatileValue;
volatileValue = value;
}
#pragma clang diagnostic pop
float __math_invalidf(float value)
{
return (value - value) / (value - value);
}
double __math_invalid(double value)
{
return (value - value) / (value - value);
}
float eval_as_float(float x)
{
float y = x;
return y;
}
double eval_as_double(double x)
{
double y = x;
return y;
}

2469
std/src/std_math_helpers.c Normal file

File diff suppressed because it is too large Load Diff

962
std/src/std_math_pow.c Normal file
View File

@@ -0,0 +1,962 @@
/*
File: std_math_pow.c
Author: Taylor Robbins
Date: 08\28\2025
Description:
** Holds implementation for power related functions like pow(), log(), exp(), ldexp(), etc.
*/
float powf(float base, float exponent)
{
uint32_t signBias = 0;
uint32_t baseInt, exponentInt;
baseInt = asuint(base);
exponentInt = asuint(exponent);
if (predict_false(baseInt - 0x00800000 >= 0x7F800000 - 0x00800000 || zeroinfnan32(exponentInt)))
{
// Either (base < 0x1p-126 or inf or nan) or (exponent is 0 or inf or nan).
if (predict_false(zeroinfnan32(exponentInt)))
{
if (2 * exponentInt == 0) { return 1.0f; }
if (baseInt == 0x3F800000) { return 1.0f; }
if (2 * baseInt > 2u * 0x7F800000 || 2 * exponentInt > 2u * 0x7F800000) { return base + exponent; }
if (2 * baseInt == 2 * 0x3F800000) { return 1.0f; }
if ((2 * baseInt < 2 * 0x3F800000) == !(exponentInt & 0x80000000)) { return 0.0f; } // |base|<1 && exponent==inf or |base|>1 && exponent==-inf.
return exponent * exponent;
}
if (predict_false(zeroinfnan32(baseInt)))
{
float_t baseSquared = base * base;
if (baseInt & 0x80000000 && checkint32(exponentInt) == 1) { baseSquared = -baseSquared; }
// Without the barrier some versions of clang hoist the 1/baseSquared and
// thus division by zero exception can be signaled spuriously.
return exponentInt & 0x80000000 ? fp_barrierf(1 / baseSquared) : baseSquared;
}
// base and exponent are non-zero finite.
if (baseInt & 0x80000000)
{
// Finite base < 0.
int exponentType = checkint32(exponentInt);
if (exponentType == 0) { return __math_invalidf(base); }
if (exponentType == 1) { signBias = exp2inline_SIGN_BIAS; }
baseInt &= 0x7FFFFFFF;
}
if (baseInt < 0x00800000)
{
// Normalize subnormal base so exponent becomes negative.
baseInt = asuint(base * 0x1p23F);
baseInt &= 0x7FFFFFFF;
baseInt -= 23 << 23;
}
}
double_t logBase = log2_inline(baseInt);
double_t exponentLogBase = exponent * logBase; // cannot overflow, exponent is single prec.
if (predict_false((asuint64(exponentLogBase) >> 47 & 0xFFFF) >= asuint64(126.0 * POWF_SCALE) >> 47))
{
// |exponent*log(base)| >= 126.
if (exponentLogBase > 0x1.FFFFFFFD1D571p+6 * POWF_SCALE) { return __math_oflowf(signBias); }
if (exponentLogBase <= -150.0 * POWF_SCALE) { return __math_uflowf(signBias); }
}
return exp2_inline(exponentLogBase, signBias);
}
double pow(double base, double exponent)
{
uint32_t signBias = 0;
uint64_t baseInt, exponentInt;
uint32_t baseTop12, exponentTop12;
baseInt = asuint64(base);
exponentInt = asuint64(exponent);
baseTop12 = top12(base);
exponentTop12 = top12(exponent);
if (predict_false(baseTop12 - 0x001 >= 0x7FF - 0x001 || (exponentTop12 & 0x7FF) - 0x3BE >= 0x43E - 0x3BE))
{
// Note: if |exponent| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(base,exponent) = inf/0
// and if |exponent| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(base,exponent) = +-1.
// Special cases: (base < 0x1p-126 or inf or nan) or
// (|exponent| < 0x1p-65 or |exponent| >= 0x1p63 or nan).
if (predict_false(zeroinfnan64(exponentInt)))
{
if (2 * exponentInt == 0) { return 1.0; }
if (baseInt == asuint64(1.0)) { return 1.0; }
if (2 * baseInt > 2 * asuint64(INFINITY) || 2 * exponentInt > 2 * asuint64(INFINITY)) { return base + exponent; }
if (2 * baseInt == 2 * asuint64(1.0)) { return 1.0; }
if ((2 * baseInt < 2 * asuint64(1.0)) == !(exponentInt >> 63)) { return 0.0; } // |base|<1 && exponent==inf or |base|>1 && exponent==-inf.
return exponent * exponent;
}
if (predict_false(zeroinfnan64(baseInt)))
{
double_t xSquared = base * base;
if (baseInt >> 63 && checkint64(exponentInt) == 1) { xSquared = -xSquared; }
// Without the barrier some versions of clang hoist the 1/xSquared and
// thus division by zero exception can be signaled spuriously.
return (exponentInt >> 63) ? fp_barrier(1 / xSquared) : xSquared;
}
// Here base and exponent are non-zero finite.
if (baseInt >> 63)
{
// Finite base < 0.
int exponentType = checkint64(exponentInt);
if (exponentType == 0) { return __math_invalid(base); }
if (exponentType == 1) { signBias = expinline_SIGN_BIAS; }
baseInt &= 0x7FFFFFFFFFFFFFFF;
baseTop12 &= 0x7FF;
}
if ((exponentTop12 & 0X7FF) - 0X3BE >= 0X43E - 0X3BE)
{
// Note: signBias == 0 here because exponent is not odd.
if (baseInt == asuint64(1.0)) { return 1.0; }
if ((exponentTop12 & 0x7FF) < 0x3BE)
{
// |exponent| < 2^-65, base^exponent ~= 1 + exponent*log(base).
return 1.0;
}
return (baseInt > asuint64(1.0)) == (exponentTop12 < 0x800)
? __math_oflow(0)
: __math_uflow(0);
}
if (baseTop12 == 0)
{
// Normalize subnormal base so exponent becomes negative.
baseInt = asuint64(base * 0x1p52);
baseInt &= 0x7FFFFFFFFFFFFFFF;
baseInt -= (52ULL << 52);
}
}
double_t baseLow1;
double_t baseHigh1 = log_inline(baseInt, &baseLow1);
double_t exponentHigh1, exponentLow1;
double_t exponentHigh2 = asdouble(exponentInt & (-1ULL << 27));
double_t exponentLow2 = exponent - exponentHigh2;
double_t baseHigh2 = asdouble(asuint64(baseHigh1) & (-1ULL << 27));
double_t baseLow2 = baseHigh1 - baseHigh2 + baseLow1;
exponentHigh1 = exponentHigh2 * baseHigh2;
exponentLow1 = exponentLow2 * baseHigh2 + exponent * baseLow2; // |exponentLow1| < |exponentHigh1| * 2^-25.
return exp_inline(exponentHigh1, exponentLow1, signBias);
}
float logf(float value)
{
double_t zVar, rVar, rVarSquared, result, yVar, cInverse, logc;
uint32_t valueInt, zVarInt, temp;
int vVar, index;
valueInt = asuint(value);
// Fix sign of zero with downward rounding when value==1.
if (predict_false(valueInt == 0x3F800000)) { return 0; }
if (predict_false(valueInt - 0x00800000 >= 0x7F800000 - 0x00800000))
{
// value < 0x1p-126 or inf or nan.
if (valueInt * 2 == 0) { return __math_divzerof(1); }
if (valueInt == 0x7F800000) { return value; } // log(inf) == inf.
if ((valueInt & 0x80000000) || valueInt * 2 >= 0xFF000000) { return __math_invalidf(value); }
// value is subnormal, normalize it.
valueInt = asuint(value * 0x1p23F);
valueInt -= 23 << 23;
}
// value = 2^vVar zVar; where zVar is in range [OFF,2*OFF] and exact.
// The range is split into N subintervals.
// The ith subinterval contains zVar and c is near its center.
temp = valueInt - logf_OFF;
index = (temp >> (23 - LOGF_TABLE_BITS)) % logf_N;
vVar = ((int32_t)temp >> 23); // arithmetic shift
zVarInt = valueInt - (temp & 0x1FF << 23);
cInverse = logf_T[index].invc;
logc = logf_T[index].logc;
zVar = (double_t)asfloat(zVarInt);
// log(value) = log1p(zVar/c-1) + log(c) + vVar*Ln2
rVar = (zVar * cInverse) - 1;
yVar = logc + ((double_t)vVar * logf_Ln2);
// Pipelined polynomial evaluation to approximate log1p(rVar).
rVarSquared = (rVar * rVar);
result = (logf_A[1] * rVar) + logf_A[2];
result = (logf_A[0] * rVarSquared) + result;
result = (result * rVarSquared) + (yVar + rVar);
return eval_as_float(result);
}
double log(double value)
{
double_t wVar, zVar, rVar, rVarSquared, rVarCubed, result, cInverse, logc, vVarDelta, resultHigh, resultLow;
uint64_t valueInt, zVarInt, tmp;
uint32_t top;
int vVar, index;
valueInt = asuint64(value);
top = top16(value);
if (predict_false(valueInt - asuint64(1.0 - 0x1p-4) < asuint64(1.0 + 0x1.09p-4) - asuint64(1.0 - 0x1p-4)))
{
// Handle close to 1.0 inputs separately.
// Fix sign of zero with downward rounding when value==1.
if (predict_false(valueInt == asuint64(1.0))) { return 0; }
rVar = value - 1.0;
rVarSquared = rVar * rVar;
rVarCubed = rVar * rVarSquared;
result = rVarCubed *
(log_B[1] + rVar * log_B[2] + rVarSquared * log_B[3] +
rVarCubed * (log_B[4] + rVar * log_B[5] + rVarSquared * log_B[6] +
rVarCubed * (log_B[7] + rVar * log_B[8] + rVarSquared * log_B[9] + rVarCubed * log_B[10])));
// Worst-case error is around 0.507 ULP.
wVar = rVar * 0x1p27;
double_t rhi = rVar + wVar - wVar;
double_t rlo = rVar - rhi;
wVar = rhi * rhi * log_B[0]; // log_B[0] == -0.5.
resultHigh = rVar + wVar;
resultLow = rVar - resultHigh + wVar;
resultLow += log_B[0] * rlo * (rhi + rVar);
result += resultLow;
result += resultHigh;
return eval_as_double(result);
}
if (predict_false(top - 0x0010 >= 0x7FF0 - 0x0010))
{
// value < 0x1p-1022 or inf or nan.
if (valueInt * 2 == 0) { return __math_divzero(1); }
if (valueInt == asuint64(INFINITY)) { return value; } // log(inf) == inf.
if ((top & 0x8000) || (top & 0x7FF0) == 0x7FF0) { return __math_invalid(value); }
// value is subnormal, normalize it.
valueInt = asuint64(value * 0x1p52);
valueInt -= 52ULL << 52;
}
// value = 2^vVar zVar; where zVar is in range [OFF,2*OFF) and exact.
// The range is split into N subintervals.
// The ith subinterval contains zVar and c is near its center.
tmp = valueInt - log_OFF;
index = (tmp >> (52 - LOG_TABLE_BITS)) % log_N;
vVar = (int64_t)tmp >> 52; /* arithmetic shift */
zVarInt = valueInt - (tmp & 0xfffULL << 52);
cInverse = log_T[index].invc;
logc = log_T[index].logc;
zVar = asdouble(zVarInt);
// log(value) = log1p(zVar/c-1) + log(c) + vVar*Ln2.
// rVar ~= zVar/c - 1, |rVar| < 1/(2*N).
// rounding error: 0x1p-55/N + 0x1p-66.
rVar = (zVar - log_T2[index].chi - log_T2[index].clo) * cInverse;
vVarDelta = (double_t)vVar;
// resultHigh + resultLow = rVar + log(c) + vVar*Ln2.
wVar = vVarDelta * log_Ln2hi + logc;
resultHigh = wVar + rVar;
resultLow = wVar - resultHigh + rVar + vVarDelta * log_Ln2lo;
// log(value) = resultLow + (log1p(rVar) - rVar) + resultHigh.
rVarSquared = rVar * rVar; // rounding error: 0x1p-54/N^2.
// Worst case error if |result| > 0x1p-5:
// 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
// Worst case error if |result| > 0x1p-4:
// 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma).
result = resultLow + (rVarSquared * log_A[0]) +
rVar * rVarSquared * (log_A[1] + rVar * log_A[2] + rVarSquared * (log_A[3] + rVar * log_A[4])) + resultHigh;
return eval_as_double(result);
}
float log2f(float value)
{
double_t zVar, rVar, rVarSquared, pVar, result, yVar, vInverse, logc;
uint32_t valueInt, zVarInt, top, tmp;
int k, i;
valueInt = asuint(value);
// Fix sign of zero with downward rounding when value==1.
if (predict_false(valueInt == 0x3F800000)) { return 0; }
if (predict_false(valueInt - 0x00800000 >= 0x7F800000 - 0x00800000))
{
// value < 0x1p-126 or inf or nan.
if (valueInt * 2 == 0) { return __math_divzerof(1); }
if (valueInt == 0x7F800000) { return value; } // log2(inf) == inf.
if ((valueInt & 0x80000000) || valueInt * 2 >= 0xFF000000) { return __math_invalidf(value); }
// value is subnormal, normalize it.
valueInt = asuint(value * 0x1p23f);
valueInt -= (23 << 23);
}
// value = 2^k zVar; where zVar is in range [OFF,2*OFF] and exact.
// The range is split into N subintervals.
// The ith subinterval contains zVar and c is near its center.
tmp = valueInt - log2f_OFF;
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % log2f_N;
top = (tmp & 0xFF800000);
zVarInt = valueInt - top;
k = (int32_t)tmp >> 23; // arithmetic shift
vInverse = log2f_T[i].invc;
logc = log2f_T[i].logc;
zVar = (double_t)asfloat(zVarInt);
// log2(value) = log1p(z/c-1)/ln2 + log2(c) + k
rVar = zVar * vInverse - 1;
yVar = logc + (double_t)k;
// Pipelined polynomial evaluation to approximate log1p(rVar)/ln2.
rVarSquared = rVar * rVar;
result = log2f_A[1] * rVar + log2f_A[2];
result = log2f_A[0] * rVarSquared + result;
pVar = log2f_A[3] * rVar + yVar;
result = result * rVarSquared + pVar;
return eval_as_float(result);
}
double log2(double value)
{
double_t zVar, rVar, rVarSquared, rVarQuad, result, cInverse, logc, tempDouble, resultHigh, resultLow, tVar1, tVar2, tVar3, polyValue;
uint64_t valueInt, iz, tmp;
uint32_t top;
int temp, index;
valueInt = asuint64(value);
top = top16(value);
if (predict_false(valueInt - asuint64(1.0 - 0x1.5B51p-5) < asuint64(1.0 + 0x1.6AB2p-5) - asuint64(1.0 - 0x1.5B51p-5)))
{
// Handle close to 1.0 inputs separately.
// Fix sign of zero with downward rounding when value==1.
if (predict_false(valueInt == asuint64(1.0))) { return 0; }
rVar = value - 1.0;
double_t rVarHigh, rVarLow;
rVarHigh = asdouble(asuint64(rVar) & -1ULL << 32);
rVarLow = rVar - rVarHigh;
resultHigh = (rVarHigh * log2_InvLn2hi);
resultLow = (rVarLow * log2_InvLn2hi) + (rVar * log2_InvLn2lo);
rVarSquared = rVar * rVar; // rounding error: 0x1p-62.
rVarQuad = rVarSquared * rVarSquared;
// Worst-case error is less than 0.54 ULP (0.55 ULP without fma).
polyValue = rVarSquared * (log2_B[0] + (rVar * log2_B[1]));
result = resultHigh + polyValue;
resultLow += resultHigh - result + polyValue;
resultLow += rVarQuad * (log2_B[2] + rVar * log2_B[3] + rVarSquared * (log2_B[4] + rVar * log2_B[5]) +
rVarQuad * (log2_B[6] + rVar * log2_B[7] + rVarSquared * (log2_B[8] + rVar * log2_B[9])));
result += resultLow;
return eval_as_double(result);
}
if (predict_false(top - 0x0010 >= 0x7FF0 - 0x0010))
{
// value < 0x1p-1022 or inf or nan.
if (valueInt * 2 == 0) { return __math_divzero(1); }
if (valueInt == asuint64(INFINITY)) { return value; } // log(inf) == inf.
if ((top & 0x8000) || (top & 0x7FF0) == 0x7FF0) { return __math_invalid(value); }
// value is subnormal, normalize it.
valueInt = asuint64(value * 0x1p52);
valueInt -= 52ULL << 52;
}
// value = 2^temp zVar; where zVar is in range [OFF,2*OFF) and exact.
// The range is split into N subintervals.
// The ith subinterval contains zVar and c is near its center.
tmp = valueInt - log2_OFF;
index = (tmp >> (52 - LOG2_TABLE_BITS)) % log2_N;
temp = (int64_t)tmp >> 52; // arithmetic shift
iz = valueInt - (tmp & 0xfffULL << 52);
cInverse = log2_T[index].invc;
logc = log2_T[index].logc;
zVar = asdouble(iz);
tempDouble = (double_t)temp;
// log2(value) = log2(zVar/c) + log2(c) + temp.
// rVar ~= zVar/c - 1, |rVar| < 1/(2*N).
double_t rVarHigh, rVarLow;
// rounding error: 0x1p-55/N + 0x1p-65.
rVar = (zVar - log2_T2[index].chi - log2_T2[index].clo) * cInverse;
rVarHigh = asdouble(asuint64(rVar) & -1ULL << 32);
rVarLow = rVar - rVarHigh;
tVar1 = rVarHigh * log2_InvLn2hi;
tVar2 = rVarLow * log2_InvLn2hi + rVar * log2_InvLn2lo;
// resultHigh + resultLow = rVar/ln2 + log2(c) + temp.
tVar3 = tempDouble + logc;
resultHigh = tVar3 + tVar1;
resultLow = tVar3 - resultHigh + tVar1 + tVar2;
// log2(rVar+1) = rVar/ln2 + rVar^2*poly(rVar).
// Evaluation is optimized assuming superscalar pipelined execution.
rVarSquared = rVar * rVar; // rounding error: 0x1p-54/N^2.
rVarQuad = rVarSquared * rVarSquared;
// Worst-case error if |result| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
// ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).
polyValue = log2_A[0] + rVar * log2_A[1] + rVarSquared * (log2_A[2] + rVar * log2_A[3]) + rVarQuad * (log2_A[4] + rVar * log2_A[5]);
result = resultLow + (rVarSquared * polyValue) + resultHigh;
return eval_as_double(result);
}
float log10f(float value)
{
union { float value; uint32_t integer; } valueUnion = { value };
float_t hfsq, valueSubOne, sVar, sVarSquared, sVarQuad, tVar1, tVar2, tSum, vVarOriginal, highFloat, lowFloat;
uint32_t valueInt;
int vVar;
valueInt = valueUnion.integer;
vVar = 0;
if (valueInt < 0x00800000 || valueInt>>31) // value < 2**-126
{
if (valueInt<<1 == 0) { return -1 / (value * value); } // log(+-0)=-inf
if (valueInt>>31) { return (value - value) / 0.0f; } // log(-#) = NaN
// subnormal number, scale up value
vVar -= 25;
value *= 0x1p25F;
valueUnion.value = value;
valueInt = valueUnion.integer;
}
else if (valueInt >= 0x7F800000) { return value; }
else if (valueInt == 0x3F800000) { return 0; }
// reduce value into [sqrt(2)/2, sqrt(2)]
valueInt += 0x3F800000 - 0x3F3504F3;
vVar += (int)(valueInt>>23) - 0x7F;
valueInt = (valueInt & 0x007FFFFF) + 0x3F3504F3;
valueUnion.integer = valueInt;
value = valueUnion.value;
valueSubOne = value - 1.0f;
sVar = valueSubOne / (2.0f + valueSubOne);
sVarSquared = sVar * sVar;
sVarQuad = sVarSquared * sVarSquared;
tVar1= sVarQuad * (Lg2 + (sVarQuad * Lg4));
tVar2= sVarSquared * (Lg1 + (sVarQuad * Lg3));
tSum = tVar2 + tVar1;
hfsq = 0.5f * valueSubOne * valueSubOne;
highFloat = valueSubOne - hfsq;
valueUnion.value = highFloat;
valueUnion.integer &= 0xFFFFF000;
highFloat = valueUnion.value;
lowFloat = valueSubOne - highFloat - hfsq + (sVar * (hfsq + tSum));
vVarOriginal = vVar;
return (vVarOriginal * log10_2lo) + ((lowFloat + highFloat) * ivln10lo) + (lowFloat * ivln10hi) + (highFloat * ivln10hi) + (vVarOriginal * log10_2hi);
}
double log10(double value)
{
union { double value; uint64_t integer; } valueUnion = { value };
double_t hfsq, valueSubOne, sVar, sVarSquared, sVarQuad, tVar1, tVar2, tSum, vVarOriginal, result, highDouble, lowDouble, resultHigh, resultLow;
uint32_t valueUpperWord;
int vVar;
valueUpperWord = (valueUnion.integer >> 32);
vVar = 0;
if (valueUpperWord < 0x00100000 || (valueUpperWord >> 31))
{
if ((valueUnion.integer << 1) == 0) { return -1 / (value * value); } // log(+-0)=-inf
if (valueUpperWord >> 31) { return (value - value) / 0.0; } // log(-#) = NaN
// subnormal number, scale value up
vVar -= 54;
value *= 0x1p54;
valueUnion.value = value;
valueUpperWord = (valueUnion.integer >> 32);
}
else if (valueUpperWord >= 0x7FF00000) { return value; }
else if (valueUpperWord == 0x3FF00000 && (valueUnion.integer << 32) == 0) { return 0; }
// reduce value into [sqrt(2)/2, sqrt(2)]
valueUpperWord += 0x3FF00000 - 0x3FE6A09E;
vVar += (int)(valueUpperWord >> 20) - 0x3FF;
valueUpperWord = (valueUpperWord & 0x000FFFFF) + 0x3FE6A09E;
valueUnion.integer = (uint64_t)valueUpperWord << 32 | (valueUnion.integer & 0xFFFFFFFF);
value = valueUnion.value;
valueSubOne = value - 1.0;
hfsq = 0.5 * valueSubOne * valueSubOne;
sVar = valueSubOne / (2.0 + valueSubOne);
sVarSquared = sVar * sVar;
sVarQuad = sVarSquared * sVarSquared;
tVar1 = sVarQuad * (Lg2d + sVarQuad * (Lg4d + sVarQuad * Lg6d));
tVar2 = sVarSquared * (Lg1d + sVarQuad * (Lg3d + sVarQuad * (Lg5d + sVarQuad * Lg7d)));
tSum = tVar2 + tVar1;
// See log2.c for details.
// highDouble+lowDouble = valueSubOne - hfsq + sVar*(hfsq+tSum) ~ log(1+valueSubOne)
highDouble = valueSubOne - hfsq;
valueUnion.value = highDouble;
valueUnion.integer &= (uint64_t)-1 << 32;
highDouble = valueUnion.value;
lowDouble = valueSubOne - highDouble - hfsq + (sVar * (hfsq + tSum));
// resultHigh+resultLow ~ log10(1+valueSubOne) + vVar*log10(2)
resultHigh = highDouble * ivln10hid;
vVarOriginal = vVar;
result = vVarOriginal * log10_2hid;
resultLow = vVarOriginal * log10_2lod + (lowDouble + highDouble) * ivln10lod + lowDouble * ivln10hid;
// Extra precision in for adding result is not strictly needed
// since there is no very large cancellation near value = sqrt(2) or
// value = 1/sqrt(2), but we do it anyway since it costs little on CPUs
// with some parallelism and it reduces the error for many args.
sVarQuad = result + resultHigh;
resultLow += (result - sVarQuad) + resultHigh;
resultHigh = sVarQuad;
return resultLow + resultHigh;
}
float _scalbnf(float value, int power)
{
union { float value; uint32_t integer; } valueUnion;
float_t result = value;
if (power > 127)
{
result *= 0x1p127F;
power -= 127;
if (power > 127)
{
result *= 0x1p127F;
power -= 127;
if (power > 127) { power = 127; }
}
}
else if (power < -126)
{
result *= 0x1p-126F * 0x1p24F;
power += 126 - 24;
if (power < -126)
{
result *= 0x1p-126F * 0x1p24F;
power += 126 - 24;
if (power < -126) { power = -126; }
}
}
valueUnion.integer = ((uint32_t)(0x7F + power) << 23);
result = result * valueUnion.value;
return result;
}
double _scalbn(double value, int power)
{
union { double value; uint64_t integer; } valueUnion;
double_t result = value;
if (power > 1023)
{
result *= 0x1p1023;
power -= 1023;
if (power > 1023)
{
result *= 0x1p1023;
power -= 1023;
if (power > 1023) { power = 1023; }
}
}
else if (power < -1022)
{
// make sure final power < -53 to avoid double
// rounding in the subnormal range
result *= 0x1p-1022 * 0x1p53;
power += 1022 - 53;
if (power < -1022)
{
result *= 0x1p-1022 * 0x1p53;
power += 1022 - 53;
if (power < -1022) { power = -1022; }
}
}
valueUnion.integer = ((uint64_t)(0x3FF + power) << 52);
result = result * valueUnion.value;
return result;
}
float ldexpf(float value, int exponent)
{
return scalbnf(value, exponent);
}
double ldexp(double value, int exponent)
{
return scalbn(value, exponent);
}
float expf(float value)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)value;
abstop = top12(value) & 0x7FF;
if (predict_false(abstop >= top12(88.0f)))
{
// |value| >= 88 or value is nan.
if (asuint(value) == asuint(-INFINITY)) { return 0.0f; }
if (abstop >= top12(INFINITY)) { return value + value; }
if (value > 0x1.62E42Ep6F) { return __math_oflowf(0); } // value > log(0x1p128) ~= 88.72
if (value < -0x1.9FE368p6F) { return __math_uflowf(0); } // value < log(0x1p-150) ~= -103.97
}
// value*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.
z = exp2f_InvLn2N * xd;
// Round and convert z to int, the result is in [-150*N, 128*N] and
// ideally ties-to-even rule is used, otherwise the magnitude of r
// can be bigger which gives larger approximation error.
kd = eval_as_double(z + exp2f_SHIFT);
ki = asuint64(kd);
kd -= exp2f_SHIFT;
r = z - kd;
// exp(value) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)
t = exp2f_T[ki % EXP2F_N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = exp2f_C[0] * r + exp2f_C[1];
r2 = r * r;
y = exp2f_C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}
double exp(double value)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(value) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
// Avoid spurious underflow for tiny value.
// Note: 0 is common input.
return 1.0 + value;
if (abstop >= top12(1024.0)) {
if (asuint64(value) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + value;
if (asuint64(value) >> 63)
return __math_uflow(0);
else
return __math_oflow(0);
}
// Large value is special cased below.
abstop = 0;
}
// exp(value) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].
// value = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].
z = exp_InvLn2N * value;
// z - kd is in [-1, 1] in non-nearest rounding modes.
kd = eval_as_double(z + exp_Shift);
ki = asuint64(kd);
kd -= exp_Shift;
r = value + kd * exp_NegLn2hiN + kd * exp_NegLn2loN;
// 2^(k/N) ~= scale * (1 + tail).
idx = 2 * (ki % exp_N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(exp_T[idx]);
// This is only a valid scale when -1023*N < k < 1024*N.
sbits = exp_T[idx + 1] + top;
// exp(value) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).
// Evaluation is optimized assuming superscalar pipelined execution.
r2 = r * r;
// Without fma the worst case error is 0.25/N ulp larger.
// Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.
tmp = tail + r + r2 * (exp_C2 + r * exp_C3) + r2 * r2 * (exp_C4 + r * exp_C5);
if (predict_false(abstop == 0)) { return exp_specialcase(tmp, sbits, ki); }
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}
//NOTE: sqrt is one of the few functions we can use the __builtin version of
#if 1
float sqrtf(float value) { return __builtin_sqrtf(value); }
double sqrt(double value) { return __builtin_sqrt(value); }
#else
float sqrtf(float value)
{
uint32_t valueInt, mVar, mVar1, mVar0, even, ey;
jsPrintString("Called sqrtf!");
valueInt = asuint(value);
if (predict_false(valueInt - 0x00800000 >= 0x7F800000 - 0x00800000))
{
// value < 0x1p-126 or inf or nan.
if (valueInt * 2 == 0) { return value; }
if (valueInt == 0x7F800000) { return value; }
if (valueInt > 0x7F800000) { return __math_invalidf(value); }
// value is subnormal, normalize it.
valueInt = asuint(value * 0x1p23f);
valueInt -= (23 << 23);
}
// value = 4^e mVar; with int e and mVar in [1, 4).
even = (valueInt & 0x00800000);
mVar1 = ((valueInt << 8) | 0x80000000);
mVar0 = ((valueInt << 7) & 0x7FFFFFFF);
mVar = (even ? mVar0 : mVar1);
// 2^e is the exponent part of the return value.
ey = (valueInt >> 1);
ey += (0x3F800000 >> 1);
ey &= 0x7F800000;
// compute rVar ~ 1/sqrt(mVar), sVar ~ sqrt(mVar) with 2 goldschmidt iterations.
static const uint32_t three = 0xC0000000;
uint32_t rVar, sVar, dVar, uVar, iVar;
iVar = (valueInt >> 17) % 128;
rVar = (uint32_t)__rsqrt_table[iVar] << 16;
// |rVar*sqrt(mVar) - 1| < 0x1p-8
sVar = MultiplyU32Overflow(mVar, rVar);
// |sVar/sqrt(mVar) - 1| < 0x1p-8
dVar = MultiplyU32Overflow(sVar, rVar);
uVar = three - dVar;
rVar = MultiplyU32Overflow(rVar, uVar) << 1;
// |rVar*sqrt(mVar) - 1| < 0x1.7bp-16
sVar = MultiplyU32Overflow(sVar, uVar) << 1;
// |sVar/sqrt(mVar) - 1| < 0x1.7bp-16
dVar = MultiplyU32Overflow(sVar, rVar);
uVar = three - dVar;
sVar = MultiplyU32Overflow(sVar, uVar);
// -0x1.03p-28 < sVar/sqrt(mVar) - 1 < 0x1.fp-31
sVar = (sVar - 1)>>6;
// sVar < sqrt(mVar) < sVar + 0x1.08p-23
// compute nearest rounded result.
uint32_t dVar0, dVar1, dVar2;
float result, tVar;
dVar0 = (mVar << 16) - (sVar * sVar);
dVar1 = sVar - dVar0;
dVar2 = dVar1 + sVar + 1;
sVar += (dVar1 >> 31);
sVar &= 0x007FFFFF;
sVar |= ey;
result = asfloat(sVar);
// handle rounding and inexact exception.
uint32_t tiny = (predict_false(dVar2 == 0) ? 0 : 0x01000000);
tiny |= ((dVar1 ^ dVar2) & 0x80000000);
tVar = asfloat(tiny);
result = eval_as_float(result + tVar);
return result;
}
double sqrt(double value)
{
uint64_t valueInt, top, mVar;
// special case handling.
valueInt = asuint64(value);
top = (valueInt >> 52);
if (predict_false((top - 0x001) >= (0x7FF - 0x001)))
{
// value < 0x1p-1022 or inf or nan.
if (valueInt * 2 == 0) { return value; }
if (valueInt == 0x7FF0000000000000) { return value; }
if (valueInt > 0x7FF0000000000000) { return __math_invalid(value); }
// value is subnormal, normalize it.
valueInt = asuint64(value * 0x1p52);
top = valueInt >> 52;
top -= 52;
}
// argument reduction:
// value = 4^e mVar; with integer e, and mVar in [1, 4)
// mVar: fixed point representation [2.62]
// 2^e is the exponent part of the result.
int even = (top & 1);
mVar = ((valueInt << 11) | 0x8000000000000000);
if (even) { mVar >>= 1; }
top = ((top + 0x3FF) >> 1);
/* approximate rVar ~ 1/sqrt(mVar) and sVar ~ sqrt(mVar) when mVar in [1,4)
initial estimate:
7bit table lookup (1bit exponent and 6bit significand).
iterative approximation:
using 2 goldschmidt iterations with 32bit int arithmetics
and a final iteration with 64bit int arithmetics.
details:
the relative error (e = r0 sqrt(mVar)-1) of a linear estimate
(r0 = a mVar + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
a table lookup is faster and needs one less iteration
6 bit lookup table (128b) gives |e| < 0x1.f9p-8
7 bit lookup table (256b) gives |e| < 0x1.fdp-9
for single and double prec 6bit is enough but for quad
prec 7bit is needed (or modified iterations). to avoid
one more iteration >=13bit table would be needed (16k).
a newton-raphson iteration for rVar is
w = rVar*rVar
uVar = 3 - mVar*w
rVar = rVar*uVar/2
can use a goldschmidt iteration for sVar at the end or
sVar = mVar*rVar
first goldschmidt iteration is
sVar = mVar*rVar
uVar = 3 - sVar*rVar
rVar = rVar*uVar/2
sVar = sVar*uVar/2
next goldschmidt iteration is
uVar = 3 - sVar*rVar
rVar = rVar*uVar/2
sVar = sVar*uVar/2
and at the end rVar is not computed only sVar.
they use the same amount of operations and converge at the
same quadratic rate, i.e. if
r1 sqrt(mVar) - 1 = e, then
r2 sqrt(mVar) - 1 = -3/2 e^2 - 1/2 e^3
the advantage of goldschmidt is that the mul for sVar and rVar
are independent (computed in parallel), however it is not
"self synchronizing": it only uses the input mVar in the
first iteration so rounding errors accumulate. at the end
or when switching to larger precision arithmetics rounding
errors dominate so the first iteration should be used.
the fixed point representations are
mVar: 2.30 rVar: 0.32, sVar: 2.30, dVar: 2.30, uVar: 2.30, three: 2.30
and after switching to 64 bit
mVar: 2.62 rVar: 0.64, sVar: 2.62, dVar: 2.62, uVar: 2.62, three: 2.62
*/
static const uint64_t three = 0xC0000000;
uint64_t rVar, sVar, dVar, uVar, tableIndex;
tableIndex = ((valueInt >> 46) % 128);
rVar = ((uint32_t)__rsqrt_table[tableIndex] << 16);
// |rVar sqrt(mVar) - 1| < 0x1.fdp-9
sVar = MultiplyU32Overflow((mVar >> 32), rVar);
// |sVar/sqrt(mVar) - 1| < 0x1.fdp-9
dVar = MultiplyU32Overflow(sVar, rVar);
uVar = three - dVar;
rVar = (MultiplyU32Overflow(rVar, uVar) << 1);
// |rVar sqrt(mVar) - 1| < 0x1.7bp-16
sVar = (MultiplyU32Overflow(sVar, uVar) << 1);
// |sVar/sqrt(mVar) - 1| < 0x1.7bp-16
dVar = MultiplyU32Overflow(sVar, rVar);
uVar = three - dVar;
rVar = (MultiplyU32Overflow(rVar, uVar) << 1);
// |rVar sqrt(mVar) - 1| < 0x1.3704p-29 (measured worst-case)
rVar = (rVar << 32);
sVar = MultiplyU64Overflow(mVar, rVar);
dVar = MultiplyU64Overflow(sVar, rVar);
uVar = (three << 32) - dVar;
sVar = MultiplyU64Overflow(sVar, uVar); // repr: 3.61
// -0x1p-57 < sVar - sqrt(mVar) < 0x1.8001p-61
sVar = ((sVar - 2) >> 9); // repr: 12.52
// -0x1.09p-52 < sVar - sqrt(mVar) < -0x1.fffcp-63
// sVar < sqrt(mVar) < sVar + 0x1.09p-52,
// compute nearest rounded result:
// the nearest result to 52 bits is either sVar or sVar+0x1p-52,
// we can decide by comparing (2^52 sVar + 0.5)^2 to 2^104 mVar.
uint64_t dVar0, dVar1, dVar2;
double result, tVar;
dVar0 = (mVar << 42) - (sVar * sVar);
dVar1 = sVar - dVar0;
dVar2 = dVar1 + sVar + 1;
sVar += (dVar1 >> 63);
sVar &= 0x000FFFFFFFFFFFFF;
sVar |= (top << 52);
result = asdouble(sVar);
// handle rounding modes and inexact exception:
// only (sVar+1)^2 == 2^42 mVar case is exact otherwise
// add a tiny value to cause the fenv effects.
uint64_t tiny = (predict_false(dVar2==0) ? 0 : 0x0010000000000000);
tiny |= ((dVar1 ^ dVar2) & 0x8000000000000000);
tVar = asdouble(tiny);
result = eval_as_double(result + tVar);
return result;
}
#endif
float _cbrtf(float value)
{
double_t tVarCubed, tVar;
union { float value; uint32_t integer; } valueUnion = { value };
uint32_t valueUnsigned = (valueUnion.integer & 0x7FFFFFFF);
if (valueUnsigned >= 0x7F800000) { return (value + value); } // cbrt(NaN,INF) is itself
// rough cbrt to 5 bits
if (valueUnsigned < 0x00800000) // zero or subnormal?
{
if (valueUnsigned == 0) { return value; } // cbrt(+-0) is itself
valueUnion.value = (value * 0x1p24F);
valueUnsigned = (valueUnion.integer & 0x7FFFFFFF);
valueUnsigned = (valueUnsigned / 3) + B2;
}
else { valueUnsigned = (valueUnsigned / 3) + B1; }
valueUnion.integer &= 0x80000000;
valueUnion.integer |= valueUnsigned;
// First step Newton iteration (solving t*t-value/t == 0) to 16 bits. In
// double precision so that its terms can be arranged for efficiency
// without causing overflow or underflow.
tVar = valueUnion.value;
tVarCubed = (tVar * tVar * tVar);
tVar = tVar * ((double_t)value + value + tVarCubed) / (value + tVarCubed + tVarCubed);
// Second step Newton iteration to 47 bits. In double precision for
// efficiency and accuracy.
tVarCubed = (tVar * tVar * tVar);
tVar = tVar * ((double_t)value + value + tVarCubed) / (value + tVarCubed + tVarCubed);
// rounding to 24 bits is perfect in round-to-nearest mode
return tVar;
}
double _cbrt(double value)
{
union { double value; uint64_t integer; } valueUnion = { value };
double_t rVar, sVar, tVar, wVar;
uint32_t upperUnsigned = ((valueUnion.integer >> 32) & 0x7FFFFFFF);
if (upperUnsigned >= 0x7FF00000) { return (value + value); } // cbrt(NaN,INF) is itself
// Rough cbrt to 5 bits:
// cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
// where e is integral and >= 0, m is real and in [0, 1), and "/" and
// "%" are integer division and modulus with rounding towards minus
// infinity. The RHS is always >= the LHS and has a maximum relative
// error of about 1 in 16. Adding a bias of -0.03306235651 to the
// (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
// floating point representation, for finite positive normal values,
// ordinary integer divison of the value in bits magically gives
// almost exactly the RHS of the above provided we first subtract the
// exponent bias (1023 for doubles) and later add it back. We do the
// subtraction virtually to keep e >= 0 so that ordinary integer
// division rounds towards minus infinity; this is also efficient.
if (upperUnsigned < 0x00100000) // zero or subnormal?
{
valueUnion.value = value * 0x1p54;
upperUnsigned = ((valueUnion.integer >> 32) & 0x7FFFFFFF);
if (upperUnsigned == 0) { return value; } // cbrt(0) is itself
upperUnsigned = upperUnsigned/3 + Bd2;
}
else { upperUnsigned = upperUnsigned/3 + Bd1; }
valueUnion.integer &= (1ULL << 63);
valueUnion.integer |= (uint64_t)upperUnsigned << 32;
tVar = valueUnion.value;
// New cbrt to 23 bits:
// cbrt(value) = tVar*cbrt(value/tVar**3) ~= tVar*P(tVar**3/value)
// where P(rVar) is a polynomial of degree 4 that approximates 1/cbrt(rVar)
// to within 2**-23.5 when |rVar - 1| < 1/10. The rough approximation
// has produced tVar such than |tVar/cbrt(value) - 1| ~< 1/32, and cubing this
// gives us bounds for rVar = tVar**3/value.
//
// Try to optimize for parallel evaluation as in __tanf.c.
rVar = (tVar * tVar) * (tVar / value);
tVar = tVar * ((P0 + (rVar * (P1 + (rVar * P2)))) + ((rVar * rVar) * rVar) * (P3 + (rVar * P4)));
// Round tVar away from zero to 23 bits (sloppily except for ensuring that
// the result is larger in magnitude than cbrt(value) but not much more than
// 2 23-bit ulps larger). With rounding towards zero, the error bound
// would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
// in the rounded tVar, the infinite-precision error in the Newton
// approximation barely affects third digit in the final error
// 0.667; the error in the rounded tVar can be up to about 3 23-bit ulps
// before the final error is larger than 0.667 ulps.
valueUnion.value = tVar;
valueUnion.integer = ((valueUnion.integer + 0x80000000) & 0xFFFFFFFFC0000000ULL);
tVar = valueUnion.value;
// one step Newton iteration to 53 bits with error < 0.667 ulps
sVar = tVar * tVar; // tVar*tVar is exact
rVar = value / sVar; // error <= 0.5 ulps; |rVar| < |tVar|
wVar = tVar + tVar; // tVar+tVar is exact
rVar = (rVar - tVar) / (wVar + rVar); // rVar-tVar is exact; wVar+rVar ~= 3*tVar
tVar = tVar + (tVar * rVar); // error <= 0.5 + 0.5/3 + epsilon
return tVar;
}

722
std/src/std_math_trig.c Normal file
View File

@@ -0,0 +1,722 @@
/*
File: std_math_trig.c
Author: Taylor Robbins
Date: 08\28\2025
Description:
** Holds implementation for trigonometric functions like cos(), sin(), tan(), atan(), etc.
*/
// Small multiples of pi/2 rounded to double precision.
static const double
pio2_1x = 1*M_PI_2, // 0x3FF921FB, 0x54442D18
pio2_2x = 2*M_PI_2, // 0x400921FB, 0x54442D18
pio2_3x = 3*M_PI_2, // 0x4012D97C, 0x7F3321D2
pio2_4x = 4*M_PI_2; // 0x401921FB, 0x54442D18
float sinf(float value)
{
double result;
uint32_t invValue;
int temp, sign;
GET_FLOAT_WORD(invValue, value);
sign = (invValue >> 31);
invValue &= 0x7FFFFFFF;
if (invValue <= 0x3F490FDA) // |value| ~<= pi/4
{
if (invValue < 0x39800000) // |value| < 2**-12
{
// raise inexact if value!=0 and underflow if subnormal
FORCE_EVAL((invValue < 0x00800000) ? value / 0x1p120F : value + 0x1p120F);
return value;
}
return __sindf(value);
}
if (invValue <= 0x407B53D1) // |value| ~<= 5*pi/4
{
if (invValue <= 0x4016CBE3) // |value| ~<= 3pi/4
{
if (sign) { return -__cosdf(value + pio2_1x); }
else { return __cosdf(value - pio2_1x); }
}
return __sindf(sign ? -(value + pio2_2x) : -(value - pio2_2x));
}
if (invValue <= 0x40E231D5) // |value| ~<= 9*pi/4
{
if (invValue <= 0x40AFEDDF) // |value| ~<= 7*pi/4
{
if (sign) { return __cosdf(value + pio2_3x); }
else { return -__cosdf(value - pio2_3x); }
}
return __sindf(sign ? value + pio2_4x : value - pio2_4x);
}
// sin(Inf or NaN) is NaN
if (invValue >= 0x7F800000) { return value - value; }
// general argument reduction needed
temp = __rem_pio2f(value, &result);
switch (temp & 3)
{
case 0: return __sindf(result);
case 1: return __cosdf(result);
case 2: return __sindf(-result);
default: return -__cosdf(result);
}
}
double sin(double value)
{
double result[2];
uint32_t highWord;
unsigned n; //TODO: give this a better name
// High word of value.
GET_HIGH_WORD(highWord, value);
highWord &= 0x7FFFFFFF;
// |value| ~< pi/4
if (highWord <= 0x3FE921FB)
{
if (highWord < 0x3e500000) // |value| < 2**-26
{
// raise inexact if value != 0 and underflow if subnorma
FORCE_EVAL((highWord < 0x00100000) ? value / 0x1p120f : value + 0x1p120f);
return value;
}
return __sin(value, 0.0, 0);
}
// sin(Inf or NaN) is NaN
if (highWord >= 0x7ff00000) { return value - value; }
// argument reduction needed
n = __rem_pio2(value, result);
switch (n&3)
{
case 0: return __sin(result[0], result[1], 1);
case 1: return __cos(result[0], result[1]);
case 2: return -__sin(result[0], result[1], 1);
default: return -__cos(result[0], result[1]);
}
}
float cosf(float value)
{
double result;
uint32_t valueWord;
unsigned n, sign; //TODO: give this a better name
GET_FLOAT_WORD(valueWord, value);
sign = valueWord >> 31;
valueWord &= 0x7FFFFFFF;
if (valueWord <= 0x3F490FDA) // |value| ~<= pi/4
{
if (valueWord < 0x39800000) // |value| < 2**-12
{
// raise inexact if value != 0
FORCE_EVAL(value + 0x1p120F);
return 1.0f;
}
return __cosdf(value);
}
if (valueWord <= 0x407B53D1) // |value| ~<= 5*pi/4
{
if (valueWord > 0x4016CBE3) // |value| ~> 3*pi/4
{
return -__cosdf(sign ? value+pio2_2x : value-pio2_2x);
}
else
{
if (sign) { return __sindf(value + pio2_1x); }
else { return __sindf(pio2_1x - value); }
}
}
if (valueWord <= 0x40E231D5) // |value| ~<= 9*pi/4
{
if (valueWord > 0x40AFEDDF) // |value| ~> 7*pi/4
{
return __cosdf(sign ? value+pio2_4x : value-pio2_4x);
}
else
{
if (sign) { return __sindf(-value - pio2_3x); }
else { return __sindf(value - pio2_3x); }
}
}
// cos(Inf or NaN) is NaN
if (valueWord >= 0x7F800000) { return value-value; }
// general argument reduction needed
n = __rem_pio2f(value, &result);
switch (n & 3)
{
case 0: return __cosdf(result);
case 1: return __sindf(-result);
case 2: return -__cosdf(result);
default: return __sindf(result);
}
}
double cos(double value)
{
double result[2];
uint32_t highWord;
unsigned n; //TODO: give this a better name
GET_HIGH_WORD(highWord, value);
highWord &= 0x7FFFFFFF;
// |value| ~< pi/4
if (highWord <= 0x3FE921FB)
{
if (highWord < 0x3E46A09E) // |value| < 2**-27 * sqrt(2)
{
// raise inexact if value!=0
FORCE_EVAL(value + 0x1p120F);
return 1.0;
}
return __cos(value, 0);
}
// cos(Inf or NaN) is NaN
if (highWord >= 0x7FF00000) { return (value - value); }
// argument reduction
n = __rem_pio2(value, result);
switch (n & 3)
{
case 0: return __cos(result[0], result[1]);
case 1: return -__sin(result[0], result[1], 1);
case 2: return -__cos(result[0], result[1]);
default: return __sin(result[0], result[1], 1);
}
}
float tanf(float value)
{
double result;
uint32_t valueWord;
unsigned n, sign; //TODO: give this a better name
GET_FLOAT_WORD(valueWord, value);
sign = (valueWord >> 31);
valueWord &= 0x7FFFFFFF;
if (valueWord <= 0x3F490FDA) // |value| ~<= pi/4
{
if (valueWord < 0x39800000) // |value| < 2**-12
{
// raise inexact if value!=0 and underflow if subnormal
FORCE_EVAL((valueWord < 0x00800000) ? value / 0x1p120F : value + 0x1p120F);
return value;
}
return __tandf(value, 0);
}
if (valueWord <= 0x407B53D1) // |value| ~<= 5*pi/4
{
if (valueWord <= 0x4016CBE3) // |value| ~<= 3pi/4
{
return __tandf((sign ? value + pio2_1x : value - pio2_1x), 1);
}
else
{
return __tandf((sign ? value + pio2_2x : value - pio2_2x), 0);
}
}
if (valueWord <= 0x40E231D5) // |value| ~<= 9*pi/4
{
if (valueWord <= 0x40AFEDDF) // |value| ~<= 7*pi/4
{
return __tandf((sign ? value + pio2_3x : value - pio2_3x), 1);
}
else
{
return __tandf((sign ? value + pio2_4x : value - pio2_4x), 0);
}
}
// tan(Inf or NaN) is NaN
if (valueWord >= 0x7F800000) { return (value - value); }
// argument reduction
n = __rem_pio2f(value, &result);
return __tandf(result, (n & 1));
}
double tan(double value)
{
double result[2];
uint32_t highWord;
unsigned n; //TODO: give this a better name
GET_HIGH_WORD(highWord, value);
highWord &= 0x7FFFFFFF;
// |value| ~< pi/4
if (highWord <= 0x3FE921FB)
{
if (highWord < 0x3E400000) // |value| < 2**-27
{
// raise inexact if value!=0 and underflow if subnormal
FORCE_EVAL((highWord < 0x00100000) ? value / 0x1p120F : value + 0x1p120F);
return value;
}
return __tan(value, 0.0, 0);
}
// tan(Inf or NaN) is NaN
if (highWord >= 0x7FF00000) { return (value - value); }
// argument reduction
n = __rem_pio2(value, result);
return __tan(result[0], result[1], (n & 1));
}
float asinf(float value)
{
double sqrtZ;
float zVar;
uint32_t wordValue, unsignedValue;
GET_FLOAT_WORD(wordValue, value);
unsignedValue = wordValue & 0x7FFFFFFF;
if (unsignedValue >= 0x3F800000) // |value| >= 1
{
if (unsignedValue == 0x3F800000) // |value| == 1
{
return (value * pio2) + 0x1p-120f; // asin(+-1) = +-pi/2 with inexact
}
return 0 / (value - value); // asin(|value|>1) is NaN
}
if (unsignedValue < 0x3f000000) // |value| < 0.5
{
// if 0x1p-126 <= |value| < 0x1p-12, avoid raising underflow
if (unsignedValue < 0x39800000 && unsignedValue >= 0x00800000) { return value; }
return value + value * asinf_helper(value * value);
}
// 1 > |value| >= 0.5
zVar = (1 - fabsf(value)) * 0.5f;
sqrtZ = sqrt(zVar);
value = pio2 - 2 * (sqrtZ + (sqrtZ * asinf_helper(zVar)));
if (wordValue >> 31) { return -value; }
return value;
}
double asin(double value)
{
double zVar, rVar, sVar;
uint32_t highWord, valueUnsigned;
GET_HIGH_WORD(highWord, value);
valueUnsigned = (highWord & 0x7FFFFFFF);
// |value| >= 1 or nan
if (valueUnsigned >= 0x3FF00000)
{
uint32_t lowWord;
GET_LOW_WORD(lowWord, value);
if (((valueUnsigned - 0x3FF00000) | lowWord) == 0)
{
// asin(1) = +-pi/2 with inexact
return (value * pio2d_hi) + 0x1p-120F;
}
return 0 / (value - value);
}
// |value| < 0.5
if (valueUnsigned < 0x3FE00000)
{
// if 0x1p-1022 <= |value| < 0x1p-26, avoid raising underflow
if (valueUnsigned < 0x3E500000 && valueUnsigned >= 0x00100000) { return value; }
return value + (value * asin_helper(value * value));
}
// 1 > |value| >= 0.5
zVar = (1 - fabs(value)) * 0.5;
sVar = sqrt(zVar);
rVar = asin_helper(zVar);
if (valueUnsigned >= 0x3FEF3333) // if |value| > 0.975
{
value = pio2d_hi - (2 * (sVar + (sVar * rVar)) - pio2d_lo);
}
else
{
double fVar, cVar;
// fVar+cVar = sqrt(zVar)
fVar = sVar;
SET_LOW_WORD(fVar, 0);
cVar = (zVar - (fVar * fVar)) / (sVar + fVar);
value = (0.5 * pio2d_hi) - ((2 * sVar * rVar) - (pio2d_lo - (2 * cVar)) - ((0.5 * pio2d_hi) - (2 * fVar)));
}
if (highWord >> 31) { return -value; }
return value;
}
float acosf(float value)
{
float zVar, wVar, sVar, cVar, df;
uint32_t valueWord, ix;
GET_FLOAT_WORD(valueWord, value);
ix = valueWord & 0x7FFFFFFF;
// |value| >= 1 or nan
if (ix >= 0x3F800000)
{
if (ix == 0x3F800000)
{
if (valueWord >> 31) { return (2 * pio2_hi) + 0x1p-120F; }
return 0;
}
return 0 / (value - value);
}
// |value| < 0.5
if (ix < 0x3F000000)
{
if (ix <= 0x32800000) // |value| < 2**-26
{
return pio2_hi + 0x1p-120F;
}
return pio2_hi - (value - (pio2_lo - value * acosf_helper(value * value)));
}
// value < -0.5
if (valueWord >> 31)
{
zVar = (1 + value) * 0.5f;
sVar = sqrtf(zVar);
wVar = (acosf_helper(zVar) * sVar) - pio2_lo;
return 2 * (pio2_hi - (sVar + wVar));
}
// value > 0.5
zVar = (1 - value) * 0.5f;
sVar = sqrtf(zVar);
GET_FLOAT_WORD(valueWord, sVar);
SET_FLOAT_WORD(df, (valueWord & 0xFFFFF000));
cVar = (zVar - (df * df)) / (sVar + df);
wVar = (acosf_helper(zVar) * sVar) + cVar;
return 2 * (df + wVar);
}
double acos(double value)
{
double zVar, wVar, sVar, cVar, df;
uint32_t highWord, valueUnsigned;
GET_HIGH_WORD(highWord, value);
valueUnsigned = (highWord & 0x7FFFFFFF);
// |value| >= 1 or nan
if (valueUnsigned >= 0x3FF00000)
{
uint32_t lowWord;
GET_LOW_WORD(lowWord, value);
if (((valueUnsigned - 0x3FF00000) | lowWord) == 0)
{
// acos(1)=0, acos(-1)=pi
if (highWord >> 31) { return (2 * pio2d_hi) + 0x1p-120f; }
return 0;
}
return 0 / (value - value);
}
// |value| < 0.5
if (valueUnsigned < 0x3FE00000)
{
if (valueUnsigned <= 0x3C600000) // |value| < 2**-57
{
return pio2d_hi + 0x1p-120f;
}
return pio2d_hi - (value - (pio2d_lo - (value * acos_helper(value * value))));
}
// value < -0.5
if (highWord >> 31)
{
zVar = (1.0 + value) * 0.5;
sVar = sqrt(zVar);
wVar = (acos_helper(zVar) * sVar) - pio2d_lo;
return 2 * (pio2d_hi - (sVar + wVar));
}
// value > 0.5
zVar = (1.0 - value) * 0.5;
sVar = sqrt(zVar);
df = sVar;
SET_LOW_WORD(df, 0);
cVar = (zVar - (df * df)) / (sVar + df);
wVar = (acos_helper(zVar) * sVar) + cVar;
return 2 * (df + wVar);
}
float atanf(float value)
{
float_t wVar, sVar1, sVar2, zVar;
uint32_t valueUnsigned, sign;
int index;
GET_FLOAT_WORD(valueUnsigned, value);
sign = valueUnsigned>>31;
valueUnsigned &= 0x7FFFFFFF;
if (valueUnsigned >= 0x4C800000) // if |value| >= 2**26
{
if (isnan(value)) { return value; }
zVar = atanhi[3] + 0x1p-120f;
return (sign ? -zVar : zVar);
}
if (valueUnsigned < 0x3EE00000) // |value| < 0.4375
{
if (valueUnsigned < 0x39800000) // |value| < 2**-12
{
if (valueUnsigned < 0x00800000)
{
// raise underflow for subnormal value
FORCE_EVAL(value*value);
}
return value;
}
index = -1;
}
else
{
value = fabsf(value);
if (valueUnsigned < 0x3f980000) // |value| < 1.1875
{
if (valueUnsigned < 0x3f300000) // 7/16 <= |value| < 11/16
{
index = 0;
value = (2.0f*value - 1.0f)/(2.0f + value);
}
else // 11/16 <= |value| < 19/16
{
index = 1;
value = (value - 1.0f)/(value + 1.0f);
}
}
else
{
if (valueUnsigned < 0x401c0000) // |value| < 2.4375
{
index = 2;
value = (value - 1.5f)/(1.0f + 1.5f*value);
}
else // 2.4375 <= |value| < 2**26
{
index = 3;
value = -1.0f/value;
}
}
}
// end of argument reduction
zVar = value * value;
wVar = zVar * zVar;
// break sum from i=0 to 10 aT[i]zVar**(i+1) into odd and even poly
sVar1 = zVar * (aT[0] + wVar * (aT[2] + wVar * aT[4]));
sVar2 = wVar * (aT[1] + wVar * aT[3]);
if (index < 0) { return value - value * (sVar1 + sVar2); }
zVar = atanhi[index] - ((value * (sVar1 + sVar2) - atanlo[index]) - value);
return (sign ? -zVar : zVar);
}
double atan(double value)
{
double_t quad, sVar1, sVar2, square;
uint32_t valueUnsigned, sign;
int index;
GET_HIGH_WORD(valueUnsigned, value);
sign = (valueUnsigned >> 31);
valueUnsigned &= 0x7FFFFFFF;
if (valueUnsigned >= 0x44100000) // if |value| >= 2^66
{
if (isnan(value)) { return value; }
square = atanhid[3] + 0x1p-120F;
return (sign ? -square : square);
}
if (valueUnsigned < 0x3FDC0000) // |value| < 0.4375
{
if (valueUnsigned < 0x3E400000) // |value| < 2^-27
{
if (valueUnsigned < 0x00100000)
{
// raise underflow for subnormal value
FORCE_EVAL((float)value);
}
return value;
}
index = -1;
}
else
{
value = fabs(value);
if (valueUnsigned < 0x3FF30000) // |value| < 1.1875
{
if (valueUnsigned < 0x3FE60000) // 7/16 <= |value| < 11/16
{
index = 0;
value = ((2.0 * value) - 1.0) / (2.0 + value);
}
else // 11/16 <= |value| < 19/16
{
index = 1;
value = (value - 1.0) / (value + 1.0);
}
}
else
{
if (valueUnsigned < 0x40038000) // |value| < 2.4375
{
index = 2;
value = (value - 1.5) / (1.0 + (1.5 * value));
}
else // 2.4375 <= |value| < 2^66
{
index = 3;
value = -1.0 / value;
}
}
}
// end of argument reduction
square = (value * value);
quad = (square * square);
// break sum from i=0 to 10 aTd[i]square**(i+1) into odd and even poly
sVar1 = square * (aTd[0] + quad * (aTd[2] + quad * (aTd[4] + quad * (aTd[6] + quad * (aTd[8] + quad * aTd[10])))));
sVar2 = quad * (aTd[1] + quad * (aTd[3] + quad * (aTd[5] + quad * (aTd[7] + quad * aTd[9]))));
if (index < 0) { return value - value * (sVar1 + sVar2); }
square = atanhid[index] - (value * (sVar1 + sVar2) - atanlod[index] - value);
return (sign ? -square : square);
}
float atan2f(float numer, float denom)
{
float zVar;
uint32_t index, denomWord, numerWord;
if (isnan(denom) || isnan(numer)) { return (denom + numer); }
GET_FLOAT_WORD(denomWord, denom);
GET_FLOAT_WORD(numerWord, numer);
if (denomWord == 0x3F800000) { return atanf(numer); } // denom=1.0
index = ((numerWord >> 31) & 1) | ((denomWord >> 30) & 2); // 2*sign(denom)+sign(numer)
denomWord &= 0x7FFFFFFF;
numerWord &= 0x7FFFFFFF;
// when numer = 0
if (numerWord == 0)
{
switch (index)
{
case 0:
case 1: return numer; // atan(+-0,+anything)=+-0
case 2: return pi; // atan(+0,-anything) = pi
case 3: return -pi; // atan(-0,-anything) =-pi
}
}
// when denom = 0
if (denomWord == 0) { return (index & 1) ? -pi/2 : pi/2; }
// when denom is INF
if (denomWord == 0x7F800000)
{
if (numerWord == 0x7F800000)
{
switch (index)
{
case 0: return pi/4; // atan(+INF,+INF)
case 1: return -pi/4; // atan(-INF,+INF)
case 2: return 3*pi/4; // atan(+INF,-INF)
case 3: return -3*pi/4; // atan(-INF,-INF)
}
}
else
{
switch (index)
{
case 0: return 0.0f; // atan(+...,+INF)
case 1: return -0.0f; // atan(-...,+INF)
case 2: return pi; // atan(+...,-INF)
case 3: return -pi; // atan(-...,-INF)
}
}
}
// |numer/denom| > 0x1p26
if (denomWord + (26 << 23) < numerWord || numerWord == 0x7F800000)
{
return (index &1) ? -pi/2 : pi/2;
}
// zVar = atan(|numer/denom|) with correct underflow
if ((index & 2) && numerWord + (26 << 23) < denomWord) //|numer/denom| < 0x1p-26, denom < 0
{
zVar = 0.0;
}
else
{
zVar = atanf(fabsf(numer / denom));
}
switch (index)
{
case 0: return zVar; // atan(+,+)
case 1: return -zVar; // atan(-,+)
case 2: return pi - (zVar-pi_lo); // atan(+,-)
default: return (zVar-pi_lo) - pi; // atan(-,-)
}
}
double atan2(double numer, double denom)
{
double zVar;
uint32_t index, denomLow, numerLow, denomHigh, numerHigh;
if (isnan(denom) || isnan(numer)) { return denom+numer; }
EXTRACT_WORDS(denomHigh, denomLow, denom);
EXTRACT_WORDS(numerHigh, numerLow, numer);
if ((denomHigh-0x3FF00000 | denomLow) == 0) { return atan(numer); } // denom = 1.0
index = ((numerHigh >> 31) & 1) | ((denomHigh >> 30) & 2); // 2*sign(denom)+sign(numer)
denomHigh = (denomHigh & 0x7FFFFFFF);
numerHigh = (numerHigh & 0x7FFFFFFF);
// when numer = 0
if ((numerHigh|numerLow) == 0)
{
switch(index)
{
case 0:
case 1: return numer; // atan(+-0,+anything)=+-0
case 2: return pi; // atan(+0,-anything) = pi
case 3: return -pi; // atan(-0,-anything) =-pi
}
}
// when denom = 0
if ((denomHigh|denomLow) == 0) { return (index & 1) ? -pi/2 : pi/2; }
// when denom is INF
if (denomHigh == 0x7FF00000)
{
if (numerHigh == 0x7FF00000)
{
switch(index)
{
case 0: return pi/4; // atan(+INF,+INF)
case 1: return -pi/4; // atan(-INF,+INF)
case 2: return 3*pi/4; // atan(+INF,-INF)
case 3: return -3*pi/4; // atan(-INF,-INF)
}
}
else
{
switch(index)
{
case 0: return 0.0; // atan(+...,+INF)
case 1: return -0.0; // atan(-...,+INF)
case 2: return pi; // atan(+...,-INF)
case 3: return -pi; // atan(-...,-INF)
}
}
}
// |numer/denom| > 0x1p64
if (denomHigh+(64<<20) < numerHigh || numerHigh == 0x7FF00000) { return (index & 1) ? -pi/2 : pi/2; }
// zVar = atan(|numer/denom|) without spurious underflow
if ((index & 2) && numerHigh + (64 << 20) < denomHigh) // |numer/denom| < 0x1p-64, denom<0
{
zVar = 0;
}
else
{
zVar = atan(fabs(numer / denom));
}
switch (index)
{
case 0: return zVar; // atan(+,+)
case 1: return -zVar; // atan(-,+)
case 2: return pi - (zVar - pi_lo); // atan(+,-)
default: return (zVar - pi_lo) - pi; // atan(-,-)
}
}

47
std/src/std_mem.c Normal file
View File

@@ -0,0 +1,47 @@
/*
File: std_mem.c
Author: Taylor Robbins
Date: 08\28\2025
Description:
** Holds the simple API for growing and query memory inside the WebAssembly memory model
** To grow memory by pages we call out to jsStdGrowMemory
*/
extern unsigned char __heap_base;
static void* heapBaseAddress = 0;
static size_t stdCurrentHeapSize = 0;
static size_t stdCurrentPageCount = 0;
WASM_EXPORT(init_mem) void init_mem(size_t numInitialPages)
{
heapBaseAddress = (void*)&__heap_base;
stdCurrentPageCount = numInitialPages;
}
//These are our own std-like functions for interacting with WebAssembly memory
void* grow_mem(size_t numBytes)
{
assert_msg(heapBaseAddress != nullptr, "growmem called before initmem! Make sure initmem is being called by the Javascript code!");
size_t memorySizeNeeded = ((size_t)heapBaseAddress) + stdCurrentHeapSize;
size_t numPagesNeeded = (memorySizeNeeded / WASM_MEMORY_PAGE_SIZE) + (((memorySizeNeeded % WASM_MEMORY_PAGE_SIZE) > 0) ? 1 : 0);
if (numPagesNeeded > WASM_MEMORY_MAX_NUM_PAGES)
{
assert_msg(numPagesNeeded <= WASM_MEMORY_MAX_NUM_PAGES, "Ran out of memory! Max WebAssembly memory reached!");
return nullptr;
}
else if (numPagesNeeded > stdCurrentPageCount)
{
jsStdGrowMemory(numPagesNeeded - stdCurrentPageCount);
stdCurrentPageCount = numPagesNeeded;
}
void* result = (void*)((uint8_t*)heapBaseAddress + stdCurrentHeapSize);
stdCurrentHeapSize += numBytes;
return result;
}
size_t get_mem()
{
return stdCurrentHeapSize;
}

26
std/src/std_misc.c Normal file
View File

@@ -0,0 +1,26 @@
/*
File: std_misc.c
Author: Taylor Robbins
Date: 08\28\2025
Description:
** Contains random functions that don't really need their own dedicated file
*/
int abs(int value)
{
return ((value > 0) ? value : -value);
}
_Noreturn void exit(int exitCode)
{
jsStdAbort("exit", exitCode);
}
_Noreturn void abort()
{
jsStdAbort("abort", 0);
}
_Noreturn void abort_msg(const char* message)
{
jsStdAbort(message, 0);
}

20
std/stdbool.h Normal file
View File

@@ -0,0 +1,20 @@
/*
File: stdbool.h
Author: Taylor Robbins
Date: 08\28\2025
*/
#ifndef _STDBOOL_H
#define _STDBOOL_H
#if LANGUAGE_IS_C
#define true 1
#define false 0
#define bool _Bool
#endif
#define __bool_true_false_are_defined 1
#endif // _STDBOOL_H

42
std/stdlib.h Normal file
View File

@@ -0,0 +1,42 @@
/*
File: stdlib.h
Author: Taylor Robbins
Date: 08\28\2025
*/
#ifndef _STDLIB_H
#define _STDLIB_H
#include <stdint.h>
MAYBE_START_EXTERN_C
int abs(int value);
_Noreturn void exit(int exitCode);
_Noreturn void abort();
//NOTE: This is not a standard function but we want to pass a message to jsStdAbort so we added this
_Noreturn void abort_msg(const char* message);
//NOTE: These are intentionally not available on WebAssembly because our memory model only allows for growth and
// it's better to have the application decide how to manage memory on top of that simple model
// void* malloc(size_t numBytes);
// void* calloc(size_t numElements, size_t elemSize);
// void* realloc(void* prevAllocPntr, size_t newSize);
// void free(void* allocPntr);
// void* aligned_alloc(size_t numBytes, size_t alignmentSize);
//These are our own std-like functions for interacting with WebAssembly memory
WASM_EXPORT(init_mem) void init_mem(size_t numInitialPages);
void* grow_mem(size_t numBytes);
size_t get_mem();
// TODO: double atof(const char* str);
// TODO: void* alloca(size_t numBytes);
// typedef int (StdCompareFunc_f)(const void* left, const void* right);
// typedef int (StdCompareFuncEx_f)(const void* left, const void* right, void* compareFunc);
//TODO: void qsort(void* basePntr, size_t numItems, size_t itemSize, StdCompareFunc_f* compareFunc);
MAYBE_END_EXTERN_C
#endif // _STDLIB_H