Use the SIMDe 128-bit path for all non-x86 systems

SIMDe includes support for AltiVec, LSX, NEON, and more. For other architectures (such as RISC-V RVV), it also provides portable fallbacks that are written to be friendly to compiler autovectorization.

Since there is no runtime dispatching for these SIMDe backends, the one that gets used is selected at compile time, depending on which instructions are enabled by the toolchain.
This commit is contained in:
Cameron Gutman
2026-02-19 19:29:11 -06:00
parent 5551d29ba2
commit a063522db4
2 changed files with 6 additions and 31 deletions

View File

@@ -6,10 +6,6 @@
#else #else
#if defined(__SSSE3__) || (defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64)) #if defined(__SSSE3__) || (defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64))
#define OBLAS_SSE3 #define OBLAS_SSE3
#else
#if defined(__aarch64__) || (defined(_MSC_VER) && defined(_M_ARM64))
#define OBLAS_NEON
#endif
#endif #endif
#endif #endif
#endif #endif

View File

@@ -124,8 +124,7 @@ void obl_axpyb32_ref(u8 *a, u32 *b, u8 u, unsigned k)
} \ } \
} while (0) } while (0)
#else #elif defined(OBLAS_AVX2)
#if defined(OBLAS_AVX2)
#include <immintrin.h> #include <immintrin.h>
#undef OBLAS_ALIGN #undef OBLAS_ALIGN
@@ -174,13 +173,13 @@ void obl_axpyb32_ref(u8 *a, u32 *b, u8 u, unsigned k)
} while (0) } while (0)
#else #else
#if defined(OBLAS_SSE3) || defined(OBLAS_NEON)
#if defined(OBLAS_NEON) #if defined(OBLAS_SSE3)
#define SIMDE_ENABLE_NATIVE_ALIASES
#include <simde/x86/ssse3.h>
#else
#include <emmintrin.h> #include <emmintrin.h>
#include <tmmintrin.h> #include <tmmintrin.h>
#else
#define SIMDE_ENABLE_NATIVE_ALIASES
#include <simde/x86/ssse3.h>
#endif #endif
#undef OBLAS_ALIGN #undef OBLAS_ALIGN
@@ -231,26 +230,6 @@ void obl_axpyb32_ref(u8 *a, u32 *b, u8 u, unsigned k)
_mm_storeu_si128(ap, _mm_xor_si128(_mm_loadu_si128(ap), ret_hi)); \ _mm_storeu_si128(ap, _mm_xor_si128(_mm_loadu_si128(ap), ret_hi)); \
} \ } \
} while (0) } while (0)
#else
#undef OBLAS_ALIGN
#define OBLAS_ALIGN (sizeof(void *))
#undef OBL_SHUF
#define OBL_SHUF(op, a, b, f) \
do { \
op##_ref(a, b, u, k); \
} while (0)
#undef OBL_SHUF_XOR
#define OBL_SHUF_XOR
#undef OBL_AXPYB32
#define OBL_AXPYB32 obl_axpyb32_ref
#endif
#endif
#endif #endif
#define OBL_NOOP(a, b) (b) #define OBL_NOOP(a, b) (b)