/* Copyright (c) 2018, 2019 Evan Nemerson <evan@nemerson.com>
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define SIMDE_TESTS_CURRENT_ISAX avx
#if !defined(__clang__) && (defined(__linux__) || defined(__linux) || defined(__gnu_linux__)) && !defined(_GNU_SOURCE)
  #define _GNU_SOURCE 1  // for MAP_ANONYMOUS
#endif
#include <simde/x86/avx.h>
#include <test/x86/test-avx.h>
#if !defined(HEDLEY_MSVC_VERSION) && !defined(__wasi__)
  #include <sys/mman.h>
#endif

HEDLEY_DIAGNOSTIC_PUSH
HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION

/* Reinterprets the bytes of a 32-bit unsigned integer as a simde_float32.
 * The bytes are copied verbatim; no numeric conversion takes place. */
static simde_float32 u32_to_f32(uint32_t u32) {
  simde_float32 reinterpreted;

  simde_memcpy(&reinterpreted, &u32, sizeof(reinterpreted));

  return reinterpreted;
}

/* Reinterprets the bytes of a 64-bit unsigned integer as a simde_float64.
 * The bytes are copied verbatim; no numeric conversion takes place. */
static simde_float64 u64_to_f64(uint64_t u64) {
  simde_float64 reinterpreted;

  simde_memcpy(&reinterpreted, &u64, sizeof(reinterpreted));

  return reinterpreted;
}

HEDLEY_DIAGNOSTIC_POP

/* Checks simde_mm256_set_epi8 over 32 rounds.
 *
 * Each round fills a 32-element int8_t buffer through
 * simde_test_codegen_random_memory, passes the elements to the intrinsic
 * from the highest index down to index 0, and requires the result to equal
 * an unaligned load of the same buffer. Returns 0 when every round passes. */
static int
test_simde_mm256_set_epi8(SIMDE_MUNIT_TEST_ARGS) {
  for (size_t rounds_left = 32 ; rounds_left > 0 ; rounds_left--) {
    int8_t lanes[32];
    simde__m256i actual;
    simde__m256i expected;

    simde_test_codegen_random_memory(sizeof(lanes), HEDLEY_REINTERPRET_CAST(uint8_t*, lanes));

    /* The intrinsic's first argument is the last element of the buffer. */
    actual = simde_mm256_set_epi8(lanes[31], lanes[30], lanes[29], lanes[28],
                                  lanes[27], lanes[26], lanes[25], lanes[24],
                                  lanes[23], lanes[22], lanes[21], lanes[20],
                                  lanes[19], lanes[18], lanes[17], lanes[16],
                                  lanes[15], lanes[14], lanes[13], lanes[12],
                                  lanes[11], lanes[10], lanes[ 9], lanes[ 8],
                                  lanes[ 7], lanes[ 6], lanes[ 5], lanes[ 4],
                                  lanes[ 3], lanes[ 2], lanes[ 1], lanes[ 0]);
    expected = simde_x_mm256_loadu_epi8(lanes);

    simde_test_x86_assert_equal_i8x32(expected, actual);
  }

  return 0;
}

/* Checks simde_mm256_set_epi16 over 16 rounds.
 *
 * Each round fills a 16-element int16_t buffer through
 * simde_test_codegen_random_memory, passes the elements to the intrinsic
 * from index 15 down to index 0, and requires the result to equal an
 * unaligned load of the same buffer. Returns 0 when every round passes. */
static int
test_simde_mm256_set_epi16(SIMDE_MUNIT_TEST_ARGS) {
  for (size_t rounds_left = 16 ; rounds_left > 0 ; rounds_left--) {
    int16_t lanes[16];
    simde__m256i actual;
    simde__m256i expected;

    simde_test_codegen_random_memory(sizeof(lanes), HEDLEY_REINTERPRET_CAST(uint8_t*, lanes));

    /* The intrinsic's first argument is the last element of the buffer. */
    actual = simde_mm256_set_epi16(lanes[15], lanes[14], lanes[13], lanes[12],
                                   lanes[11], lanes[10], lanes[ 9], lanes[ 8],
                                   lanes[ 7], lanes[ 6], lanes[ 5], lanes[ 4],
                                   lanes[ 3], lanes[ 2], lanes[ 1], lanes[ 0]);
    expected = simde_x_mm256_loadu_epi16(lanes);

    simde_test_x86_assert_equal_i16x16(expected, actual);
  }

  return 0;
}

/* Checks simde_mm256_set_epi32 over 32 rounds.
 *
 * Each round fills an 8-element int32_t buffer through
 * simde_test_codegen_random_memory, passes the elements to the intrinsic
 * from index 7 down to index 0, and requires the result to equal an
 * unaligned load of the same buffer. Returns 0 when every round passes. */
static int
test_simde_mm256_set_epi32(SIMDE_MUNIT_TEST_ARGS) {
  for (size_t rounds_left = 32 ; rounds_left > 0 ; rounds_left--) {
    int32_t lanes[8];
    simde__m256i actual;
    simde__m256i expected;

    simde_test_codegen_random_memory(sizeof(lanes), HEDLEY_REINTERPRET_CAST(uint8_t*, lanes));

    /* The intrinsic's first argument is the last element of the buffer. */
    actual = simde_mm256_set_epi32(lanes[7], lanes[6], lanes[5], lanes[4],
                                   lanes[3], lanes[2], lanes[1], lanes[0]);
    expected = simde_x_mm256_loadu_epi32(lanes);

    simde_test_x86_assert_equal_i32x8(expected, actual);
  }

  return 0;
}

/* Checks simde_mm256_set_epi64x over 64 rounds.
 *
 * Each round fills a 4-element int64_t buffer through
 * simde_test_codegen_random_memory, passes the elements to the intrinsic
 * from index 3 down to index 0, and requires the result to equal an
 * unaligned load of the same buffer. Returns 0 when every round passes. */
static int
test_simde_mm256_set_epi64x(SIMDE_MUNIT_TEST_ARGS) {
  for (size_t rounds_left = 64 ; rounds_left > 0 ; rounds_left--) {
    int64_t lanes[4];
    simde__m256i actual;
    simde__m256i expected;

    simde_test_codegen_random_memory(sizeof(lanes), HEDLEY_REINTERPRET_CAST(uint8_t*, lanes));

    /* The intrinsic's first argument is the last element of the buffer. */
    actual = simde_mm256_set_epi64x(lanes[3], lanes[2], lanes[1], lanes[0]);
    expected = simde_x_mm256_loadu_epi64(lanes);

    simde_test_x86_assert_equal_i64x4(expected, actual);
  }

  return 0;
}

static int
test_simde_mm256_set_ps(SIMDE_MUNIT_TEST_ARGS) {
  struct {
    simde_float32 a[sizeof(simde__m256) / sizeof(simde_float32)];
    simde_float32 r[sizeof(simde__m256) / sizeof(simde_float32)];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(  -114.94), SIMDE_FLOAT32_C(  -844.95), SIMDE_FLOAT32_C(   616.69), SIMDE_FLOAT32_C(  -717.57),
        SIMDE_FLOAT32_C(   321.75), SIMDE_FLOAT32_C(   709.09), SIMDE_FLOAT32_C(  -540.81), SIMDE_FLOAT32_C(   218.53) },
      { SIMDE_FLOAT32_C(  -114.94), SIMDE_FLOAT32_C(  -844.95), SIMDE_FLOAT32_C(   616.69), SIMDE_FLOAT32_C(  -717.57),
        SIMDE_FLOAT32_C(   321.75), SIMDE_FLOAT32_C(   709.09), SIMDE_FLOAT32_C(  -540.81), SIMDE_FLOAT32_C(   218.53) } },
    { { SIMDE_FLOAT32_C(  -651.16), SIMDE_FLOAT32_C(  -438.85), SIMDE_FLOAT32_C(     9.87), SIMDE_FLOAT32_C(   429.00),
        SIMDE_FLOAT32_C(   129.97), SIMDE_FLOAT32_C(   774.42), SIMDE_FLOAT32_C(  -864.41), SIMDE_FLOAT32_C(  -659.36) },
      { SIMDE_FLOAT32_C(  -651.16), SIMDE_FLOAT32_C(  -438.85), SIMDE_FLOAT32_C(     9.87), SIMDE_FLOAT32_C(   429.00),
        SIMDE_FLOAT32_C(   129.97), SIMDE_FLOAT32_C(   774.42), SIMDE_FLOAT32_C(  -864.41), SIMDE_FLOAT32_C(  -659.36) } },
    { { SIMDE_FLOAT32_C(   454.07), SIMDE_FLOAT32_C(  -313.38), SIMDE_FLOAT32_C(   658.78), SIMDE_FLOAT32_C(   553.99),
        SIMDE_FLOAT32_C(   841.57), SIMDE_FLOAT32_C(  -926.85), SIMDE_FLOAT32_C(  -371.71), SIMDE_FLOAT32_C(  -519.83) },
      { SIMDE_FLOAT32_C(   454.07), SIMDE_FLOAT32_C(  -313.38), SIMDE_FLOAT32_C(   658.78), SIMDE_FLOAT32_C(   553.99),
        SIMDE_FLOAT32_C(   841.57), SIMDE_FLOAT32_C(  -926.85), SIMDE_FLOAT32_C(  -371.71), SIMDE_FLOAT32_C(  -519.83) } },
    { { SIMDE_FLOAT32_C(  -569.24), SIMDE_FLOAT32_C(  -886.28), SIMDE_FLOAT32_C(   502.22), SIMDE_FLOAT32_C(   647.50),
        SIMDE_FLOAT32_C(   962.06), SIMDE_FLOAT32_C(   396.40), SIMDE_FLOAT32_C(   624.85), SIMDE_FLOAT32_C(  -152.88) },
      { SIMDE_FLOAT32_C(  -569.24), SIMDE_FLOAT32_C(  -886.28), SIMDE_FLOAT32_C(   502.22), SIMDE_FLOAT32_C(   647.50),
        SIMDE_FLOAT32_C(   962.06), SIMDE_FLOAT32_C(   396.40), SIMDE_FLOAT32_C(   624.85), SIMDE_FLOAT32_C(  -152.88) } },
    { { SIMDE_FLOAT32_C(   551.45), SIMDE_FLOAT32_C(   241.55), SIMDE_FLOAT32_C(   129.54), SIMDE_FLOAT32_C(  -126.80),
        SIMDE_FLOAT32_C(   -49.37), SIMDE_FLOAT32_C(   588.74), SIMDE_FLOAT32_C(  -908.27), SIMDE_FLOAT32_C(   299.47) },
      { SIMDE_FLOAT32_C(   551.45), SIMDE_FLOAT32_C(   241.55), SIMDE_FLOAT32_C(   129.54), SIMDE_FLOAT32_C(  -126.80),
        SIMDE_FLOAT32_C(   -49.37), SIMDE_FLOAT32_C(   588.74), SIMDE_FLOAT32_C(  -908.27), SIMDE_FLOAT32_C(   299.47) } },
    { { SIMDE_FLOAT32_C(  -850.12), SIMDE_FLOAT32_C(   101.60), SIMDE_FLOAT32_C(  -271.53), SIMDE_FLOAT32_C(   279.86),
        SIMDE_FLOAT32_C(  -123.98), SIMDE_FLOAT32_C(  -135.94), SIMDE_FLOAT32_C(   620.49), SIMDE_FLOAT32_C(  -669.90) },
      { SIMDE_FLOAT32_C(  -850.12), SIMDE_FLOAT32_C(   101.60), SIMDE_FLOAT32_C(  -271.53), SIMDE_FLOAT32_C(   279.86),
        SIMDE_FLOAT32_C(  -123.98), SIMDE_FLOAT32_C(  -135.94), SIMDE_FLOAT32_C(   620.49), SIMDE_FLOAT32_C(  -669.90) } },
    { { SIMDE_FLOAT32_C(   550.68), SIMDE_FLOAT32_C(   279.27), SIMDE_FLOAT32_C(   884.09), SIMDE_FLOAT32_C(   392.25),
        SIMDE_FLOAT32_C(   352.43), SIMDE_FLOAT32_C(  -487.62), SIMDE_FLOAT32_C(   872.43), SIMDE_FLOAT32_C(   783.18) },
      { SIMDE_FLOAT32_C(   550.68), SIMDE_FLOAT32_C(   279.27), SIMDE_FLOAT32_C(   884.09), SIMDE_FLOAT32_C(   392.25),
        SIMDE_FLOAT32_C(   352.43), SIMDE_FLOAT32_C(  -487.62), SIMDE_FLOAT32_C(   872.43), SIMDE_FLOAT32_C(   783.18) } },
    { { SIMDE_FLOAT32_C(  -373.91), SIMDE_FLOAT32_C(   374.65), SIMDE_FLOAT32_C(   430.69), SIMDE_FLOAT32_C(  -411.85),
        SIMDE_FLOAT32_C(  -228.96), SIMDE_FLOAT32_C(    55.54), SIMDE_FLOAT32_C(   435.27), SIMDE_FLOAT32_C(  -677.51) },
      { SIMDE_FLOAT32_C(  -373.91), SIMDE_FLOAT32_C(   374.65), SIMDE_FLOAT32_C(   430.69), SIMDE_FLOAT32_C(  -411.85),
        SIMDE_FLOAT32_C(  -228.96), SIMDE_FLOAT32_C(    55.54), SIMDE_FLOAT32_C(   435.27), SIMDE_FLOAT32_C(  -677.51) } },
    { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483650.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483650.0) },
      { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483650.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483650.0) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_float32* a = test_vec[i].a;
    simde__m256 r = simde_mm256_set_ps(a[7], a[6], a[5], a[4], a[3], a[2], a[1], a[0]);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}

static int
test_simde_mm256_set_pd(SIMDE_MUNIT_TEST_ARGS) {
  struct {
    simde_float64 a[sizeof(simde__m256) / sizeof(simde_float64)];
    simde_float64 r[sizeof(simde__m256) / sizeof(simde_float64)];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(   237.07), SIMDE_FLOAT64_C(  -691.18), SIMDE_FLOAT64_C(  -606.39), SIMDE_FLOAT64_C(    84.18) },
      { SIMDE_FLOAT64_C(   237.07), SIMDE_FLOAT64_C(  -691.18), SIMDE_FLOAT64_C(  -606.39), SIMDE_FLOAT64_C(    84.18) } },
    { { SIMDE_FLOAT64_C(   106.38), SIMDE_FLOAT64_C(  -554.05), SIMDE_FLOAT64_C(   782.28), SIMDE_FLOAT64_C(  -694.29) },
      { SIMDE_FLOAT64_C(   106.38), SIMDE_FLOAT64_C(  -554.05), SIMDE_FLOAT64_C(   782.28), SIMDE_FLOAT64_C(  -694.29) } },
    { { SIMDE_FLOAT64_C(   250.86), SIMDE_FLOAT64_C(  -269.75), SIMDE_FLOAT64_C(  -927.01), SIMDE_FLOAT64_C(   214.49) },
      { SIMDE_FLOAT64_C(   250.86), SIMDE_FLOAT64_C(  -269.75), SIMDE_FLOAT64_C(  -927.01), SIMDE_FLOAT64_C(   214.49) } },
    { { SIMDE_FLOAT64_C(   515.10), SIMDE_FLOAT64_C(  -597.89), SIMDE_FLOAT64_C(   888.27), SIMDE_FLOAT64_C(  -756.42) },
      { SIMDE_FLOAT64_C(   515.10), SIMDE_FLOAT64_C(  -597.89), SIMDE_FLOAT64_C(   888.27), SIMDE_FLOAT64_C(  -756.42) } },
    { { SIMDE_FLOAT64_C(   947.11), SIMDE_FLOAT64_C(  -148.81), SIMDE_FLOAT64_C(   852.53), SIMDE_FLOAT64_C(   316.80) },
      { SIMDE_FLOAT64_C(   947.11), SIMDE_FLOAT64_C(  -148.81), SIMDE_FLOAT64_C(   852.53), SIMDE_FLOAT64_C(   316.80) } },
    { { SIMDE_FLOAT64_C(  -150.15), SIMDE_FLOAT64_C(  -882.96), SIMDE_FLOAT64_C(   -23.16), SIMDE_FLOAT64_C(   367.96) },
      { SIMDE_FLOAT64_C(  -150.15), SIMDE_FLOAT64_C(  -882.96), SIMDE_FLOAT64_C(   -23.16), SIMDE_FLOAT64_C(   367.96) } },
    { { SIMDE_FLOAT64_C(  -169.14), SIMDE_FLOAT64_C(   745.70), SIMDE_FLOAT64_C(  -976.16), SIMDE_FLOAT64_C(   641.22) },
      { SIMDE_FLOAT64_C(  -169.14), SIMDE_FLOAT64_C(   745.70), SIMDE_FLOAT64_C(  -976.16), SIMDE_FLOAT64_C(   641.22) } },
    { { SIMDE_FLOAT64_C(  -923.21), SIMDE_FLOAT64_C(   559.44), SIMDE_FLOAT64_C(  -648.43), SIMDE_FLOAT64_C(   313.86) },
      { SIMDE_FLOAT64_C(  -923.21), SIMDE_FLOAT64_C(   559.44), SIMDE_FLOAT64_C(  -648.43), SIMDE_FLOAT64_C(   313.86) } },
    { { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C( 2147483649.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C( 2147483649.0) },
      { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C( 2147483649.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C( 2147483649.0) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_float64* a = test_vec[i].a;
    simde__m256d r = simde_mm256_set_pd(a[3], a[2], a[1], a[0]);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}

/* Checks simde_mm256_set_m128 against a fixed table of cases.
 *
 * Every case holds two 4-float halves and the expected 8-float result,
 * which is the low half followed by the high half in memory order. Both
 * halves are loaded unaligned, combined with the high half as the first
 * argument, and compared with an unaligned load of the expected array.
 * Returns 0 when every case passes. */
static int
test_simde_mm256_set_m128(SIMDE_MUNIT_TEST_ARGS) {
  struct {
    simde_float32 low[sizeof(simde__m128) / sizeof(simde_float32)];
    simde_float32 high[sizeof(simde__m128) / sizeof(simde_float32)];
    simde_float32 expected[sizeof(simde__m256) / sizeof(simde_float32)];
  } cases[] = {
    { { SIMDE_FLOAT32_C(   955.26), SIMDE_FLOAT32_C(  -742.43), SIMDE_FLOAT32_C(  -685.02), SIMDE_FLOAT32_C(   649.65) },
      { SIMDE_FLOAT32_C(    76.57), SIMDE_FLOAT32_C(   826.18), SIMDE_FLOAT32_C(   499.67), SIMDE_FLOAT32_C(   965.25) },
      { SIMDE_FLOAT32_C(   955.26), SIMDE_FLOAT32_C(  -742.43), SIMDE_FLOAT32_C(  -685.02), SIMDE_FLOAT32_C(   649.65),
        SIMDE_FLOAT32_C(    76.57), SIMDE_FLOAT32_C(   826.18), SIMDE_FLOAT32_C(   499.67), SIMDE_FLOAT32_C(   965.25) } },
    { { SIMDE_FLOAT32_C(  -924.37), SIMDE_FLOAT32_C(  -870.83), SIMDE_FLOAT32_C(   379.26), SIMDE_FLOAT32_C(   192.12) },
      { SIMDE_FLOAT32_C(   260.12), SIMDE_FLOAT32_C(   -54.88), SIMDE_FLOAT32_C(  -275.93), SIMDE_FLOAT32_C(    53.40) },
      { SIMDE_FLOAT32_C(  -924.37), SIMDE_FLOAT32_C(  -870.83), SIMDE_FLOAT32_C(   379.26), SIMDE_FLOAT32_C(   192.12),
        SIMDE_FLOAT32_C(   260.12), SIMDE_FLOAT32_C(   -54.88), SIMDE_FLOAT32_C(  -275.93), SIMDE_FLOAT32_C(    53.40) } },
    { { SIMDE_FLOAT32_C(   572.77), SIMDE_FLOAT32_C(   -29.00), SIMDE_FLOAT32_C(   535.58), SIMDE_FLOAT32_C(  -761.05) },
      { SIMDE_FLOAT32_C(  -139.13), SIMDE_FLOAT32_C(   923.28), SIMDE_FLOAT32_C(   643.94), SIMDE_FLOAT32_C(  -866.67) },
      { SIMDE_FLOAT32_C(   572.77), SIMDE_FLOAT32_C(   -29.00), SIMDE_FLOAT32_C(   535.58), SIMDE_FLOAT32_C(  -761.05),
        SIMDE_FLOAT32_C(  -139.13), SIMDE_FLOAT32_C(   923.28), SIMDE_FLOAT32_C(   643.94), SIMDE_FLOAT32_C(  -866.67) } },
    { { SIMDE_FLOAT32_C(   430.48), SIMDE_FLOAT32_C(   138.84), SIMDE_FLOAT32_C(  -254.67), SIMDE_FLOAT32_C(  -492.95) },
      { SIMDE_FLOAT32_C(  -242.21), SIMDE_FLOAT32_C(  -193.36), SIMDE_FLOAT32_C(  -353.17), SIMDE_FLOAT32_C(   -95.67) },
      { SIMDE_FLOAT32_C(   430.48), SIMDE_FLOAT32_C(   138.84), SIMDE_FLOAT32_C(  -254.67), SIMDE_FLOAT32_C(  -492.95),
        SIMDE_FLOAT32_C(  -242.21), SIMDE_FLOAT32_C(  -193.36), SIMDE_FLOAT32_C(  -353.17), SIMDE_FLOAT32_C(   -95.67) } },
    { { SIMDE_FLOAT32_C(  -497.42), SIMDE_FLOAT32_C(  -212.71), SIMDE_FLOAT32_C(  -430.07), SIMDE_FLOAT32_C(   762.70) },
      { SIMDE_FLOAT32_C(   -34.98), SIMDE_FLOAT32_C(  -754.99), SIMDE_FLOAT32_C(  -527.69), SIMDE_FLOAT32_C(   -79.72) },
      { SIMDE_FLOAT32_C(  -497.42), SIMDE_FLOAT32_C(  -212.71), SIMDE_FLOAT32_C(  -430.07), SIMDE_FLOAT32_C(   762.70),
        SIMDE_FLOAT32_C(   -34.98), SIMDE_FLOAT32_C(  -754.99), SIMDE_FLOAT32_C(  -527.69), SIMDE_FLOAT32_C(   -79.72) } },
    { { SIMDE_FLOAT32_C(   423.17), SIMDE_FLOAT32_C(  -804.64), SIMDE_FLOAT32_C(     0.17), SIMDE_FLOAT32_C(  -715.96) },
      { SIMDE_FLOAT32_C(   732.41), SIMDE_FLOAT32_C(   294.00), SIMDE_FLOAT32_C(  -183.90), SIMDE_FLOAT32_C(   808.04) },
      { SIMDE_FLOAT32_C(   423.17), SIMDE_FLOAT32_C(  -804.64), SIMDE_FLOAT32_C(     0.17), SIMDE_FLOAT32_C(  -715.96),
        SIMDE_FLOAT32_C(   732.41), SIMDE_FLOAT32_C(   294.00), SIMDE_FLOAT32_C(  -183.90), SIMDE_FLOAT32_C(   808.04) } },
    { { SIMDE_FLOAT32_C(   615.10), SIMDE_FLOAT32_C(   952.95), SIMDE_FLOAT32_C(   930.37), SIMDE_FLOAT32_C(  -627.11) },
      { SIMDE_FLOAT32_C(  -881.36), SIMDE_FLOAT32_C(  -355.90), SIMDE_FLOAT32_C(  -582.63), SIMDE_FLOAT32_C(   691.42) },
      { SIMDE_FLOAT32_C(   615.10), SIMDE_FLOAT32_C(   952.95), SIMDE_FLOAT32_C(   930.37), SIMDE_FLOAT32_C(  -627.11),
        SIMDE_FLOAT32_C(  -881.36), SIMDE_FLOAT32_C(  -355.90), SIMDE_FLOAT32_C(  -582.63), SIMDE_FLOAT32_C(   691.42) } },
    { { SIMDE_FLOAT32_C(   716.04), SIMDE_FLOAT32_C(  -977.44), SIMDE_FLOAT32_C(  -302.88), SIMDE_FLOAT32_C(  -318.94) },
      { SIMDE_FLOAT32_C(  -240.41), SIMDE_FLOAT32_C(  -422.80), SIMDE_FLOAT32_C(   277.23), SIMDE_FLOAT32_C(  -809.93) },
      { SIMDE_FLOAT32_C(   716.04), SIMDE_FLOAT32_C(  -977.44), SIMDE_FLOAT32_C(  -302.88), SIMDE_FLOAT32_C(  -318.94),
        SIMDE_FLOAT32_C(  -240.41), SIMDE_FLOAT32_C(  -422.80), SIMDE_FLOAT32_C(   277.23), SIMDE_FLOAT32_C(  -809.93) } },
    { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) },
      { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) },
      { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) } },
  };
  const size_t case_count = sizeof(cases) / sizeof(cases[0]);

  for (size_t k = 0 ; k < case_count ; k++) {
    simde__m128 low_half = simde_mm_loadu_ps(cases[k].low);
    simde__m128 high_half = simde_mm_loadu_ps(cases[k].high);
    simde__m256 expected = simde_mm256_loadu_ps(cases[k].expected);
    /* The high half is the intrinsic's first argument. */
    simde__m256 actual = simde_mm256_set_m128(high_half, low_half);

    simde_test_x86_assert_equal_f32x8(actual, expected, 1);
  }

  return 0;
}

/* Checks simde_mm256_set_m128d against a fixed table of cases.
 *
 * Every case holds two 2-double halves (lo, hi) and the expected 4-double
 * result (r), which is lo followed by hi in memory order. Both halves are
 * loaded unaligned, combined with hi as the first argument, and compared
 * with an unaligned load of r. Returns 0 when every case passes. */
static int
test_simde_mm256_set_m128d(SIMDE_MUNIT_TEST_ARGS) {
  /* r is sized from simde__m256d, the double-precision vector type the
   * intrinsic returns, rather than from the float vector type. */
  struct {
    simde_float64 lo[sizeof(simde__m128d) / sizeof(simde_float64)];
    simde_float64 hi[sizeof(simde__m128d) / sizeof(simde_float64)];
    simde_float64 r[sizeof(simde__m256d) / sizeof(simde_float64)];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(   102.87), SIMDE_FLOAT64_C(   237.14) },
      { SIMDE_FLOAT64_C(   970.02), SIMDE_FLOAT64_C(   617.63) },
      { SIMDE_FLOAT64_C(   102.87), SIMDE_FLOAT64_C(   237.14), SIMDE_FLOAT64_C(   970.02), SIMDE_FLOAT64_C(   617.63) } },
    { { SIMDE_FLOAT64_C(  -786.65), SIMDE_FLOAT64_C(   343.67) },
      { SIMDE_FLOAT64_C(  -249.99), SIMDE_FLOAT64_C(  -396.81) },
      { SIMDE_FLOAT64_C(  -786.65), SIMDE_FLOAT64_C(   343.67), SIMDE_FLOAT64_C(  -249.99), SIMDE_FLOAT64_C(  -396.81) } },
    { { SIMDE_FLOAT64_C(   334.77), SIMDE_FLOAT64_C(   890.51) },
      { SIMDE_FLOAT64_C(   -50.87), SIMDE_FLOAT64_C(  -723.47) },
      { SIMDE_FLOAT64_C(   334.77), SIMDE_FLOAT64_C(   890.51), SIMDE_FLOAT64_C(   -50.87), SIMDE_FLOAT64_C(  -723.47) } },
    { { SIMDE_FLOAT64_C(  -834.03), SIMDE_FLOAT64_C(  -242.09) },
      { SIMDE_FLOAT64_C(  -638.54), SIMDE_FLOAT64_C(   -98.76) },
      { SIMDE_FLOAT64_C(  -834.03), SIMDE_FLOAT64_C(  -242.09), SIMDE_FLOAT64_C(  -638.54), SIMDE_FLOAT64_C(   -98.76) } },
    { { SIMDE_FLOAT64_C(   453.20), SIMDE_FLOAT64_C(   958.59) },
      { SIMDE_FLOAT64_C(   231.94), SIMDE_FLOAT64_C(  -131.61) },
      { SIMDE_FLOAT64_C(   453.20), SIMDE_FLOAT64_C(   958.59), SIMDE_FLOAT64_C(   231.94), SIMDE_FLOAT64_C(  -131.61) } },
    { { SIMDE_FLOAT64_C(  -981.17), SIMDE_FLOAT64_C(  -996.20) },
      { SIMDE_FLOAT64_C(   210.92), SIMDE_FLOAT64_C(  -800.51) },
      { SIMDE_FLOAT64_C(  -981.17), SIMDE_FLOAT64_C(  -996.20), SIMDE_FLOAT64_C(   210.92), SIMDE_FLOAT64_C(  -800.51) } },
    { { SIMDE_FLOAT64_C(   774.24), SIMDE_FLOAT64_C(  -383.33) },
      { SIMDE_FLOAT64_C(  -513.10), SIMDE_FLOAT64_C(   -84.69) },
      { SIMDE_FLOAT64_C(   774.24), SIMDE_FLOAT64_C(  -383.33), SIMDE_FLOAT64_C(  -513.10), SIMDE_FLOAT64_C(   -84.69) } },
    { { SIMDE_FLOAT64_C(   101.10), SIMDE_FLOAT64_C(  -236.55) },
      { SIMDE_FLOAT64_C(  -206.58), SIMDE_FLOAT64_C(    21.06) },
      { SIMDE_FLOAT64_C(   101.10), SIMDE_FLOAT64_C(  -236.55), SIMDE_FLOAT64_C(  -206.58), SIMDE_FLOAT64_C(    21.06) } },
    { { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0) },
      { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0) },
      { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128d
      lo = simde_mm_loadu_pd(test_vec[i].lo),
      hi = simde_mm_loadu_pd(test_vec[i].hi);
    /* The high half is the intrinsic's first argument. */
    simde__m256d r = simde_mm256_set_m128d(hi, lo);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}

static int
test_simde_mm256_set_m128i(SIMDE_MUNIT_TEST_ARGS) {
  struct {
    simde_float32 a[sizeof(simde__m256) / sizeof(simde_float32)];
    simde_float32 r[sizeof(simde__m256) / sizeof(simde_float32)];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(  -862.79), SIMDE_FLOAT32_C(   575.51), SIMDE_FLOAT32_C(  -229.06), SIMDE_FLOAT32_C(    41.19),
        SIMDE_FLOAT32_C(  -323.42), SIMDE_FLOAT32_C(  -210.46), SIMDE_FLOAT32_C(   770.69), SIMDE_FLOAT32_C(   519.80) },
      { SIMDE_FLOAT32_C(  -862.79), SIMDE_FLOAT32_C(   575.51), SIMDE_FLOAT32_C(  -229.06), SIMDE_FLOAT32_C(    41.19),
        SIMDE_FLOAT32_C(  -323.42), SIMDE_FLOAT32_C(  -210.46), SIMDE_FLOAT32_C(   770.69), SIMDE_FLOAT32_C(   519.80) } },
    { { SIMDE_FLOAT32_C(   547.64), SIMDE_FLOAT32_C(  -280.68), SIMDE_FLOAT32_C(  -533.93), SIMDE_FLOAT32_C(  -474.55),
        SIMDE_FLOAT32_C(   108.16), SIMDE_FLOAT32_C(   945.55), SIMDE_FLOAT32_C(  -578.81), SIMDE_FLOAT32_C(   909.72) },
      { SIMDE_FLOAT32_C(   547.64), SIMDE_FLOAT32_C(  -280.68), SIMDE_FLOAT32_C(  -533.93), SIMDE_FLOAT32_C(  -474.55),
        SIMDE_FLOAT32_C(   108.16), SIMDE_FLOAT32_C(   945.55), SIMDE_FLOAT32_C(  -578.81), SIMDE_FLOAT32_C(   909.72) } },
    { { SIMDE_FLOAT32_C(   712.09), SIMDE_FLOAT32_C(   149.26), SIMDE_FLOAT32_C(  -126.56), SIMDE_FLOAT32_C(  -722.11),
        SIMDE_FLOAT32_C(    49.51), SIMDE_FLOAT32_C(   630.17), SIMDE_FLOAT32_C(   155.12), SIMDE_FLOAT32_C(   912.29) },
      { SIMDE_FLOAT32_C(   712.09), SIMDE_FLOAT32_C(   149.26), SIMDE_FLOAT32_C(  -126.56), SIMDE_FLOAT32_C(  -722.11),
        SIMDE_FLOAT32_C(    49.51), SIMDE_FLOAT32_C(   630.17), SIMDE_FLOAT32_C(   155.12), SIMDE_FLOAT32_C(   912.29) } },
    { { SIMDE_FLOAT32_C(  -870.50), SIMDE_FLOAT32_C(  -720.59), SIMDE_FLOAT32_C(   906.13), SIMDE_FLOAT32_C(  -438.81),
        SIMDE_FLOAT32_C(    17.74), SIMDE_FLOAT32_C(   559.71), SIMDE_FLOAT32_C(  -313.51), SIMDE_FLOAT32_C(   154.94) },
      { SIMDE_FLOAT32_C(  -870.50), SIMDE_FLOAT32_C(  -720.59), SIMDE_FLOAT32_C(   906.13), SIMDE_FLOAT32_C(  -438.81),
        SIMDE_FLOAT32_C(    17.74), SIMDE_FLOAT32_C(   559.71), SIMDE_FLOAT32_C(  -313.51), SIMDE_FLOAT32_C(   154.94) } },
    { { SIMDE_FLOAT32_C(   135.22), SIMDE_FLOAT32_C(   457.42), SIMDE_FLOAT32_C(  -803.87), SIMDE_FLOAT32_C(   811.80),
        SIMDE_FLOAT32_C(  -753.03), SIMDE_FLOAT32_C(   966.83), SIMDE_FLOAT32_C(   331.60), SIMDE_FLOAT32_C(   794.61) },
      { SIMDE_FLOAT32_C(   135.22), SIMDE_FLOAT32_C(   457.42), SIMDE_FLOAT32_C(  -803.87), SIMDE_FLOAT32_C(   811.80),
        SIMDE_FLOAT32_C(  -753.03), SIMDE_FLOAT32_C(   966.83), SIMDE_FLOAT32_C(   331.60), SIMDE_FLOAT32_C(   794.61) } },
    { { SIMDE_FLOAT32_C(  -313.86), SIMDE_FLOAT32_C(   797.67), SIMDE_FLOAT32_C(  -679.93), SIMDE_FLOAT32_C(   794.30),
        SIMDE_FLOAT32_C(   743.22), SIMDE_FLOAT32_C(  -258.75), SIMDE_FLOAT32_C(   704.02), SIMDE_FLOAT32_C(   455.31) },
      { SIMDE_FLOAT32_C(  -313.86), SIMDE_FLOAT32_C(   797.67), SIMDE_FLOAT32_C(  -679.93), SIMDE_FLOAT32_C(   794.30),
        SIMDE_FLOAT32_C(   743.22), SIMDE_FLOAT32_C(  -258.75), SIMDE_FLOAT32_C(   704.02), SIMDE_FLOAT32_C(   455.31) } },
    { { SIMDE_FLOAT32_C(   890.51), SIMDE_FLOAT32_C(  -422.54), SIMDE_FLOAT32_C(   733.20), SIMDE_FLOAT32_C(   -59.98),
        SIMDE_FLOAT32_C(  -792.37), SIMDE_FLOAT32_C(  -111.68), SIMDE_FLOAT32_C(  -147.69), SIMDE_FLOAT32_C(  -662.87) },
      { SIMDE_FLOAT32_C(   890.51), SIMDE_FLOAT32_C(  -422.54), SIMDE_FLOAT32_C(   733.20), SIMDE_FLOAT32_C(   -59.98),
        SIMDE_FLOAT32_C(  -792.37), SIMDE_FLOAT32_C(  -111.68), SIMDE_FLOAT32_C(  -147.69), SIMDE_FLOAT32_C(  -662.87) } },
    { { SIMDE_FLOAT32_C(   167.73), SIMDE_FLOAT32_C(  -241.56), SIMDE_FLOAT32_C(  -101.68), SIMDE_FLOAT32_C(  -814.53),
        SIMDE_FLOAT32_C(  -681.85), SIMDE_FLOAT32_C(   584.81), SIMDE_FLOAT32_C(   340.41), SIMDE_FLOAT32_C(   453.36) },
      { SIMDE_FLOAT32_C(   167.73), SIMDE_FLOAT32_C(  -241.56), SIMDE_FLOAT32_C(  -101.68), SIMDE_FLOAT32_C(  -814.53),
        SIMDE_FLOAT32_C(  -681.85), SIMDE_FLOAT32_C(   584.81), SIMDE_FLOAT32_C(   340.41), SIMDE_FLOAT32_C(   453.36) } },
    { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) },
      { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_float32* a = test_vec[i].a;
    simde__m256 r = simde_mm256_set_ps(a[7], a[6], a[5], a[4], a[3], a[2], a[1], a[0]);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}

/* Checks simde_mm256_set1_epi8 for eight fixed int8_t inputs.
 *
 * For each input the expected vector is built by passing that same value
 * as all 32 arguments of simde_mm256_set_epi8; the result of
 * simde_mm256_set1_epi8 must compare equal to it.
 * Returns 0 when every input passes. */
static int
test_simde_mm256_set1_epi8(SIMDE_MUNIT_TEST_ARGS) {
  static const int8_t inputs[8] = {
    INT8_C(  41), INT8_C(  -5), INT8_C( -85), INT8_C(  61),
    INT8_C(-125), INT8_C(-117), INT8_C(   5), INT8_C( -41)
  };
  const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);

  for (size_t k = 0 ; k < input_count ; k++) {
    const int8_t v = inputs[k];
    /* 32 copies of v: four per line, eight lines. */
    simde__m256i expected =
      simde_mm256_set_epi8(v, v, v, v,
                           v, v, v, v,
                           v, v, v, v,
                           v, v, v, v,
                           v, v, v, v,
                           v, v, v, v,
                           v, v, v, v,
                           v, v, v, v);
    simde__m256i actual = simde_mm256_set1_epi8(v);

    simde_assert_m256i_i8(actual, ==, expected);
  }

  return 0;
}

/* Exercises simde_mm256_set1_epi16: for each case the result is compared,
 * as 16-bit lanes, against a vector built by simde_mm256_set_epi16 with the
 * same scalar written in all 16 positions.
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_mm256_set1_epi16(SIMDE_MUNIT_TEST_ARGS) {
  /* a: scalar handed to set1; r: expected vector. */
  const struct {
    int16_t a;
    simde__m256i r;
  } test_vec[8] = {
    { INT16_C( -7117),
      simde_mm256_set_epi16(INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117),
                            INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117),
                            INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117),
                            INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117)) },
    { INT16_C( -4832),
      simde_mm256_set_epi16(INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832),
                            INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832),
                            INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832),
                            INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832)) },
    { INT16_C( 20615),
      simde_mm256_set_epi16(INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615),
                            INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615),
                            INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615),
                            INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615)) },
    { INT16_C(-30117),
      simde_mm256_set_epi16(INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117),
                            INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117),
                            INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117),
                            INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117)) },
    { INT16_C( 32088),
      simde_mm256_set_epi16(INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088),
                            INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088),
                            INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088),
                            INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088)) },
    { INT16_C(-14076),
      simde_mm256_set_epi16(INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076),
                            INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076),
                            INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076),
                            INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076)) },
    { INT16_C( -9132),
      simde_mm256_set_epi16(INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132),
                            INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132),
                            INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132),
                            INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132)) },
    { INT16_C(-24801),
      simde_mm256_set_epi16(INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801),
                            INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801),
                            INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801),
                            INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* The assertion arguments keep their spelling: the macro is defined
   * elsewhere and may quote them in its failure output. */
  for (size_t i = 0 ; i < n_cases ; ++i) {
    const int16_t value = test_vec[i].a;
    simde__m256i r = simde_mm256_set1_epi16(value);
    simde_assert_m256i_i16(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Exercises simde_mm256_set1_epi32: for each case the result is compared,
 * as 32-bit lanes, against a vector built by simde_mm256_set_epi32 with the
 * same scalar written in all 8 positions.
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_mm256_set1_epi32(SIMDE_MUNIT_TEST_ARGS) {
  /* a: scalar handed to set1; r: expected vector. */
  const struct {
    int32_t a;
    simde__m256i r;
  } test_vec[8] = {
    { INT32_C( 1458307866),
      simde_mm256_set_epi32(INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866),
                            INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866)) },
    { INT32_C(-1231481357),
      simde_mm256_set_epi32(INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357),
                            INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357)) },
    { INT32_C( 1330347041),
      simde_mm256_set_epi32(INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041),
                            INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041)) },
    { INT32_C(-2031969158),
      simde_mm256_set_epi32(INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158),
                            INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158)) },
    { INT32_C(  138293031),
      simde_mm256_set_epi32(INT32_C(  138293031), INT32_C(  138293031), INT32_C(  138293031), INT32_C(  138293031),
                            INT32_C(  138293031), INT32_C(  138293031), INT32_C(  138293031), INT32_C(  138293031)) },
    { INT32_C( -870589211),
      simde_mm256_set_epi32(INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211),
                            INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211)) },
    { INT32_C( 1439279097),
      simde_mm256_set_epi32(INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097),
                            INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097)) },
    { INT32_C( -102725779),
      simde_mm256_set_epi32(INT32_C( -102725779), INT32_C( -102725779), INT32_C( -102725779), INT32_C( -102725779),
                            INT32_C( -102725779), INT32_C( -102725779), INT32_C( -102725779), INT32_C( -102725779)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* The assertion arguments keep their spelling: the macro is defined
   * elsewhere and may quote them in its failure output. */
  for (size_t i = 0 ; i < n_cases ; ++i) {
    const int32_t value = test_vec[i].a;
    simde__m256i r = simde_mm256_set1_epi32(value);
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Exercises simde_mm256_set1_epi64x: for each case the result is compared,
 * as 64-bit lanes, against a vector built by simde_mm256_set_epi64x with the
 * same scalar written in all 4 positions.
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_mm256_set1_epi64x(SIMDE_MUNIT_TEST_ARGS) {
  /* a: scalar handed to set1; r: expected vector. */
  const struct {
    int64_t a;
    simde__m256i r;
  } test_vec[8] = {
    { INT64_C( 5105791061004147197),
      simde_mm256_set_epi64x(INT64_C( 5105791061004147197), INT64_C( 5105791061004147197),
                             INT64_C( 5105791061004147197), INT64_C( 5105791061004147197)) },
    { INT64_C(-2078502026959165134),
      simde_mm256_set_epi64x(INT64_C(-2078502026959165134), INT64_C(-2078502026959165134),
                             INT64_C(-2078502026959165134), INT64_C(-2078502026959165134)) },
    { INT64_C( 3468007801991671414),
      simde_mm256_set_epi64x(INT64_C( 3468007801991671414), INT64_C( 3468007801991671414),
                             INT64_C( 3468007801991671414), INT64_C( 3468007801991671414)) },
    { INT64_C(-1026830878024479084),
      simde_mm256_set_epi64x(INT64_C(-1026830878024479084), INT64_C(-1026830878024479084),
                             INT64_C(-1026830878024479084), INT64_C(-1026830878024479084)) },
    { INT64_C(-5815393469667065909),
      simde_mm256_set_epi64x(INT64_C(-5815393469667065909), INT64_C(-5815393469667065909),
                             INT64_C(-5815393469667065909), INT64_C(-5815393469667065909)) },
    { INT64_C(-1858841878581774308),
      simde_mm256_set_epi64x(INT64_C(-1858841878581774308), INT64_C(-1858841878581774308),
                             INT64_C(-1858841878581774308), INT64_C(-1858841878581774308)) },
    { INT64_C( 6272125310275044670),
      simde_mm256_set_epi64x(INT64_C( 6272125310275044670), INT64_C( 6272125310275044670),
                             INT64_C( 6272125310275044670), INT64_C( 6272125310275044670)) },
    { INT64_C( 6393396529564376044),
      simde_mm256_set_epi64x(INT64_C( 6393396529564376044), INT64_C( 6393396529564376044),
                             INT64_C( 6393396529564376044), INT64_C( 6393396529564376044)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* The assertion arguments keep their spelling: the macro is defined
   * elsewhere and may quote them in its failure output. */
  for (size_t i = 0 ; i < n_cases ; ++i) {
    const int64_t value = test_vec[i].a;
    simde__m256i r = simde_mm256_set1_epi64x(value);
    simde_assert_m256i_i64(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Exercises simde_mm256_set1_ps: for each case the result is compared, via
 * simde_assert_m256_close with a precision argument of 1, against a vector
 * built by simde_mm256_set_ps with the same scalar in all 8 positions.
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_mm256_set1_ps(SIMDE_MUNIT_TEST_ARGS) {
  /* a: scalar handed to set1; r: expected vector. */
  const struct {
    simde_float32 a;
    simde__m256 r;
  } test_vec[10] = {
    { SIMDE_FLOAT32_C(  -73.91),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -73.91), SIMDE_FLOAT32_C(  -73.91),
                         SIMDE_FLOAT32_C(  -73.91), SIMDE_FLOAT32_C(  -73.91),
                         SIMDE_FLOAT32_C(  -73.91), SIMDE_FLOAT32_C(  -73.91),
                         SIMDE_FLOAT32_C(  -73.91), SIMDE_FLOAT32_C(  -73.91)) },
    { SIMDE_FLOAT32_C(  953.36),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  953.36), SIMDE_FLOAT32_C(  953.36),
                         SIMDE_FLOAT32_C(  953.36), SIMDE_FLOAT32_C(  953.36),
                         SIMDE_FLOAT32_C(  953.36), SIMDE_FLOAT32_C(  953.36),
                         SIMDE_FLOAT32_C(  953.36), SIMDE_FLOAT32_C(  953.36)) },
    { SIMDE_FLOAT32_C(  888.18),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  888.18), SIMDE_FLOAT32_C(  888.18),
                         SIMDE_FLOAT32_C(  888.18), SIMDE_FLOAT32_C(  888.18),
                         SIMDE_FLOAT32_C(  888.18), SIMDE_FLOAT32_C(  888.18),
                         SIMDE_FLOAT32_C(  888.18), SIMDE_FLOAT32_C(  888.18)) },
    { SIMDE_FLOAT32_C(  232.71),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  232.71), SIMDE_FLOAT32_C(  232.71),
                         SIMDE_FLOAT32_C(  232.71), SIMDE_FLOAT32_C(  232.71),
                         SIMDE_FLOAT32_C(  232.71), SIMDE_FLOAT32_C(  232.71),
                         SIMDE_FLOAT32_C(  232.71), SIMDE_FLOAT32_C(  232.71)) },
    { SIMDE_FLOAT32_C( -598.38),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38),
                         SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38),
                         SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38),
                         SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38)) },
    { SIMDE_FLOAT32_C(  762.88),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  762.88), SIMDE_FLOAT32_C(  762.88),
                         SIMDE_FLOAT32_C(  762.88), SIMDE_FLOAT32_C(  762.88),
                         SIMDE_FLOAT32_C(  762.88), SIMDE_FLOAT32_C(  762.88),
                         SIMDE_FLOAT32_C(  762.88), SIMDE_FLOAT32_C(  762.88)) },
    { SIMDE_FLOAT32_C( -225.89),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89),
                         SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89),
                         SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89),
                         SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89)) },
    { SIMDE_FLOAT32_C( -115.32),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32),
                         SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32),
                         SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32),
                         SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32)) },
    { SIMDE_FLOAT32_C(-2147483650.0),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0),
                         SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0),
                         SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0),
                         SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0)) },
    { SIMDE_FLOAT32_C(2147483649.0),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0),
                         SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0),
                         SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0),
                         SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* The assertion arguments keep their spelling: the macro is defined
   * elsewhere and may quote them in its failure output. */
  for (size_t i = 0 ; i < n_cases ; ++i) {
    const simde_float32 value = test_vec[i].a;
    simde__m256 r = simde_mm256_set1_ps(value);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_set1_pd: for each case the result is compared, via
 * simde_assert_m256d_close with a precision argument of 1, against a vector
 * built by simde_mm256_set_pd with the same scalar in all 4 positions.
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_mm256_set1_pd(SIMDE_MUNIT_TEST_ARGS) {
  /* a: scalar handed to set1; r: expected vector. */
  const struct {
    simde_float64 a;
    simde__m256d r;
  } test_vec[10] = {
    { SIMDE_FLOAT64_C( -494.25),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -494.25), SIMDE_FLOAT64_C( -494.25),
                         SIMDE_FLOAT64_C( -494.25), SIMDE_FLOAT64_C( -494.25)) },
    { SIMDE_FLOAT64_C(  610.20),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  610.20), SIMDE_FLOAT64_C(  610.20),
                         SIMDE_FLOAT64_C(  610.20), SIMDE_FLOAT64_C(  610.20)) },
    { SIMDE_FLOAT64_C( -471.35),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -471.35), SIMDE_FLOAT64_C( -471.35),
                         SIMDE_FLOAT64_C( -471.35), SIMDE_FLOAT64_C( -471.35)) },
    { SIMDE_FLOAT64_C( -211.28),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -211.28), SIMDE_FLOAT64_C( -211.28),
                         SIMDE_FLOAT64_C( -211.28), SIMDE_FLOAT64_C( -211.28)) },
    { SIMDE_FLOAT64_C(  -27.80),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -27.80), SIMDE_FLOAT64_C(  -27.80),
                         SIMDE_FLOAT64_C(  -27.80), SIMDE_FLOAT64_C(  -27.80)) },
    { SIMDE_FLOAT64_C(  866.02),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  866.02), SIMDE_FLOAT64_C(  866.02),
                         SIMDE_FLOAT64_C(  866.02), SIMDE_FLOAT64_C(  866.02)) },
    { SIMDE_FLOAT64_C( -278.22),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -278.22), SIMDE_FLOAT64_C( -278.22),
                         SIMDE_FLOAT64_C( -278.22), SIMDE_FLOAT64_C( -278.22)) },
    { SIMDE_FLOAT64_C(  868.90),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  868.90), SIMDE_FLOAT64_C(  868.90),
                         SIMDE_FLOAT64_C(  868.90), SIMDE_FLOAT64_C(  868.90)) },
    { SIMDE_FLOAT64_C(  -2147483650.0),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -2147483650.0), SIMDE_FLOAT64_C(  -2147483650.0),
                         SIMDE_FLOAT64_C(  -2147483650.0), SIMDE_FLOAT64_C(  -2147483650.0)) },
    { SIMDE_FLOAT64_C(  2147483649.0),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  2147483649.0), SIMDE_FLOAT64_C(  2147483649.0),
                         SIMDE_FLOAT64_C(  2147483649.0), SIMDE_FLOAT64_C(  2147483649.0)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* The assertion arguments keep their spelling: the macro is defined
   * elsewhere and may quote them in its failure output. */
  for (size_t i = 0 ; i < n_cases ; ++i) {
    const simde_float64 value = test_vec[i].a;
    simde__m256d r = simde_mm256_set1_pd(value);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_x_mm256_deinterleaveeven_epi16 against fixed fixtures.
 *
 * Each case loads `a` and `b` with simde_x_mm256_loadu_epi16, runs the
 * function under test, and compares the result with the stored `r`.
 * In the fixtures, every 8-element half of `r` holds the even-indexed
 * elements of the matching half of `a` followed by the even-indexed
 * elements of the matching half of `b`.
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_x_mm256_deinterleaveeven_epi16 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const int16_t a[16];
    const int16_t b[16];
    const int16_t r[16];
  } test_vec[] = {
    { { -INT16_C( 29098),  INT16_C( 21049),  INT16_C( 28104),  INT16_C(  2629), -INT16_C( 20812), -INT16_C(  9859), -INT16_C( 24603), -INT16_C( 22056),
        -INT16_C(  6828),  INT16_C(  2737),  INT16_C( 22872), -INT16_C(   974), -INT16_C( 27405),  INT16_C( 22221),  INT16_C(  1092), -INT16_C( 26097) },
      {  INT16_C( 18834),  INT16_C( 23277),  INT16_C( 12982),  INT16_C( 27236), -INT16_C(  7711), -INT16_C( 14781),  INT16_C(  7040), -INT16_C( 10896),
         INT16_C(  8448),  INT16_C( 23007),  INT16_C(  4731),  INT16_C( 28245),  INT16_C(  8870), -INT16_C(  5436), -INT16_C( 11226), -INT16_C( 18300) },
      { -INT16_C( 29098),  INT16_C( 28104), -INT16_C( 20812), -INT16_C( 24603),  INT16_C( 18834),  INT16_C( 12982), -INT16_C(  7711),  INT16_C(  7040),
        -INT16_C(  6828),  INT16_C( 22872), -INT16_C( 27405),  INT16_C(  1092),  INT16_C(  8448),  INT16_C(  4731),  INT16_C(  8870), -INT16_C( 11226) } },
    { {  INT16_C( 28957), -INT16_C( 11502),  INT16_C( 30372), -INT16_C( 31427), -INT16_C( 32425), -INT16_C( 10165), -INT16_C( 17508), -INT16_C( 25171),
        -INT16_C( 29475),  INT16_C( 22774),  INT16_C( 19358),  INT16_C( 17606), -INT16_C( 30099), -INT16_C( 27602), -INT16_C( 19618),  INT16_C( 31564) },
      {  INT16_C( 24356), -INT16_C( 14258), -INT16_C( 29483),  INT16_C( 11597), -INT16_C( 26355), -INT16_C( 22267), -INT16_C( 19884),  INT16_C( 12614),
         INT16_C( 15422), -INT16_C(  8823),  INT16_C( 20359), -INT16_C(  2783),  INT16_C( 20698),  INT16_C( 14473), -INT16_C( 11005),  INT16_C( 10164) },
      {  INT16_C( 28957),  INT16_C( 30372), -INT16_C( 32425), -INT16_C( 17508),  INT16_C( 24356), -INT16_C( 29483), -INT16_C( 26355), -INT16_C( 19884),
        -INT16_C( 29475),  INT16_C( 19358), -INT16_C( 30099), -INT16_C( 19618),  INT16_C( 15422),  INT16_C( 20359),  INT16_C( 20698), -INT16_C( 11005) } },
    { {  INT16_C(   564),  INT16_C(  2800),  INT16_C( 15758), -INT16_C( 25801),  INT16_C( 15574),  INT16_C( 11077), -INT16_C( 29714),  INT16_C( 11356),
        -INT16_C(  6456),  INT16_C( 20233),  INT16_C( 11061),  INT16_C(  3908), -INT16_C( 12933),  INT16_C( 32328), -INT16_C(   861), -INT16_C( 10331) },
      { -INT16_C( 27138), -INT16_C( 29215),  INT16_C(  6355), -INT16_C( 22232),  INT16_C( 27988),  INT16_C( 17108),  INT16_C( 12793), -INT16_C( 16017),
         INT16_C( 30743),  INT16_C( 19472),  INT16_C( 21923),  INT16_C(  7772), -INT16_C( 23518), -INT16_C( 14948),  INT16_C( 17056), -INT16_C( 24931) },
      {  INT16_C(   564),  INT16_C( 15758),  INT16_C( 15574), -INT16_C( 29714), -INT16_C( 27138),  INT16_C(  6355),  INT16_C( 27988),  INT16_C( 12793),
        -INT16_C(  6456),  INT16_C( 11061), -INT16_C( 12933), -INT16_C(   861),  INT16_C( 30743),  INT16_C( 21923), -INT16_C( 23518),  INT16_C( 17056) } },
    { {  INT16_C( 32471), -INT16_C( 21973),  INT16_C( 21655), -INT16_C(  5292),  INT16_C( 10433), -INT16_C( 17874), -INT16_C( 25255),  INT16_C( 28795),
        -INT16_C( 29675), -INT16_C( 17987),  INT16_C(  6625),  INT16_C(   983),  INT16_C( 29885),  INT16_C( 24009),  INT16_C( 26294), -INT16_C( 29189) },
      {  INT16_C( 10212),  INT16_C( 31544), -INT16_C( 29573),  INT16_C( 15463), -INT16_C( 27212),  INT16_C(  3831),  INT16_C( 29234),  INT16_C( 18302),
         INT16_C( 15358), -INT16_C(  8448), -INT16_C( 10156),  INT16_C(  4579), -INT16_C( 21428),  INT16_C(   622),  INT16_C( 27154), -INT16_C(  2417) },
      {  INT16_C( 32471),  INT16_C( 21655),  INT16_C( 10433), -INT16_C( 25255),  INT16_C( 10212), -INT16_C( 29573), -INT16_C( 27212),  INT16_C( 29234),
        -INT16_C( 29675),  INT16_C(  6625),  INT16_C( 29885),  INT16_C( 26294),  INT16_C( 15358), -INT16_C( 10156), -INT16_C( 21428),  INT16_C( 27154) } },
    { { -INT16_C( 14447),  INT16_C(  3186), -INT16_C(  9901),  INT16_C(  2120),  INT16_C( 16238), -INT16_C( 24554), -INT16_C( 27470), -INT16_C( 20249),
        -INT16_C(  5936),  INT16_C(  9360),  INT16_C( 29632),  INT16_C(  3126), -INT16_C( 23521),  INT16_C( 12558), -INT16_C( 25330), -INT16_C( 24793) },
      { -INT16_C( 26267), -INT16_C( 18261), -INT16_C(  2958), -INT16_C(  8000), -INT16_C( 10701), -INT16_C(  6784),  INT16_C( 26731),  INT16_C( 15254),
         INT16_C(  9808),  INT16_C(  4191), -INT16_C( 27239), -INT16_C( 18404),  INT16_C( 10810),  INT16_C( 18665),  INT16_C(  4295),  INT16_C( 11496) },
      { -INT16_C( 14447), -INT16_C(  9901),  INT16_C( 16238), -INT16_C( 27470), -INT16_C( 26267), -INT16_C(  2958), -INT16_C( 10701),  INT16_C( 26731),
        -INT16_C(  5936),  INT16_C( 29632), -INT16_C( 23521), -INT16_C( 25330),  INT16_C(  9808), -INT16_C( 27239),  INT16_C( 10810),  INT16_C(  4295) } },
    { { -INT16_C( 27734),  INT16_C(  7397), -INT16_C( 23161), -INT16_C( 17411),  INT16_C( 32124), -INT16_C(  6240),  INT16_C( 14053),  INT16_C( 13602),
        -INT16_C( 32420), -INT16_C(  2747),  INT16_C( 24855),  INT16_C( 20909), -INT16_C( 26997),  INT16_C( 21401), -INT16_C( 32345),  INT16_C( 20863) },
      {  INT16_C( 25621), -INT16_C( 25491),  INT16_C( 27146), -INT16_C( 31145), -INT16_C(  1816), -INT16_C( 12947), -INT16_C( 28882), -INT16_C( 29949),
         INT16_C( 18448),  INT16_C( 10112),  INT16_C( 11946),  INT16_C( 13688),  INT16_C(  4804),  INT16_C( 27528),  INT16_C(  2195), -INT16_C( 22340) },
      { -INT16_C( 27734), -INT16_C( 23161),  INT16_C( 32124),  INT16_C( 14053),  INT16_C( 25621),  INT16_C( 27146), -INT16_C(  1816), -INT16_C( 28882),
        -INT16_C( 32420),  INT16_C( 24855), -INT16_C( 26997), -INT16_C( 32345),  INT16_C( 18448),  INT16_C( 11946),  INT16_C(  4804),  INT16_C(  2195) } },
    { {  INT16_C( 10860),  INT16_C( 30277), -INT16_C( 25452),  INT16_C( 31996),  INT16_C( 27028), -INT16_C( 15542),  INT16_C( 19960),  INT16_C(  2382),
        -INT16_C( 12651),  INT16_C( 16176), -INT16_C( 22020), -INT16_C( 16011), -INT16_C(   581),  INT16_C( 20012), -INT16_C(  5883),  INT16_C( 29431) },
      {  INT16_C( 15379), -INT16_C( 22552), -INT16_C(  6696),  INT16_C( 27940),  INT16_C( 28238),  INT16_C( 18224),  INT16_C( 32443),  INT16_C( 20560),
        -INT16_C( 32692),  INT16_C( 18832),  INT16_C(  1321), -INT16_C(  7158),  INT16_C( 13826),  INT16_C(  2099),  INT16_C( 10783),  INT16_C( 12922) },
      {  INT16_C( 10860), -INT16_C( 25452),  INT16_C( 27028),  INT16_C( 19960),  INT16_C( 15379), -INT16_C(  6696),  INT16_C( 28238),  INT16_C( 32443),
        -INT16_C( 12651), -INT16_C( 22020), -INT16_C(   581), -INT16_C(  5883), -INT16_C( 32692),  INT16_C(  1321),  INT16_C( 13826),  INT16_C( 10783) } },
    { {  INT16_C( 25190),  INT16_C( 16090), -INT16_C(   441), -INT16_C( 26965), -INT16_C(  9364),  INT16_C( 10205),  INT16_C( 11609), -INT16_C( 22921),
         INT16_C(  1965), -INT16_C( 10257), -INT16_C(  1780),  INT16_C(  4027), -INT16_C(  4561),  INT16_C( 20247), -INT16_C( 28392),  INT16_C( 32385) },
      {  INT16_C( 23539),  INT16_C( 15293),  INT16_C( 26713), -INT16_C( 14895), -INT16_C( 20924), -INT16_C( 25108),  INT16_C( 25819), -INT16_C( 30653),
         INT16_C( 12907),  INT16_C( 30815),  INT16_C(  6955),  INT16_C( 23431), -INT16_C( 25079),  INT16_C(  8874),  INT16_C( 11055),  INT16_C(  8864) },
      {  INT16_C( 25190), -INT16_C(   441), -INT16_C(  9364),  INT16_C( 11609),  INT16_C( 23539),  INT16_C( 26713), -INT16_C( 20924),  INT16_C( 25819),
         INT16_C(  1965), -INT16_C(  1780), -INT16_C(  4561), -INT16_C( 28392),  INT16_C( 12907),  INT16_C(  6955), -INT16_C( 25079),  INT16_C( 11055) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* The assertion arguments keep their spelling: the macro is defined
   * elsewhere and may quote them in its failure output. */
  for (size_t i = 0 ; i < n_cases ; ++i) {
    const simde__m256i lhs = simde_x_mm256_loadu_epi16(test_vec[i].a);
    const simde__m256i rhs = simde_x_mm256_loadu_epi16(test_vec[i].b);
    simde__m256i r = simde_x_mm256_deinterleaveeven_epi16(lhs, rhs);
    simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r));
  }

  return 0;
}

/* Exercises simde_x_mm256_deinterleaveodd_epi16 against fixed fixtures.
 *
 * Each case loads `a` and `b` with simde_x_mm256_loadu_epi16, runs the
 * function under test, and compares the result with the stored `r`.
 * In the fixtures, every 8-element half of `r` holds the odd-indexed
 * elements of the matching half of `a` followed by the odd-indexed
 * elements of the matching half of `b`.
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_x_mm256_deinterleaveodd_epi16 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const int16_t a[16];
    const int16_t b[16];
    const int16_t r[16];
  } test_vec[] = {
    { {  INT16_C( 15247), -INT16_C( 26208), -INT16_C( 25489),  INT16_C( 16456), -INT16_C( 16003),  INT16_C( 13526),  INT16_C( 13559), -INT16_C( 25115),
        -INT16_C(  9262),  INT16_C(  3203), -INT16_C( 31835),  INT16_C(   158),  INT16_C( 13038), -INT16_C( 14959), -INT16_C( 10715), -INT16_C( 19425) },
      { -INT16_C( 16623), -INT16_C( 32435), -INT16_C( 27301), -INT16_C( 10047), -INT16_C( 26538),  INT16_C( 19725), -INT16_C(  3380), -INT16_C( 24854),
         INT16_C( 28110),  INT16_C( 29610),  INT16_C( 18929), -INT16_C(  8333),  INT16_C(  1147), -INT16_C( 24412), -INT16_C( 15398), -INT16_C(  5292) },
      { -INT16_C( 26208),  INT16_C( 16456),  INT16_C( 13526), -INT16_C( 25115), -INT16_C( 32435), -INT16_C( 10047),  INT16_C( 19725), -INT16_C( 24854),
         INT16_C(  3203),  INT16_C(   158), -INT16_C( 14959), -INT16_C( 19425),  INT16_C( 29610), -INT16_C(  8333), -INT16_C( 24412), -INT16_C(  5292) } },
    { { -INT16_C( 24190), -INT16_C(  8852),  INT16_C( 11830), -INT16_C( 29258), -INT16_C( 15418), -INT16_C( 27942), -INT16_C( 14923), -INT16_C( 31952),
        -INT16_C(  9678),  INT16_C(  9206),  INT16_C( 26915), -INT16_C( 24829), -INT16_C( 22675),  INT16_C( 18239), -INT16_C( 27541), -INT16_C(  4813) },
      { -INT16_C( 24779),  INT16_C( 27851), -INT16_C( 32307), -INT16_C( 27655), -INT16_C( 11452), -INT16_C(  1755),  INT16_C( 21912), -INT16_C( 13443),
         INT16_C( 29488),  INT16_C( 21486), -INT16_C(  3619),  INT16_C( 19186),  INT16_C( 12953),  INT16_C(  1170), -INT16_C( 14906), -INT16_C(  1039) },
      { -INT16_C(  8852), -INT16_C( 29258), -INT16_C( 27942), -INT16_C( 31952),  INT16_C( 27851), -INT16_C( 27655), -INT16_C(  1755), -INT16_C( 13443),
         INT16_C(  9206), -INT16_C( 24829),  INT16_C( 18239), -INT16_C(  4813),  INT16_C( 21486),  INT16_C( 19186),  INT16_C(  1170), -INT16_C(  1039) } },
    { { -INT16_C( 17308),  INT16_C( 12903),  INT16_C( 24637), -INT16_C( 32315), -INT16_C(  5324), -INT16_C( 13189), -INT16_C(  1984),  INT16_C( 28823),
        -INT16_C( 31125),  INT16_C( 18628), -INT16_C( 18825),  INT16_C(  4243),  INT16_C(  9704), -INT16_C( 20972),  INT16_C(  1770),  INT16_C( 20138) },
      {  INT16_C(  4546),  INT16_C(   128),  INT16_C( 18034), -INT16_C( 22911), -INT16_C(   975),  INT16_C( 29042),  INT16_C(  2804),  INT16_C( 24802),
        -INT16_C( 22896),  INT16_C(  1960),  INT16_C( 15196),  INT16_C( 17688),  INT16_C( 11360),  INT16_C( 19187), -INT16_C( 25294), -INT16_C(  2663) },
      {  INT16_C( 12903), -INT16_C( 32315), -INT16_C( 13189),  INT16_C( 28823),  INT16_C(   128), -INT16_C( 22911),  INT16_C( 29042),  INT16_C( 24802),
         INT16_C( 18628),  INT16_C(  4243), -INT16_C( 20972),  INT16_C( 20138),  INT16_C(  1960),  INT16_C( 17688),  INT16_C( 19187), -INT16_C(  2663) } },
    { {  INT16_C(  6575),  INT16_C(  8693),  INT16_C( 30303), -INT16_C( 28473),  INT16_C( 14707),  INT16_C( 26370), -INT16_C(  7101), -INT16_C( 11321),
         INT16_C( 28810), -INT16_C(  6437), -INT16_C(  3157),  INT16_C(  3115),  INT16_C(  7967),  INT16_C( 21078), -INT16_C(  4164),  INT16_C( 27463) },
      {  INT16_C( 15369),  INT16_C( 26764),  INT16_C( 21426),  INT16_C(  9721), -INT16_C(  1139), -INT16_C( 12147),  INT16_C( 21727),  INT16_C( 27044),
         INT16_C( 32708),  INT16_C( 28751),  INT16_C( 31602), -INT16_C( 28292), -INT16_C( 11622),  INT16_C( 22243),  INT16_C( 10946), -INT16_C( 13374) },
      {  INT16_C(  8693), -INT16_C( 28473),  INT16_C( 26370), -INT16_C( 11321),  INT16_C( 26764),  INT16_C(  9721), -INT16_C( 12147),  INT16_C( 27044),
        -INT16_C(  6437),  INT16_C(  3115),  INT16_C( 21078),  INT16_C( 27463),  INT16_C( 28751), -INT16_C( 28292),  INT16_C( 22243), -INT16_C( 13374) } },
    { {  INT16_C( 20070),  INT16_C(  6451),  INT16_C( 11426),  INT16_C( 12094), -INT16_C( 13529),  INT16_C(  1791), -INT16_C( 23776), -INT16_C(  7057),
        -INT16_C( 16606), -INT16_C( 27564), -INT16_C( 12230), -INT16_C( 11226),  INT16_C(  2467),  INT16_C( 25898), -INT16_C(  5068), -INT16_C( 26064) },
      {  INT16_C( 25403), -INT16_C(  8781), -INT16_C(  3440), -INT16_C( 18676),  INT16_C(  3005), -INT16_C(  8770),  INT16_C( 11695), -INT16_C( 11838),
         INT16_C(  5868),  INT16_C(  9830), -INT16_C( 29465), -INT16_C( 29958),  INT16_C(  9621), -INT16_C( 13841),  INT16_C(  7953),  INT16_C( 19556) },
      {  INT16_C(  6451),  INT16_C( 12094),  INT16_C(  1791), -INT16_C(  7057), -INT16_C(  8781), -INT16_C( 18676), -INT16_C(  8770), -INT16_C( 11838),
        -INT16_C( 27564), -INT16_C( 11226),  INT16_C( 25898), -INT16_C( 26064),  INT16_C(  9830), -INT16_C( 29958), -INT16_C( 13841),  INT16_C( 19556) } },
    { {  INT16_C(  6018),  INT16_C(  4649),  INT16_C( 13577), -INT16_C( 14390), -INT16_C( 30655), -INT16_C(  3932),  INT16_C( 26293), -INT16_C( 23871),
         INT16_C( 10109),  INT16_C( 25800), -INT16_C( 15437),  INT16_C( 18926), -INT16_C(  8728), -INT16_C(  1774),  INT16_C( 30460),  INT16_C( 32326) },
      {  INT16_C( 28558), -INT16_C( 26735),  INT16_C( 23461), -INT16_C(  6562),  INT16_C(   995), -INT16_C( 26410), -INT16_C( 26775), -INT16_C(  6598),
         INT16_C(   959),  INT16_C( 29258),  INT16_C( 14534), -INT16_C( 20805), -INT16_C( 12779),  INT16_C(  4519), -INT16_C(  4796), -INT16_C( 11632) },
      {  INT16_C(  4649), -INT16_C( 14390), -INT16_C(  3932), -INT16_C( 23871), -INT16_C( 26735), -INT16_C(  6562), -INT16_C( 26410), -INT16_C(  6598),
         INT16_C( 25800),  INT16_C( 18926), -INT16_C(  1774),  INT16_C( 32326),  INT16_C( 29258), -INT16_C( 20805),  INT16_C(  4519), -INT16_C( 11632) } },
    { {  INT16_C(  8541),  INT16_C(   618), -INT16_C( 14212),  INT16_C( 24552), -INT16_C( 16693),  INT16_C( 13815),  INT16_C( 12885),  INT16_C(  5147),
         INT16_C( 26165), -INT16_C(  1145),  INT16_C( 17054), -INT16_C( 19287),  INT16_C( 20496),  INT16_C( 21957),  INT16_C( 21822), -INT16_C( 25817) },
      { -INT16_C( 28298), -INT16_C(  3427), -INT16_C( 31398),  INT16_C(  9553),  INT16_C( 18755), -INT16_C( 26534),  INT16_C( 30331), -INT16_C( 20307),
         INT16_C( 13532),  INT16_C( 31403),  INT16_C( 21622), -INT16_C( 30930), -INT16_C(  2908), -INT16_C(  7460),  INT16_C(   841), -INT16_C( 16259) },
      {  INT16_C(   618),  INT16_C( 24552),  INT16_C( 13815),  INT16_C(  5147), -INT16_C(  3427),  INT16_C(  9553), -INT16_C( 26534), -INT16_C( 20307),
        -INT16_C(  1145), -INT16_C( 19287),  INT16_C( 21957), -INT16_C( 25817),  INT16_C( 31403), -INT16_C( 30930), -INT16_C(  7460), -INT16_C( 16259) } },
    { {  INT16_C(  6805), -INT16_C(  4174),  INT16_C(  1183), -INT16_C(  7660),  INT16_C( 28493), -INT16_C( 14213),  INT16_C( 10469), -INT16_C( 16008),
         INT16_C(  9052), -INT16_C( 11717),  INT16_C( 27255),  INT16_C(  7001),  INT16_C( 13662), -INT16_C( 22530),  INT16_C( 31545), -INT16_C( 12697) },
      {  INT16_C(  6806),  INT16_C( 13757), -INT16_C( 12002),  INT16_C( 27416), -INT16_C( 27840),  INT16_C(  9523), -INT16_C( 21573),  INT16_C(  6118),
         INT16_C(  8910),  INT16_C( 17897),  INT16_C( 17292), -INT16_C(  5536),  INT16_C( 24184), -INT16_C( 20079), -INT16_C(  1574),  INT16_C( 28799) },
      { -INT16_C(  4174), -INT16_C(  7660), -INT16_C( 14213), -INT16_C( 16008),  INT16_C( 13757),  INT16_C( 27416),  INT16_C(  9523),  INT16_C(  6118),
        -INT16_C( 11717),  INT16_C(  7001), -INT16_C( 22530), -INT16_C( 12697),  INT16_C( 17897), -INT16_C(  5536), -INT16_C( 20079),  INT16_C( 28799) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* The assertion arguments keep their spelling: the macro is defined
   * elsewhere and may quote them in its failure output. */
  for (size_t i = 0 ; i < n_cases ; ++i) {
    const simde__m256i lhs = simde_x_mm256_loadu_epi16(test_vec[i].a);
    const simde__m256i rhs = simde_x_mm256_loadu_epi16(test_vec[i].b);
    simde__m256i r = simde_x_mm256_deinterleaveodd_epi16(lhs, rhs);
    simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r));
  }

  return 0;
}

/* Table-driven check of simde_x_mm256_deinterleaveeven_epi32.
 *
 * Every row supplies two 8 x int32 inputs (a, b) and the expected output r.
 * In the rows below, each group of four output elements holds the
 * even-indexed elements of the matching four-element group of a followed by
 * those of b, i.e. r = { a0, a2, b0, b2, a4, a6, b4, b6 }.
 *
 * Returns 0 after the last row has been checked. */
static int
test_simde_x_mm256_deinterleaveeven_epi32 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const int32_t a[8];
    const int32_t b[8];
    const int32_t r[8];
  } test_vec[] = {
    { {  INT32_C(   504841746), -INT32_C(   184668304),  INT32_C(  2075750092), -INT32_C(  1750132198), -INT32_C(  1448517056), -INT32_C(   409880474), -INT32_C(  1199547026),  INT32_C(   732426777) },
      { -INT32_C(  1454784967), -INT32_C(  1164097298), -INT32_C(   785033546), -INT32_C(  1318525839), -INT32_C(  1386606010),  INT32_C(   949283786),  INT32_C(  1643124040),  INT32_C(  1082955783) },
      {  INT32_C(   504841746),  INT32_C(  2075750092), -INT32_C(  1454784967), -INT32_C(   785033546), -INT32_C(  1448517056), -INT32_C(  1199547026), -INT32_C(  1386606010),  INT32_C(  1643124040) } },
    { {  INT32_C(  1156240982), -INT32_C(   721516770),  INT32_C(  1336226782),  INT32_C(  1577061911), -INT32_C(   368354784), -INT32_C(  1927045307), -INT32_C(  1125182540),  INT32_C(    50101163) },
      {  INT32_C(  1866917457),  INT32_C(  1279542638), -INT32_C(  1868830344),  INT32_C(   401513463),  INT32_C(   973273589),  INT32_C(  1304896920), -INT32_C(   469125576), -INT32_C(  2082077390) },
      {  INT32_C(  1156240982),  INT32_C(  1336226782),  INT32_C(  1866917457), -INT32_C(  1868830344), -INT32_C(   368354784), -INT32_C(  1125182540),  INT32_C(   973273589), -INT32_C(   469125576) } },
    { {  INT32_C(  1525886188), -INT32_C(   358205583),  INT32_C(   410665248), -INT32_C(   785422116), -INT32_C(   116706975), -INT32_C(  1891184042), -INT32_C(  1150070903),  INT32_C(  1094605141) },
      { -INT32_C(   140824187), -INT32_C(  1981726360),  INT32_C(  1587633026),  INT32_C(   607113411),  INT32_C(  1478310402), -INT32_C(  1779997684),  INT32_C(   139483827),  INT32_C(   961122227) },
      {  INT32_C(  1525886188),  INT32_C(   410665248), -INT32_C(   140824187),  INT32_C(  1587633026), -INT32_C(   116706975), -INT32_C(  1150070903),  INT32_C(  1478310402),  INT32_C(   139483827) } },
    { {  INT32_C(   691070144), -INT32_C(  1481502427),  INT32_C(   788878188),  INT32_C(   626209827),  INT32_C(  2071883886), -INT32_C(  2012191276),  INT32_C(  1938842048), -INT32_C(  1330849040) },
      { -INT32_C(   455484226),  INT32_C(  1502317549),  INT32_C(    42504670),  INT32_C(   875027397),  INT32_C(   548382028), -INT32_C(   878133493),  INT32_C(   272513312), -INT32_C(   775820525) },
      {  INT32_C(   691070144),  INT32_C(   788878188), -INT32_C(   455484226),  INT32_C(    42504670),  INT32_C(  2071883886),  INT32_C(  1938842048),  INT32_C(   548382028),  INT32_C(   272513312) } },
    { { -INT32_C(  1246389561),  INT32_C(    68043046), -INT32_C(  1761175598), -INT32_C(  1093980558), -INT32_C(   555779373),  INT32_C(  1521059642), -INT32_C(   747902784), -INT32_C(  1700451117) },
      { -INT32_C(   330343738),  INT32_C(  1844534939),  INT32_C(  1728444405), -INT32_C(   114896859), -INT32_C(  2049505973),  INT32_C(  1306493324),  INT32_C(  1008749161),  INT32_C(  1037485430) },
      { -INT32_C(  1246389561), -INT32_C(  1761175598), -INT32_C(   330343738),  INT32_C(  1728444405), -INT32_C(   555779373), -INT32_C(   747902784), -INT32_C(  2049505973),  INT32_C(  1008749161) } },
    { { -INT32_C(  1154931168),  INT32_C(  2032736900),  INT32_C(   937438738),  INT32_C(  1227884286), -INT32_C(  1731327989), -INT32_C(   219828599),  INT32_C(  1865287160), -INT32_C(   341048117) },
      { -INT32_C(  1364798166),  INT32_C(    36163568), -INT32_C(    63371011),  INT32_C(   440756750), -INT32_C(    72215438), -INT32_C(  1158834238),  INT32_C(  1730747292),  INT32_C(  1246942495) },
      { -INT32_C(  1154931168),  INT32_C(   937438738), -INT32_C(  1364798166), -INT32_C(    63371011), -INT32_C(  1731327989),  INT32_C(  1865287160), -INT32_C(    72215438),  INT32_C(  1730747292) } },
    { { -INT32_C(  1678181973), -INT32_C(   962780984),  INT32_C(   918738472), -INT32_C(  1303378112), -INT32_C(   575864293),  INT32_C(   915970713), -INT32_C(   711081547),  INT32_C(  1109389463) },
      { -INT32_C(  1310910487),  INT32_C(  1601665591), -INT32_C(  1852425904),  INT32_C(  1547953729), -INT32_C(  2110066199),  INT32_C(  1102631563),  INT32_C(   706107027),  INT32_C(   795620678) },
      { -INT32_C(  1678181973),  INT32_C(   918738472), -INT32_C(  1310910487), -INT32_C(  1852425904), -INT32_C(   575864293), -INT32_C(   711081547), -INT32_C(  2110066199),  INT32_C(   706107027) } },
    { { -INT32_C(  2065675956),  INT32_C(   350443715), -INT32_C(   760907375),  INT32_C(  1227876448),  INT32_C(  1707829721), -INT32_C(   827947973),  INT32_C(   553237722),  INT32_C(  1028613617) },
      {  INT32_C(  1925263279),  INT32_C(   428254599),  INT32_C(  2129341214), -INT32_C(   305718764), -INT32_C(  1101884541), -INT32_C(   242354153), -INT32_C(  1525578060), -INT32_C(  1696374549) },
      { -INT32_C(  2065675956), -INT32_C(   760907375),  INT32_C(  1925263279),  INT32_C(  2129341214),  INT32_C(  1707829721),  INT32_C(   553237722), -INT32_C(  1101884541), -INT32_C(  1525578060) } }
  };

  /* Row count is derived from the table so new rows are picked up automatically. */
  const size_t case_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < case_count) {
    simde__m256i lhs = simde_x_mm256_loadu_epi32(test_vec[i].a);
    simde__m256i rhs = simde_x_mm256_loadu_epi32(test_vec[i].b);
    simde__m256i r = simde_x_mm256_deinterleaveeven_epi32(lhs, rhs);
    simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r));
    i++;
  }

  return 0;
}

/* Table-driven check of simde_x_mm256_deinterleaveodd_epi32.
 *
 * Every row supplies two 8 x int32 inputs (a, b) and the expected output r.
 * In the rows below, each group of four output elements holds the
 * odd-indexed elements of the matching four-element group of a followed by
 * those of b, i.e. r = { a1, a3, b1, b3, a5, a7, b5, b7 }.
 *
 * Returns 0 after the last row has been checked. */
static int
test_simde_x_mm256_deinterleaveodd_epi32 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const int32_t a[8];
    const int32_t b[8];
    const int32_t r[8];
  } test_vec[] = {
    { {  INT32_C(  1074233397),  INT32_C(   226594984), -INT32_C(  1186849467),  INT32_C(  1628384119), -INT32_C(   330208028),  INT32_C(  1002695508),  INT32_C(  1501784430), -INT32_C(   932890733) },
      {  INT32_C(  1611230648),  INT32_C(  1114540797),  INT32_C(   536588456), -INT32_C(  1082061861), -INT32_C(   877931913),  INT32_C(   738619069),  INT32_C(  1803913944),  INT32_C(  2033511361) },
      {  INT32_C(   226594984),  INT32_C(  1628384119),  INT32_C(  1114540797), -INT32_C(  1082061861),  INT32_C(  1002695508), -INT32_C(   932890733),  INT32_C(   738619069),  INT32_C(  2033511361) } },
    { {  INT32_C(  1440365912),  INT32_C(  1872251079), -INT32_C(   745565192),  INT32_C(   378671262), -INT32_C(  1595851294), -INT32_C(  2066945876),  INT32_C(   871321970), -INT32_C(  1800592580) },
      {  INT32_C(   686458720), -INT32_C(   946371889), -INT32_C(  1264966123),  INT32_C(   432679990),  INT32_C(   364489577),  INT32_C(    93947283),  INT32_C(   322537942),  INT32_C(   229107372) },
      {  INT32_C(  1872251079),  INT32_C(   378671262), -INT32_C(   946371889),  INT32_C(   432679990), -INT32_C(  2066945876), -INT32_C(  1800592580),  INT32_C(    93947283),  INT32_C(   229107372) } },
    { {  INT32_C(  1010143597),  INT32_C(   688114707),  INT32_C(   702389747),  INT32_C(   843229129), -INT32_C(   431424686),  INT32_C(  1475076480),  INT32_C(   392832106),  INT32_C(  1998852362) },
      { -INT32_C(  1229760093),  INT32_C(   417314341),  INT32_C(   474135635), -INT32_C(  1236302749),  INT32_C(    10262400), -INT32_C(   480802952), -INT32_C(  1225080404),  INT32_C(  1982734035) },
      {  INT32_C(   688114707),  INT32_C(   843229129),  INT32_C(   417314341), -INT32_C(  1236302749),  INT32_C(  1475076480),  INT32_C(  1998852362), -INT32_C(   480802952),  INT32_C(  1982734035) } },
    { { -INT32_C(  1674780297), -INT32_C(   340456296),  INT32_C(   738785224), -INT32_C(    69052549),  INT32_C(  1727823598), -INT32_C(  1320594683), -INT32_C(   395820267), -INT32_C(   664889759) },
      {  INT32_C(   259361399),  INT32_C(  1610295958), -INT32_C(  1668611295),  INT32_C(  1217949018), -INT32_C(   256994069), -INT32_C(    56428313), -INT32_C(  1645999557),  INT32_C(   393560736) },
      { -INT32_C(   340456296), -INT32_C(    69052549),  INT32_C(  1610295958),  INT32_C(  1217949018), -INT32_C(  1320594683), -INT32_C(   664889759), -INT32_C(    56428313),  INT32_C(   393560736) } },
    { {  INT32_C(  1663560397),  INT32_C(   901915156),  INT32_C(  2144488741), -INT32_C(  1513657670), -INT32_C(   443124226), -INT32_C(  1444792211), -INT32_C(   498678206), -INT32_C(   688276727) },
      { -INT32_C(  1170661210),  INT32_C(  1743846466),  INT32_C(    82231881),  INT32_C(   715762988), -INT32_C(  1878048989), -INT32_C(  1187384969), -INT32_C(  1046773832), -INT32_C(   510159557) },
      {  INT32_C(   901915156), -INT32_C(  1513657670),  INT32_C(  1743846466),  INT32_C(   715762988), -INT32_C(  1444792211), -INT32_C(   688276727), -INT32_C(  1187384969), -INT32_C(   510159557) } },
    { { -INT32_C(   124006219),  INT32_C(   375360460),  INT32_C(  2031765069),  INT32_C(   379831283),  INT32_C(  2057810691),  INT32_C(  1546969252), -INT32_C(  1692545184),  INT32_C(   444380260) },
      {  INT32_C(  1360140165), -INT32_C(   261656157), -INT32_C(  1419083337),  INT32_C(  1220611397),  INT32_C(  1707239616), -INT32_C(  1446906295),  INT32_C(   709156806),  INT32_C(   407158931) },
      {  INT32_C(   375360460),  INT32_C(   379831283), -INT32_C(   261656157),  INT32_C(  1220611397),  INT32_C(  1546969252),  INT32_C(   444380260), -INT32_C(  1446906295),  INT32_C(   407158931) } },
    { {  INT32_C(  2053789399),  INT32_C(  2137772488), -INT32_C(  1742023341), -INT32_C(  1545540382), -INT32_C(  1660378540),  INT32_C(  1598474649),  INT32_C(  1015646888),  INT32_C(   575983178) },
      { -INT32_C(   325272028), -INT32_C(   479459440), -INT32_C(  1082419492), -INT32_C(   681419902), -INT32_C(  1770755331), -INT32_C(   587875533), -INT32_C(  1877442747),  INT32_C(  1907518541) },
      {  INT32_C(  2137772488), -INT32_C(  1545540382), -INT32_C(   479459440), -INT32_C(   681419902),  INT32_C(  1598474649),  INT32_C(   575983178), -INT32_C(   587875533),  INT32_C(  1907518541) } },
    { { -INT32_C(  1151447509),  INT32_C(   849267286), -INT32_C(   470738592),  INT32_C(  1908036468), -INT32_C(   251121987),  INT32_C(   802029033), -INT32_C(   910170756),  INT32_C(  2084270417) },
      {  INT32_C(   355965375), -INT32_C(  1018636957),  INT32_C(  1655060974),  INT32_C(  1255432333),  INT32_C(  2017189007),  INT32_C(  1453787353),  INT32_C(  1059022573), -INT32_C(  1749329193) },
      {  INT32_C(   849267286),  INT32_C(  1908036468), -INT32_C(  1018636957),  INT32_C(  1255432333),  INT32_C(   802029033),  INT32_C(  2084270417),  INT32_C(  1453787353), -INT32_C(  1749329193) } }
  };

  /* Row count is derived from the table so new rows are picked up automatically. */
  const size_t case_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < case_count) {
    simde__m256i lhs = simde_x_mm256_loadu_epi32(test_vec[i].a);
    simde__m256i rhs = simde_x_mm256_loadu_epi32(test_vec[i].b);
    simde__m256i r = simde_x_mm256_deinterleaveodd_epi32(lhs, rhs);
    simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r));
    i++;
  }

  return 0;
}

/* Table-driven check of simde_x_mm256_deinterleaveeven_ps.
 *
 * Every row supplies two 8 x float32 inputs (a, b) and the expected output r.
 * In the rows below, each group of four output elements holds the
 * even-indexed elements of the matching four-element group of a followed by
 * those of b, i.e. r = { a0, a2, b0, b2, a4, a6, b4, b6 }.
 * The final row alternates two large-magnitude constants whose decimal
 * spellings lie just outside the int32 range.
 *
 * Returns 0 after the last row has been checked. */
static int
test_simde_x_mm256_deinterleaveeven_ps (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 a[8];
    const simde_float32 b[8];
    const simde_float32 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(  -685.40), SIMDE_FLOAT32_C(   -82.27), SIMDE_FLOAT32_C(  -652.43), SIMDE_FLOAT32_C(   401.52),
        SIMDE_FLOAT32_C(   925.39), SIMDE_FLOAT32_C(  -588.47), SIMDE_FLOAT32_C(  -408.21), SIMDE_FLOAT32_C(   201.70) },
      { SIMDE_FLOAT32_C(    -2.27), SIMDE_FLOAT32_C(  -451.03), SIMDE_FLOAT32_C(   989.34), SIMDE_FLOAT32_C(  -840.15),
        SIMDE_FLOAT32_C(   879.25), SIMDE_FLOAT32_C(   -69.37), SIMDE_FLOAT32_C(   209.43), SIMDE_FLOAT32_C(  -340.80) },
      { SIMDE_FLOAT32_C(  -685.40), SIMDE_FLOAT32_C(  -652.43), SIMDE_FLOAT32_C(    -2.27), SIMDE_FLOAT32_C(   989.34),
        SIMDE_FLOAT32_C(   925.39), SIMDE_FLOAT32_C(  -408.21), SIMDE_FLOAT32_C(   879.25), SIMDE_FLOAT32_C(   209.43) } },
    { { SIMDE_FLOAT32_C(  -774.02), SIMDE_FLOAT32_C(   172.30), SIMDE_FLOAT32_C(  -637.75), SIMDE_FLOAT32_C(   965.23),
        SIMDE_FLOAT32_C(   366.44), SIMDE_FLOAT32_C(   -40.30), SIMDE_FLOAT32_C(   124.64), SIMDE_FLOAT32_C(  -125.21) },
      { SIMDE_FLOAT32_C(   935.91), SIMDE_FLOAT32_C(   798.97), SIMDE_FLOAT32_C(  -165.02), SIMDE_FLOAT32_C(   726.84),
        SIMDE_FLOAT32_C(   834.23), SIMDE_FLOAT32_C(   584.97), SIMDE_FLOAT32_C(   734.98), SIMDE_FLOAT32_C(  -851.17) },
      { SIMDE_FLOAT32_C(  -774.02), SIMDE_FLOAT32_C(  -637.75), SIMDE_FLOAT32_C(   935.91), SIMDE_FLOAT32_C(  -165.02),
        SIMDE_FLOAT32_C(   366.44), SIMDE_FLOAT32_C(   124.64), SIMDE_FLOAT32_C(   834.23), SIMDE_FLOAT32_C(   734.98) } },
    { { SIMDE_FLOAT32_C(  -497.30), SIMDE_FLOAT32_C(  -917.45), SIMDE_FLOAT32_C(   550.35), SIMDE_FLOAT32_C(  -571.91),
        SIMDE_FLOAT32_C(  -505.92), SIMDE_FLOAT32_C(  -857.86), SIMDE_FLOAT32_C(   629.79), SIMDE_FLOAT32_C(   491.80) },
      { SIMDE_FLOAT32_C(  -308.89), SIMDE_FLOAT32_C(   619.14), SIMDE_FLOAT32_C(   651.65), SIMDE_FLOAT32_C(  -429.64),
        SIMDE_FLOAT32_C(  -450.24), SIMDE_FLOAT32_C(  -138.92), SIMDE_FLOAT32_C(   229.56), SIMDE_FLOAT32_C(  -224.25) },
      { SIMDE_FLOAT32_C(  -497.30), SIMDE_FLOAT32_C(   550.35), SIMDE_FLOAT32_C(  -308.89), SIMDE_FLOAT32_C(   651.65),
        SIMDE_FLOAT32_C(  -505.92), SIMDE_FLOAT32_C(   629.79), SIMDE_FLOAT32_C(  -450.24), SIMDE_FLOAT32_C(   229.56) } },
    { { SIMDE_FLOAT32_C(  -966.62), SIMDE_FLOAT32_C(   591.82), SIMDE_FLOAT32_C(  -259.02), SIMDE_FLOAT32_C(   399.82),
        SIMDE_FLOAT32_C(  -448.49), SIMDE_FLOAT32_C(   865.62), SIMDE_FLOAT32_C(  -725.39), SIMDE_FLOAT32_C(  -512.58) },
      { SIMDE_FLOAT32_C(   664.59), SIMDE_FLOAT32_C(   109.59), SIMDE_FLOAT32_C(  -785.74), SIMDE_FLOAT32_C(   498.82),
        SIMDE_FLOAT32_C(  -305.44), SIMDE_FLOAT32_C(   949.24), SIMDE_FLOAT32_C(   647.65), SIMDE_FLOAT32_C(   197.26) },
      { SIMDE_FLOAT32_C(  -966.62), SIMDE_FLOAT32_C(  -259.02), SIMDE_FLOAT32_C(   664.59), SIMDE_FLOAT32_C(  -785.74),
        SIMDE_FLOAT32_C(  -448.49), SIMDE_FLOAT32_C(  -725.39), SIMDE_FLOAT32_C(  -305.44), SIMDE_FLOAT32_C(   647.65) } },
    { { SIMDE_FLOAT32_C(  -968.21), SIMDE_FLOAT32_C(   198.01), SIMDE_FLOAT32_C(   625.35), SIMDE_FLOAT32_C(  -474.13),
        SIMDE_FLOAT32_C(   340.14), SIMDE_FLOAT32_C(   255.15), SIMDE_FLOAT32_C(  -982.32), SIMDE_FLOAT32_C(  -968.75) },
      { SIMDE_FLOAT32_C(  -125.72), SIMDE_FLOAT32_C(   669.33), SIMDE_FLOAT32_C(  -398.39), SIMDE_FLOAT32_C(   424.05),
        SIMDE_FLOAT32_C(  -469.59), SIMDE_FLOAT32_C(   831.17), SIMDE_FLOAT32_C(  -800.21), SIMDE_FLOAT32_C(  -436.21) },
      { SIMDE_FLOAT32_C(  -968.21), SIMDE_FLOAT32_C(   625.35), SIMDE_FLOAT32_C(  -125.72), SIMDE_FLOAT32_C(  -398.39),
        SIMDE_FLOAT32_C(   340.14), SIMDE_FLOAT32_C(  -982.32), SIMDE_FLOAT32_C(  -469.59), SIMDE_FLOAT32_C(  -800.21) } },
    { { SIMDE_FLOAT32_C(   422.99), SIMDE_FLOAT32_C(   -59.23), SIMDE_FLOAT32_C(   963.60), SIMDE_FLOAT32_C(   974.50),
        SIMDE_FLOAT32_C(  -193.61), SIMDE_FLOAT32_C(  -761.79), SIMDE_FLOAT32_C(  -538.08), SIMDE_FLOAT32_C(  -529.01) },
      { SIMDE_FLOAT32_C(   347.80), SIMDE_FLOAT32_C(  -323.81), SIMDE_FLOAT32_C(   969.81), SIMDE_FLOAT32_C(  -957.64),
        SIMDE_FLOAT32_C(  -374.57), SIMDE_FLOAT32_C(   617.46), SIMDE_FLOAT32_C(   239.62), SIMDE_FLOAT32_C(  -342.78) },
      { SIMDE_FLOAT32_C(   422.99), SIMDE_FLOAT32_C(   963.60), SIMDE_FLOAT32_C(   347.80), SIMDE_FLOAT32_C(   969.81),
        SIMDE_FLOAT32_C(  -193.61), SIMDE_FLOAT32_C(  -538.08), SIMDE_FLOAT32_C(  -374.57), SIMDE_FLOAT32_C(   239.62) } },
    { { SIMDE_FLOAT32_C(  -184.53), SIMDE_FLOAT32_C(  -135.02), SIMDE_FLOAT32_C(   183.10), SIMDE_FLOAT32_C(  -844.38),
        SIMDE_FLOAT32_C(  -879.88), SIMDE_FLOAT32_C(   200.77), SIMDE_FLOAT32_C(  -813.13), SIMDE_FLOAT32_C(    -5.59) },
      { SIMDE_FLOAT32_C(  -129.90), SIMDE_FLOAT32_C(  -211.52), SIMDE_FLOAT32_C(  -581.55), SIMDE_FLOAT32_C(   400.51),
        SIMDE_FLOAT32_C(  -380.35), SIMDE_FLOAT32_C(  -381.75), SIMDE_FLOAT32_C(   964.30), SIMDE_FLOAT32_C(  -957.36) },
      { SIMDE_FLOAT32_C(  -184.53), SIMDE_FLOAT32_C(   183.10), SIMDE_FLOAT32_C(  -129.90), SIMDE_FLOAT32_C(  -581.55),
        SIMDE_FLOAT32_C(  -879.88), SIMDE_FLOAT32_C(  -813.13), SIMDE_FLOAT32_C(  -380.35), SIMDE_FLOAT32_C(   964.30) } },
    { { SIMDE_FLOAT32_C(   559.02), SIMDE_FLOAT32_C(   927.90), SIMDE_FLOAT32_C(  -982.86), SIMDE_FLOAT32_C(  -634.59),
        SIMDE_FLOAT32_C(  -833.88), SIMDE_FLOAT32_C(  -520.93), SIMDE_FLOAT32_C(  -163.60), SIMDE_FLOAT32_C(   513.91) },
      { SIMDE_FLOAT32_C(   155.25), SIMDE_FLOAT32_C(  -193.79), SIMDE_FLOAT32_C(   556.27), SIMDE_FLOAT32_C(   780.68),
        SIMDE_FLOAT32_C(  -576.33), SIMDE_FLOAT32_C(  -204.10), SIMDE_FLOAT32_C(  -562.10), SIMDE_FLOAT32_C(   239.14) },
      { SIMDE_FLOAT32_C(   559.02), SIMDE_FLOAT32_C(  -982.86), SIMDE_FLOAT32_C(   155.25), SIMDE_FLOAT32_C(   556.27),
        SIMDE_FLOAT32_C(  -833.88), SIMDE_FLOAT32_C(  -163.60), SIMDE_FLOAT32_C(  -576.33), SIMDE_FLOAT32_C(  -562.10) } },
    { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) },
      { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) },
      { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0) } }
  };

  /* Row count is derived from the table so new rows are picked up automatically. */
  const size_t case_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < case_count) {
    simde__m256 lhs = simde_mm256_loadu_ps(test_vec[i].a);
    simde__m256 rhs = simde_mm256_loadu_ps(test_vec[i].b);
    simde__m256 r = simde_x_mm256_deinterleaveeven_ps(lhs, rhs);
    /* NOTE(review): the trailing 1 is presumably the comparison precision - confirm in the test header. */
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
    i++;
  }

  return 0;
}

/* Table-driven check of simde_x_mm256_deinterleaveodd_ps.
 *
 * Every row supplies two 8 x float32 inputs (a, b) and the expected output r.
 * In the rows below, each group of four output elements holds the
 * odd-indexed elements of the matching four-element group of a followed by
 * those of b, i.e. r = { a1, a3, b1, b3, a5, a7, b5, b7 }.
 * The final row alternates two large-magnitude constants whose decimal
 * spellings lie just outside the int32 range.
 *
 * Returns 0 after the last row has been checked. */
static int
test_simde_x_mm256_deinterleaveodd_ps (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 a[8];
    const simde_float32 b[8];
    const simde_float32 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(   646.61), SIMDE_FLOAT32_C(   356.61), SIMDE_FLOAT32_C(    18.50), SIMDE_FLOAT32_C(   854.67),
        SIMDE_FLOAT32_C(   135.33), SIMDE_FLOAT32_C(   893.27), SIMDE_FLOAT32_C(  -572.24), SIMDE_FLOAT32_C(  -566.66) },
      { SIMDE_FLOAT32_C(    44.03), SIMDE_FLOAT32_C(   606.85), SIMDE_FLOAT32_C(   868.96), SIMDE_FLOAT32_C(  -267.27),
        SIMDE_FLOAT32_C(   902.13), SIMDE_FLOAT32_C(  -607.89), SIMDE_FLOAT32_C(  -775.37), SIMDE_FLOAT32_C(   704.62) },
      { SIMDE_FLOAT32_C(   356.61), SIMDE_FLOAT32_C(   854.67), SIMDE_FLOAT32_C(   606.85), SIMDE_FLOAT32_C(  -267.27),
        SIMDE_FLOAT32_C(   893.27), SIMDE_FLOAT32_C(  -566.66), SIMDE_FLOAT32_C(  -607.89), SIMDE_FLOAT32_C(   704.62) } },
    { { SIMDE_FLOAT32_C(  -749.05), SIMDE_FLOAT32_C(   899.25), SIMDE_FLOAT32_C(  -160.48), SIMDE_FLOAT32_C(  -536.81),
        SIMDE_FLOAT32_C(  -788.17), SIMDE_FLOAT32_C(   841.08), SIMDE_FLOAT32_C(  -487.56), SIMDE_FLOAT32_C(     7.91) },
      { SIMDE_FLOAT32_C(  -727.94), SIMDE_FLOAT32_C(  -117.44), SIMDE_FLOAT32_C(   410.25), SIMDE_FLOAT32_C(   324.46),
        SIMDE_FLOAT32_C(   559.11), SIMDE_FLOAT32_C(   628.63), SIMDE_FLOAT32_C(  -801.85), SIMDE_FLOAT32_C(   205.72) },
      { SIMDE_FLOAT32_C(   899.25), SIMDE_FLOAT32_C(  -536.81), SIMDE_FLOAT32_C(  -117.44), SIMDE_FLOAT32_C(   324.46),
        SIMDE_FLOAT32_C(   841.08), SIMDE_FLOAT32_C(     7.91), SIMDE_FLOAT32_C(   628.63), SIMDE_FLOAT32_C(   205.72) } },
    { { SIMDE_FLOAT32_C(   -14.76), SIMDE_FLOAT32_C(   216.65), SIMDE_FLOAT32_C(    60.39), SIMDE_FLOAT32_C(  -879.43),
        SIMDE_FLOAT32_C(   109.91), SIMDE_FLOAT32_C(   488.15), SIMDE_FLOAT32_C(  -446.09), SIMDE_FLOAT32_C(  -846.06) },
      { SIMDE_FLOAT32_C(    95.01), SIMDE_FLOAT32_C(  -577.14), SIMDE_FLOAT32_C(  -113.32), SIMDE_FLOAT32_C(    -2.87),
        SIMDE_FLOAT32_C(  -185.03), SIMDE_FLOAT32_C(   111.30), SIMDE_FLOAT32_C(  -298.24), SIMDE_FLOAT32_C(    65.92) },
      { SIMDE_FLOAT32_C(   216.65), SIMDE_FLOAT32_C(  -879.43), SIMDE_FLOAT32_C(  -577.14), SIMDE_FLOAT32_C(    -2.87),
        SIMDE_FLOAT32_C(   488.15), SIMDE_FLOAT32_C(  -846.06), SIMDE_FLOAT32_C(   111.30), SIMDE_FLOAT32_C(    65.92) } },
    { { SIMDE_FLOAT32_C(    10.55), SIMDE_FLOAT32_C(   541.28), SIMDE_FLOAT32_C(   529.11), SIMDE_FLOAT32_C(   222.38),
        SIMDE_FLOAT32_C(   382.36), SIMDE_FLOAT32_C(  -958.44), SIMDE_FLOAT32_C(  -769.71), SIMDE_FLOAT32_C(   654.42) },
      { SIMDE_FLOAT32_C(   -75.88), SIMDE_FLOAT32_C(   640.54), SIMDE_FLOAT32_C(   -21.12), SIMDE_FLOAT32_C(  -516.77),
        SIMDE_FLOAT32_C(   269.17), SIMDE_FLOAT32_C(   177.02), SIMDE_FLOAT32_C(   688.96), SIMDE_FLOAT32_C(  -745.60) },
      { SIMDE_FLOAT32_C(   541.28), SIMDE_FLOAT32_C(   222.38), SIMDE_FLOAT32_C(   640.54), SIMDE_FLOAT32_C(  -516.77),
        SIMDE_FLOAT32_C(  -958.44), SIMDE_FLOAT32_C(   654.42), SIMDE_FLOAT32_C(   177.02), SIMDE_FLOAT32_C(  -745.60) } },
    { { SIMDE_FLOAT32_C(  -606.33), SIMDE_FLOAT32_C(  -250.65), SIMDE_FLOAT32_C(  -625.03), SIMDE_FLOAT32_C(   503.58),
        SIMDE_FLOAT32_C(  -762.50), SIMDE_FLOAT32_C(   -71.12), SIMDE_FLOAT32_C(   657.53), SIMDE_FLOAT32_C(   332.51) },
      { SIMDE_FLOAT32_C(   351.74), SIMDE_FLOAT32_C(  -455.80), SIMDE_FLOAT32_C(  -670.36), SIMDE_FLOAT32_C(  -833.29),
        SIMDE_FLOAT32_C(   655.50), SIMDE_FLOAT32_C(    31.40), SIMDE_FLOAT32_C(   232.63), SIMDE_FLOAT32_C(  -333.95) },
      { SIMDE_FLOAT32_C(  -250.65), SIMDE_FLOAT32_C(   503.58), SIMDE_FLOAT32_C(  -455.80), SIMDE_FLOAT32_C(  -833.29),
        SIMDE_FLOAT32_C(   -71.12), SIMDE_FLOAT32_C(   332.51), SIMDE_FLOAT32_C(    31.40), SIMDE_FLOAT32_C(  -333.95) } },
    { { SIMDE_FLOAT32_C(  -427.32), SIMDE_FLOAT32_C(  -238.26), SIMDE_FLOAT32_C(   888.43), SIMDE_FLOAT32_C(   955.03),
        SIMDE_FLOAT32_C(  -196.70), SIMDE_FLOAT32_C(  -881.28), SIMDE_FLOAT32_C(   609.45), SIMDE_FLOAT32_C(   727.42) },
      { SIMDE_FLOAT32_C(   759.26), SIMDE_FLOAT32_C(  -411.67), SIMDE_FLOAT32_C(  -789.35), SIMDE_FLOAT32_C(    28.42),
        SIMDE_FLOAT32_C(   765.35), SIMDE_FLOAT32_C(   899.61), SIMDE_FLOAT32_C(   282.83), SIMDE_FLOAT32_C(  -840.98) },
      { SIMDE_FLOAT32_C(  -238.26), SIMDE_FLOAT32_C(   955.03), SIMDE_FLOAT32_C(  -411.67), SIMDE_FLOAT32_C(    28.42),
        SIMDE_FLOAT32_C(  -881.28), SIMDE_FLOAT32_C(   727.42), SIMDE_FLOAT32_C(   899.61), SIMDE_FLOAT32_C(  -840.98) } },
    { { SIMDE_FLOAT32_C(  -351.05), SIMDE_FLOAT32_C(   657.80), SIMDE_FLOAT32_C(   662.60), SIMDE_FLOAT32_C(  -113.55),
        SIMDE_FLOAT32_C(  -413.32), SIMDE_FLOAT32_C(   320.13), SIMDE_FLOAT32_C(  -781.04), SIMDE_FLOAT32_C(   938.42) },
      { SIMDE_FLOAT32_C(   864.33), SIMDE_FLOAT32_C(  -451.40), SIMDE_FLOAT32_C(  -894.87), SIMDE_FLOAT32_C(   519.84),
        SIMDE_FLOAT32_C(   580.00), SIMDE_FLOAT32_C(   337.76), SIMDE_FLOAT32_C(  -814.11), SIMDE_FLOAT32_C(  -847.32) },
      { SIMDE_FLOAT32_C(   657.80), SIMDE_FLOAT32_C(  -113.55), SIMDE_FLOAT32_C(  -451.40), SIMDE_FLOAT32_C(   519.84),
        SIMDE_FLOAT32_C(   320.13), SIMDE_FLOAT32_C(   938.42), SIMDE_FLOAT32_C(   337.76), SIMDE_FLOAT32_C(  -847.32) } },
    { { SIMDE_FLOAT32_C(  -900.50), SIMDE_FLOAT32_C(  -925.68), SIMDE_FLOAT32_C(  -892.29), SIMDE_FLOAT32_C(   -97.20),
        SIMDE_FLOAT32_C(  -806.96), SIMDE_FLOAT32_C(   717.16), SIMDE_FLOAT32_C(  -369.78), SIMDE_FLOAT32_C(   952.30) },
      { SIMDE_FLOAT32_C(  -694.51), SIMDE_FLOAT32_C(  -159.13), SIMDE_FLOAT32_C(   -19.28), SIMDE_FLOAT32_C(  -929.16),
        SIMDE_FLOAT32_C(  -259.52), SIMDE_FLOAT32_C(  -736.45), SIMDE_FLOAT32_C(  -770.14), SIMDE_FLOAT32_C(   389.43) },
      { SIMDE_FLOAT32_C(  -925.68), SIMDE_FLOAT32_C(   -97.20), SIMDE_FLOAT32_C(  -159.13), SIMDE_FLOAT32_C(  -929.16),
        SIMDE_FLOAT32_C(   717.16), SIMDE_FLOAT32_C(   952.30), SIMDE_FLOAT32_C(  -736.45), SIMDE_FLOAT32_C(   389.43) } },
    { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) },
      { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) },
      { SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(2147483649.0) } }
  };

  /* Row count is derived from the table so new rows are picked up automatically. */
  const size_t case_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < case_count) {
    simde__m256 lhs = simde_mm256_loadu_ps(test_vec[i].a);
    simde__m256 rhs = simde_mm256_loadu_ps(test_vec[i].b);
    simde__m256 r = simde_x_mm256_deinterleaveodd_ps(lhs, rhs);
    /* NOTE(review): the trailing 1 is presumably the comparison precision - confirm in the test header. */
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
    i++;
  }

  return 0;
}

static int
test_simde_x_mm256_deinterleaveeven_pd (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 a[4];
    const simde_float64 b[4];
    const simde_float64 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(  -917.73), SIMDE_FLOAT64_C(  -652.52), SIMDE_FLOAT64_C(   938.00), SIMDE_FLOAT64_C(   628.32) },
      { SIMDE_FLOAT64_C(  -379.39), SIMDE_FLOAT64_C(  -366.67), SIMDE_FLOAT64_C(    60.53), SIMDE_FLOAT64_C(   -36.15) },
      { SIMDE_FLOAT64_C(  -917.73), SIMDE_FLOAT64_C(  -379.39), SIMDE_FLOAT64_C(   938.00), SIMDE_FLOAT64_C(    60.53) } },
    { { SIMDE_FLOAT64_C(   719.20), SIMDE_FLOAT64_C(   657.85), SIMDE_FLOAT64_C(   649.12), SIMDE_FLOAT64_C(  -199.69) },
      { SIMDE_FLOAT64_C(  -297.79), SIMDE_FLOAT64_C(  -137.86), SIMDE_FLOAT64_C(  -554.27), SIMDE_FLOAT64_C(  -359.22) },
      { SIMDE_FLOAT64_C(   719.20), SIMDE_FLOAT64_C(  -297.79), SIMDE_FLOAT64_C(   649.12), SIMDE_FLOAT64_C(  -554.27) } },
    { { SIMDE_FLOAT64_C(  -704.27), SIMDE_FLOAT64_C(  -704.53), SIMDE_FLOAT64_C(   753.64), SIMDE_FLOAT64_C(   -12.36) },
      { SIMDE_FLOAT64_C(  -714.88), SIMDE_FLOAT64_C(   976.56), SIMDE_FLOAT64_C(  -603.38), SIMDE_FLOAT64_C(   193.76) },
      { SIMDE_FLOAT64_C(  -704.27), SIMDE_FLOAT64_C(  -714.88), SIMDE_FLOAT64_C(   753.64), SIMDE_FLOAT64_C(  -603.38) } },
    { { SIMDE_FLOAT64_C(  -307.02), SIMDE_FLOAT64_C(   740.87), SIMDE_FLOAT64_C(  -356.03), SIMDE_FLOAT64_C(   819.67) },
      { SIMDE_FLOAT64_C(  -962.98), SIMDE_FLOAT64_C(   552.09), SIMDE_FLOAT64_C(  -784.02), SIMDE_FLOAT64_C(  -880.71) },
      { SIMDE_FLOAT64_C(  -307.02), SIMDE_FLOAT64_C(  -962.98), SIMDE_FLOAT64_C(  -356.03), SIMDE_FLOAT64_C(  -784.02) } },
    { { SIMDE_FLOAT64_C(   899.57), SIMDE_FLOAT64_C(  -846.01), SIMDE_FLOAT64_C(   747.61), SIMDE_FLOAT64_C(  -479.82) },
      { SIMDE_FLOAT64_C(  -212.68), SIMDE_FLOAT64_C(  -191.86), SIMDE_FLOAT64_C(   484.03), SIMDE_FLOAT64_C(  -493.48) },
      { SIMDE_FLOAT64_C(   899.57), SIMDE_FLOAT64_C(  -212.68), SIMDE_FLOAT64_C(   747.61), SIMDE_FLOAT64_C(   484.03) } },
    { { SIMDE_FLOAT64_C(  -534.01), SIMDE_FLOAT64_C(   133.14), SIMDE_FLOAT64_C(   306.83), SIMDE_FLOAT64_C(   168.20) },
      { SIMDE_FLOAT64_C(   995.29), SIMDE_FLOAT64_C(   752.56), SIMDE_FLOAT64_C(   808.98), SIMDE_FLOAT64_C(  -708.99) },
      { SIMDE_FLOAT64_C(  -534.01), SIMDE_FLOAT64_C(   995.29), SIMDE_FLOAT64_C(   306.83), SIMDE_FLOAT64_C(   808.98) } },
    { { SIMDE_FLOAT64_C(  -951.97), SIMDE_FLOAT64_C(   562.62), SIMDE_FLOAT64_C(   278.66), SIMDE_FLOAT64_C(  -666.85) },
      { SIMDE_FLOAT64_C(   539.18), SIMDE_FLOAT64_C(   675.28), SIMDE_FLOAT64_C(   526.91), SIMDE_FLOAT64_C(  -767.85) },
      { SIMDE_FLOAT64_C(  -951.97), SIMDE_FLOAT64_C(   539.18), SIMDE_FLOAT64_C(   278.66), SIMDE_FLOAT64_C(   526.91) } },
    { { SIMDE_FLOAT64_C(   416.15), SIMDE_FLOAT64_C(  -829.13), SIMDE_FLOAT64_C(  -948.18), SIMDE_FLOAT64_C(   453.17) },
      { SIMDE_FLOAT64_C(   722.96), SIMDE_FLOAT64_C(  -732.19), SIMDE_FLOAT64_C(   572.46), SIMDE_FLOAT64_C(   622.53) },
      { SIMDE_FLOAT64_C(   416.15), SIMDE_FLOAT64_C(   722.96), SIMDE_FLOAT64_C(  -948.18), SIMDE_FLOAT64_C(   572.46) } },
    { { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0) },
      { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0) },
      { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(-2147483650.0) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a);
    simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b);
    simde__m256d r = simde_x_mm256_deinterleaveeven_pd(a, b);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}

static int
test_simde_x_mm256_deinterleaveodd_pd (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 a[4];
    const simde_float64 b[4];
    const simde_float64 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(  -648.87), SIMDE_FLOAT64_C(  -409.81), SIMDE_FLOAT64_C(   258.85), SIMDE_FLOAT64_C(   552.93) },
      { SIMDE_FLOAT64_C(   522.44), SIMDE_FLOAT64_C(   561.99), SIMDE_FLOAT64_C(   622.89), SIMDE_FLOAT64_C(  -995.80) },
      { SIMDE_FLOAT64_C(  -409.81), SIMDE_FLOAT64_C(   561.99), SIMDE_FLOAT64_C(   552.93), SIMDE_FLOAT64_C(  -995.80) } },
    { { SIMDE_FLOAT64_C(  -482.94), SIMDE_FLOAT64_C(  -355.20), SIMDE_FLOAT64_C(   567.28), SIMDE_FLOAT64_C(   592.90) },
      { SIMDE_FLOAT64_C(   321.96), SIMDE_FLOAT64_C(  -278.34), SIMDE_FLOAT64_C(   707.76), SIMDE_FLOAT64_C(  -993.72) },
      { SIMDE_FLOAT64_C(  -355.20), SIMDE_FLOAT64_C(  -278.34), SIMDE_FLOAT64_C(   592.90), SIMDE_FLOAT64_C(  -993.72) } },
    { { SIMDE_FLOAT64_C(    24.59), SIMDE_FLOAT64_C(   115.17), SIMDE_FLOAT64_C(  -487.45), SIMDE_FLOAT64_C(  -271.65) },
      { SIMDE_FLOAT64_C(   637.48), SIMDE_FLOAT64_C(  -438.22), SIMDE_FLOAT64_C(   252.23), SIMDE_FLOAT64_C(  -247.08) },
      { SIMDE_FLOAT64_C(   115.17), SIMDE_FLOAT64_C(  -438.22), SIMDE_FLOAT64_C(  -271.65), SIMDE_FLOAT64_C(  -247.08) } },
    { { SIMDE_FLOAT64_C(   513.68), SIMDE_FLOAT64_C(   -28.49), SIMDE_FLOAT64_C(  -561.16), SIMDE_FLOAT64_C(  -840.08) },
      { SIMDE_FLOAT64_C(   486.66), SIMDE_FLOAT64_C(   -58.51), SIMDE_FLOAT64_C(  -788.82), SIMDE_FLOAT64_C(   837.79) },
      { SIMDE_FLOAT64_C(   -28.49), SIMDE_FLOAT64_C(   -58.51), SIMDE_FLOAT64_C(  -840.08), SIMDE_FLOAT64_C(   837.79) } },
    { { SIMDE_FLOAT64_C(   531.68), SIMDE_FLOAT64_C(   470.03), SIMDE_FLOAT64_C(   390.72), SIMDE_FLOAT64_C(    54.12) },
      { SIMDE_FLOAT64_C(    32.02), SIMDE_FLOAT64_C(    13.61), SIMDE_FLOAT64_C(    58.32), SIMDE_FLOAT64_C(   549.08) },
      { SIMDE_FLOAT64_C(   470.03), SIMDE_FLOAT64_C(    13.61), SIMDE_FLOAT64_C(    54.12), SIMDE_FLOAT64_C(   549.08) } },
    { { SIMDE_FLOAT64_C(   658.41), SIMDE_FLOAT64_C(  -374.40), SIMDE_FLOAT64_C(   141.98), SIMDE_FLOAT64_C(   -19.63) },
      { SIMDE_FLOAT64_C(   347.26), SIMDE_FLOAT64_C(  -150.25), SIMDE_FLOAT64_C(   -13.34), SIMDE_FLOAT64_C(  -628.15) },
      { SIMDE_FLOAT64_C(  -374.40), SIMDE_FLOAT64_C(  -150.25), SIMDE_FLOAT64_C(   -19.63), SIMDE_FLOAT64_C(  -628.15) } },
    { { SIMDE_FLOAT64_C(   964.92), SIMDE_FLOAT64_C(   499.21), SIMDE_FLOAT64_C(   100.21), SIMDE_FLOAT64_C(   602.40) },
      { SIMDE_FLOAT64_C(  -939.01), SIMDE_FLOAT64_C(  -647.56), SIMDE_FLOAT64_C(  -644.68), SIMDE_FLOAT64_C(   574.67) },
      { SIMDE_FLOAT64_C(   499.21), SIMDE_FLOAT64_C(  -647.56), SIMDE_FLOAT64_C(   602.40), SIMDE_FLOAT64_C(   574.67) } },
    { { SIMDE_FLOAT64_C(   323.95), SIMDE_FLOAT64_C(  -205.84), SIMDE_FLOAT64_C(   734.60), SIMDE_FLOAT64_C(  -189.39) },
      { SIMDE_FLOAT64_C(   735.65), SIMDE_FLOAT64_C(   945.77), SIMDE_FLOAT64_C(  -351.60), SIMDE_FLOAT64_C(   267.33) },
      { SIMDE_FLOAT64_C(  -205.84), SIMDE_FLOAT64_C(   945.77), SIMDE_FLOAT64_C(  -189.39), SIMDE_FLOAT64_C(   267.33) } },
    { { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0) },
      { SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0), SIMDE_FLOAT64_C(-2147483650.0), SIMDE_FLOAT64_C(2147483649.0) },
      { SIMDE_FLOAT64_C(2147483649.0), SIMDE_FLOAT64_C(2147483649.0), SIMDE_FLOAT64_C(2147483649.0), SIMDE_FLOAT64_C(2147483649.0) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a);
    simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b);
    simde__m256d r = simde_x_mm256_deinterleaveodd_pd(a, b);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}

/* Checks simde_mm256_add_ps against a fixed table of single-precision
 * vectors.
 *
 * Every table entry carries two 8-element operands (a, b) and the
 * expected element-wise sum (r).  The operands are loaded with
 * simde_mm256_loadu_ps, added, and the result is handed to
 * simde_test_x86_assert_equal_f32x8 together with the expected vector
 * and the value 1 as its third argument.
 *
 * Returns 0 once every entry has been checked; what happens on a
 * mismatch is decided by the assertion macro. */
static int
test_simde_mm256_add_ps (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    simde_float32 a[8];
    simde_float32 b[8];
    simde_float32 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(   593.61), SIMDE_FLOAT32_C(  -557.89), SIMDE_FLOAT32_C(   791.53), SIMDE_FLOAT32_C(   154.11),
        SIMDE_FLOAT32_C(  -737.14), SIMDE_FLOAT32_C(   771.63), SIMDE_FLOAT32_C(  -560.40), SIMDE_FLOAT32_C(   722.01) },
      { SIMDE_FLOAT32_C(   419.25), SIMDE_FLOAT32_C(   900.75), SIMDE_FLOAT32_C(    -9.61), SIMDE_FLOAT32_C(  -733.53),
        SIMDE_FLOAT32_C(  -182.34), SIMDE_FLOAT32_C(  -977.20), SIMDE_FLOAT32_C(    52.09), SIMDE_FLOAT32_C(  -330.07) },
      { SIMDE_FLOAT32_C(  1012.86), SIMDE_FLOAT32_C(   342.86), SIMDE_FLOAT32_C(   781.92), SIMDE_FLOAT32_C(  -579.42),
        SIMDE_FLOAT32_C(  -919.48), SIMDE_FLOAT32_C(  -205.56), SIMDE_FLOAT32_C(  -508.31), SIMDE_FLOAT32_C(   391.94) } },
    { { SIMDE_FLOAT32_C(   931.06), SIMDE_FLOAT32_C(  -653.59), SIMDE_FLOAT32_C(   236.39), SIMDE_FLOAT32_C(  -464.67),
        SIMDE_FLOAT32_C(   290.33), SIMDE_FLOAT32_C(   742.49), SIMDE_FLOAT32_C(  -756.45), SIMDE_FLOAT32_C(   594.08) },
      { SIMDE_FLOAT32_C(  -246.91), SIMDE_FLOAT32_C(   947.78), SIMDE_FLOAT32_C(  -474.92), SIMDE_FLOAT32_C(  -744.97),
        SIMDE_FLOAT32_C(   488.25), SIMDE_FLOAT32_C(   386.30), SIMDE_FLOAT32_C(   828.81), SIMDE_FLOAT32_C(    81.87) },
      { SIMDE_FLOAT32_C(   684.15), SIMDE_FLOAT32_C(   294.20), SIMDE_FLOAT32_C(  -238.53), SIMDE_FLOAT32_C( -1209.65),
        SIMDE_FLOAT32_C(   778.59), SIMDE_FLOAT32_C(  1128.78), SIMDE_FLOAT32_C(    72.36), SIMDE_FLOAT32_C(   675.95) } },
    { { SIMDE_FLOAT32_C(   828.40), SIMDE_FLOAT32_C(   620.34), SIMDE_FLOAT32_C(  -764.02), SIMDE_FLOAT32_C(  -908.74),
        SIMDE_FLOAT32_C(   391.97), SIMDE_FLOAT32_C(  -324.42), SIMDE_FLOAT32_C(   813.27), SIMDE_FLOAT32_C(  -188.78) },
      { SIMDE_FLOAT32_C(  -423.67), SIMDE_FLOAT32_C(  -196.34), SIMDE_FLOAT32_C(    77.69), SIMDE_FLOAT32_C(   393.99),
        SIMDE_FLOAT32_C(  -173.54), SIMDE_FLOAT32_C(  -870.22), SIMDE_FLOAT32_C(  -936.08), SIMDE_FLOAT32_C(  -242.47) },
      { SIMDE_FLOAT32_C(   404.74), SIMDE_FLOAT32_C(   424.00), SIMDE_FLOAT32_C(  -686.33), SIMDE_FLOAT32_C(  -514.74),
        SIMDE_FLOAT32_C(   218.44), SIMDE_FLOAT32_C( -1194.64), SIMDE_FLOAT32_C(  -122.81), SIMDE_FLOAT32_C(  -431.25) } },
    { { SIMDE_FLOAT32_C(  -523.81), SIMDE_FLOAT32_C(   300.32), SIMDE_FLOAT32_C(   292.85), SIMDE_FLOAT32_C(   766.52),
        SIMDE_FLOAT32_C(    42.80), SIMDE_FLOAT32_C(   536.40), SIMDE_FLOAT32_C(   360.60), SIMDE_FLOAT32_C(   795.89) },
      { SIMDE_FLOAT32_C(   484.18), SIMDE_FLOAT32_C(   885.68), SIMDE_FLOAT32_C(  -949.08), SIMDE_FLOAT32_C(   -27.56),
        SIMDE_FLOAT32_C(   271.98), SIMDE_FLOAT32_C(   879.73), SIMDE_FLOAT32_C(  -945.69), SIMDE_FLOAT32_C(   100.38) },
      { SIMDE_FLOAT32_C(   -39.63), SIMDE_FLOAT32_C(  1186.00), SIMDE_FLOAT32_C(  -656.23), SIMDE_FLOAT32_C(   738.96),
        SIMDE_FLOAT32_C(   314.78), SIMDE_FLOAT32_C(  1416.13), SIMDE_FLOAT32_C(  -585.09), SIMDE_FLOAT32_C(   896.27) } },
    { { SIMDE_FLOAT32_C(   500.07), SIMDE_FLOAT32_C(  -709.71), SIMDE_FLOAT32_C(   191.65), SIMDE_FLOAT32_C(  -107.96),
        SIMDE_FLOAT32_C(   -34.13), SIMDE_FLOAT32_C(     4.92), SIMDE_FLOAT32_C(   703.26), SIMDE_FLOAT32_C(   542.20) },
      { SIMDE_FLOAT32_C(   808.58), SIMDE_FLOAT32_C(  -219.05), SIMDE_FLOAT32_C(   -63.81), SIMDE_FLOAT32_C(  -364.96),
        SIMDE_FLOAT32_C(   -89.27), SIMDE_FLOAT32_C(     0.12), SIMDE_FLOAT32_C(   392.56), SIMDE_FLOAT32_C(   386.92) },
      { SIMDE_FLOAT32_C(  1308.64), SIMDE_FLOAT32_C(  -928.76), SIMDE_FLOAT32_C(   127.84), SIMDE_FLOAT32_C(  -472.92),
        SIMDE_FLOAT32_C(  -123.40), SIMDE_FLOAT32_C(     5.03), SIMDE_FLOAT32_C(  1095.83), SIMDE_FLOAT32_C(   929.12) } },
    { { SIMDE_FLOAT32_C(  -699.57), SIMDE_FLOAT32_C(  -314.58), SIMDE_FLOAT32_C(   153.44), SIMDE_FLOAT32_C(   343.24),
        SIMDE_FLOAT32_C(  -778.18), SIMDE_FLOAT32_C(  -485.96), SIMDE_FLOAT32_C(   139.13), SIMDE_FLOAT32_C(   706.00) },
      { SIMDE_FLOAT32_C(  -600.28), SIMDE_FLOAT32_C(   190.04), SIMDE_FLOAT32_C(  -321.56), SIMDE_FLOAT32_C(   671.70),
        SIMDE_FLOAT32_C(    69.77), SIMDE_FLOAT32_C(  -267.25), SIMDE_FLOAT32_C(  -227.92), SIMDE_FLOAT32_C(  -430.16) },
      { SIMDE_FLOAT32_C( -1299.84), SIMDE_FLOAT32_C(  -124.54), SIMDE_FLOAT32_C(  -168.11), SIMDE_FLOAT32_C(  1014.94),
        SIMDE_FLOAT32_C(  -708.41), SIMDE_FLOAT32_C(  -753.20), SIMDE_FLOAT32_C(   -88.79), SIMDE_FLOAT32_C(   275.85) } },
    { { SIMDE_FLOAT32_C(    23.04), SIMDE_FLOAT32_C(   963.73), SIMDE_FLOAT32_C(   461.88), SIMDE_FLOAT32_C(   988.91),
        SIMDE_FLOAT32_C(   -31.35), SIMDE_FLOAT32_C(   165.15), SIMDE_FLOAT32_C(   531.11), SIMDE_FLOAT32_C(  -222.78) },
      { SIMDE_FLOAT32_C(   946.10), SIMDE_FLOAT32_C(  -532.70), SIMDE_FLOAT32_C(   412.26), SIMDE_FLOAT32_C(  -143.17),
        SIMDE_FLOAT32_C(   467.42), SIMDE_FLOAT32_C(  -195.17), SIMDE_FLOAT32_C(  -756.25), SIMDE_FLOAT32_C(   767.85) },
      { SIMDE_FLOAT32_C(   969.14), SIMDE_FLOAT32_C(   431.03), SIMDE_FLOAT32_C(   874.15), SIMDE_FLOAT32_C(   845.74),
        SIMDE_FLOAT32_C(   436.06), SIMDE_FLOAT32_C(   -30.03), SIMDE_FLOAT32_C(  -225.14), SIMDE_FLOAT32_C(   545.07) } },
    { { SIMDE_FLOAT32_C(   490.24), SIMDE_FLOAT32_C(   397.20), SIMDE_FLOAT32_C(   111.09), SIMDE_FLOAT32_C(   712.06),
        SIMDE_FLOAT32_C(   911.24), SIMDE_FLOAT32_C(  -749.78), SIMDE_FLOAT32_C(   418.07), SIMDE_FLOAT32_C(  -689.03) },
      { SIMDE_FLOAT32_C(   440.26), SIMDE_FLOAT32_C(  -903.49), SIMDE_FLOAT32_C(   982.67), SIMDE_FLOAT32_C(  -489.97),
        SIMDE_FLOAT32_C(  -170.74), SIMDE_FLOAT32_C(  -245.25), SIMDE_FLOAT32_C(    79.87), SIMDE_FLOAT32_C(   852.30) },
      { SIMDE_FLOAT32_C(   930.50), SIMDE_FLOAT32_C(  -506.29), SIMDE_FLOAT32_C(  1093.75), SIMDE_FLOAT32_C(   222.09),
        SIMDE_FLOAT32_C(   740.50), SIMDE_FLOAT32_C(  -995.03), SIMDE_FLOAT32_C(   497.94), SIMDE_FLOAT32_C(   163.27) } },
    /* Large operands of opposite sign with an expected sum of 0.  Assuming
     * simde_float32 is IEEE-754 binary32, both magnitudes round to 2^31,
     * which is why the sum is 0 rather than -1 or +1. */
    { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0),
        SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0) },
      { SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0),
        SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(2147483649.0), SIMDE_FLOAT32_C(-2147483650.0) },
      { SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00),
        SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00) } }
  };

  /* Number of entries in the table above. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    /* Load both operands straight from the table and add them. */
    simde__m256 r = simde_mm256_add_ps(simde_mm256_loadu_ps(test_vec[i].a),
                                       simde_mm256_loadu_ps(test_vec[i].b));
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}

/* Checks simde_mm256_add_pd against a fixed table of double-precision
 * vectors.
 *
 * Every table entry carries two 4-element operands (a, b) and the
 * expected element-wise sum (r).  The operands are loaded with
 * simde_mm256_loadu_pd, added, and the result is handed to
 * simde_test_x86_assert_equal_f64x4 together with the expected vector
 * and the value 1 as its third argument.
 *
 * Returns 0 once every entry has been checked; what happens on a
 * mismatch is decided by the assertion macro. */
static int
test_simde_mm256_add_pd (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    simde_float64 a[4];
    simde_float64 b[4];
    simde_float64 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(   664.30), SIMDE_FLOAT64_C(   577.46), SIMDE_FLOAT64_C(   740.89), SIMDE_FLOAT64_C(  -918.31) },
      { SIMDE_FLOAT64_C(  -427.81), SIMDE_FLOAT64_C(   139.30), SIMDE_FLOAT64_C(   658.39), SIMDE_FLOAT64_C(   247.77) },
      { SIMDE_FLOAT64_C(   236.49), SIMDE_FLOAT64_C(   716.76), SIMDE_FLOAT64_C(  1399.29), SIMDE_FLOAT64_C(  -670.54) } },
    { { SIMDE_FLOAT64_C(  -470.94), SIMDE_FLOAT64_C(  -355.75), SIMDE_FLOAT64_C(   525.85), SIMDE_FLOAT64_C(   336.05) },
      { SIMDE_FLOAT64_C(  -706.73), SIMDE_FLOAT64_C(   -71.41), SIMDE_FLOAT64_C(   586.58), SIMDE_FLOAT64_C(  -587.59) },
      { SIMDE_FLOAT64_C( -1177.66), SIMDE_FLOAT64_C(  -427.15), SIMDE_FLOAT64_C(  1112.44), SIMDE_FLOAT64_C(  -251.54) } },
    { { SIMDE_FLOAT64_C(   109.20), SIMDE_FLOAT64_C(  -848.44), SIMDE_FLOAT64_C(  -937.49), SIMDE_FLOAT64_C(  -391.24) },
      { SIMDE_FLOAT64_C(   350.57), SIMDE_FLOAT64_C(  -211.87), SIMDE_FLOAT64_C(   614.09), SIMDE_FLOAT64_C(   386.98) },
      { SIMDE_FLOAT64_C(   459.78), SIMDE_FLOAT64_C( -1060.32), SIMDE_FLOAT64_C(  -323.40), SIMDE_FLOAT64_C(    -4.26) } },
    { { SIMDE_FLOAT64_C(  -377.97), SIMDE_FLOAT64_C(   804.63), SIMDE_FLOAT64_C(  -715.40), SIMDE_FLOAT64_C(  -782.75) },
      { SIMDE_FLOAT64_C(  -311.74), SIMDE_FLOAT64_C(   976.76), SIMDE_FLOAT64_C(   342.71), SIMDE_FLOAT64_C(  -647.44) },
      { SIMDE_FLOAT64_C(  -689.71), SIMDE_FLOAT64_C(  1781.39), SIMDE_FLOAT64_C(  -372.68), SIMDE_FLOAT64_C( -1430.19) } },
    { { SIMDE_FLOAT64_C(   554.22), SIMDE_FLOAT64_C(    83.61), SIMDE_FLOAT64_C(  -565.75), SIMDE_FLOAT64_C(  -873.59) },
      { SIMDE_FLOAT64_C(  -777.09), SIMDE_FLOAT64_C(  -907.36), SIMDE_FLOAT64_C(   374.18), SIMDE_FLOAT64_C(  -248.03) },
      { SIMDE_FLOAT64_C(  -222.87), SIMDE_FLOAT64_C(  -823.75), SIMDE_FLOAT64_C(  -191.57), SIMDE_FLOAT64_C( -1121.62) } },
    { { SIMDE_FLOAT64_C(  -263.10), SIMDE_FLOAT64_C(   -99.97), SIMDE_FLOAT64_C(  -911.97), SIMDE_FLOAT64_C(    30.17) },
      { SIMDE_FLOAT64_C(   828.63), SIMDE_FLOAT64_C(   674.61), SIMDE_FLOAT64_C(   442.58), SIMDE_FLOAT64_C(   -62.17) },
      { SIMDE_FLOAT64_C(   565.52), SIMDE_FLOAT64_C(   574.64), SIMDE_FLOAT64_C(  -469.39), SIMDE_FLOAT64_C(   -32.00) } },
    { { SIMDE_FLOAT64_C(   826.16), SIMDE_FLOAT64_C(   505.09), SIMDE_FLOAT64_C(   546.59), SIMDE_FLOAT64_C(   176.74) },
      { SIMDE_FLOAT64_C(  -706.78), SIMDE_FLOAT64_C(   160.68), SIMDE_FLOAT64_C(  -436.28), SIMDE_FLOAT64_C(   -84.75) },
      { SIMDE_FLOAT64_C(   119.38), SIMDE_FLOAT64_C(   665.77), SIMDE_FLOAT64_C(   110.30), SIMDE_FLOAT64_C(    91.98) } },
    { { SIMDE_FLOAT64_C(   -34.70), SIMDE_FLOAT64_C(  -151.68), SIMDE_FLOAT64_C(   132.50), SIMDE_FLOAT64_C(   653.56) },
      { SIMDE_FLOAT64_C(  -174.92), SIMDE_FLOAT64_C(  -524.79), SIMDE_FLOAT64_C(  -993.87), SIMDE_FLOAT64_C(  -620.70) },
      { SIMDE_FLOAT64_C(  -209.61), SIMDE_FLOAT64_C(  -676.47), SIMDE_FLOAT64_C(  -861.38), SIMDE_FLOAT64_C(    32.86) } }
  };

  /* Number of entries in the table above. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    /* Load both operands straight from the table and add them. */
    simde__m256d r = simde_mm256_add_pd(simde_mm256_loadu_pd(test_vec[i].a),
                                        simde_mm256_loadu_pd(test_vec[i].b));
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}

/* Checks simde_mm256_addsub_ps against eight fixed single-precision cases.
 *
 * Each case stores its operands (a, b) and expected result (r) as vectors
 * built with simde_mm256_set_ps.  Reading the set_ps argument lists from
 * the last argument backwards, the expected values alternate between
 * a - b (last position) and a + b (second-to-last position).
 *
 * The result is compared with simde_assert_m256_close, passing 1 as its
 * third argument.  Returns 0 once every case has been checked; what
 * happens on a mismatch is decided by the assertion macro. */
static int
test_simde_mm256_addsub_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -665.97), SIMDE_FLOAT32_C( -119.17),
                         SIMDE_FLOAT32_C(   98.44), SIMDE_FLOAT32_C( -870.79),
                         SIMDE_FLOAT32_C(  715.06), SIMDE_FLOAT32_C(  168.23),
                         SIMDE_FLOAT32_C(  291.85), SIMDE_FLOAT32_C(  803.77)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  437.26), SIMDE_FLOAT32_C(  621.28),
                         SIMDE_FLOAT32_C(  727.27), SIMDE_FLOAT32_C( -902.73),
                         SIMDE_FLOAT32_C( -279.74), SIMDE_FLOAT32_C(  960.47),
                         SIMDE_FLOAT32_C( -437.81), SIMDE_FLOAT32_C(  516.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -228.71), SIMDE_FLOAT32_C( -740.45),
                         SIMDE_FLOAT32_C(  825.71), SIMDE_FLOAT32_C(   31.94),
                         SIMDE_FLOAT32_C(  435.32), SIMDE_FLOAT32_C( -792.24),
                         SIMDE_FLOAT32_C( -145.96), SIMDE_FLOAT32_C(  287.46)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -441.72), SIMDE_FLOAT32_C(  881.45),
                         SIMDE_FLOAT32_C(  512.79), SIMDE_FLOAT32_C( -201.00),
                         SIMDE_FLOAT32_C(  709.10), SIMDE_FLOAT32_C(  130.51),
                         SIMDE_FLOAT32_C( -836.20), SIMDE_FLOAT32_C(  276.45)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -410.67), SIMDE_FLOAT32_C(  408.11),
                         SIMDE_FLOAT32_C( -371.59), SIMDE_FLOAT32_C( -135.76),
                         SIMDE_FLOAT32_C( -896.75), SIMDE_FLOAT32_C( -185.21),
                         SIMDE_FLOAT32_C( -154.35), SIMDE_FLOAT32_C( -995.14)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -852.39), SIMDE_FLOAT32_C(  473.34),
                         SIMDE_FLOAT32_C(  141.20), SIMDE_FLOAT32_C(  -65.24),
                         SIMDE_FLOAT32_C( -187.65), SIMDE_FLOAT32_C(  315.72),
                         SIMDE_FLOAT32_C( -990.55), SIMDE_FLOAT32_C( 1271.59)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  792.33), SIMDE_FLOAT32_C( -677.48),
                         SIMDE_FLOAT32_C(  -78.97), SIMDE_FLOAT32_C(  595.95),
                         SIMDE_FLOAT32_C(  441.86), SIMDE_FLOAT32_C(  221.91),
                         SIMDE_FLOAT32_C(  688.66), SIMDE_FLOAT32_C( -937.21)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  615.36), SIMDE_FLOAT32_C( -858.28),
                         SIMDE_FLOAT32_C(  102.87), SIMDE_FLOAT32_C( -380.97),
                         SIMDE_FLOAT32_C( -155.81), SIMDE_FLOAT32_C( -426.12),
                         SIMDE_FLOAT32_C( -862.23), SIMDE_FLOAT32_C( -891.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( 1407.69), SIMDE_FLOAT32_C(  180.80),
                         SIMDE_FLOAT32_C(   23.90), SIMDE_FLOAT32_C(  976.92),
                         SIMDE_FLOAT32_C(  286.05), SIMDE_FLOAT32_C(  648.03),
                         SIMDE_FLOAT32_C( -173.57), SIMDE_FLOAT32_C(  -45.90)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -493.40), SIMDE_FLOAT32_C( -257.06),
                         SIMDE_FLOAT32_C( -968.46), SIMDE_FLOAT32_C(  634.36),
                         SIMDE_FLOAT32_C( -600.69), SIMDE_FLOAT32_C( -769.31),
                         SIMDE_FLOAT32_C(  230.22), SIMDE_FLOAT32_C( -863.68)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.13), SIMDE_FLOAT32_C(  166.99),
                         SIMDE_FLOAT32_C(  125.37), SIMDE_FLOAT32_C( -846.30),
                         SIMDE_FLOAT32_C(  414.00), SIMDE_FLOAT32_C( -144.57),
                         SIMDE_FLOAT32_C(  -43.08), SIMDE_FLOAT32_C(  287.75)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-1171.53), SIMDE_FLOAT32_C( -424.05),
                         SIMDE_FLOAT32_C( -843.09), SIMDE_FLOAT32_C( 1480.66),
                         SIMDE_FLOAT32_C( -186.69), SIMDE_FLOAT32_C( -624.74),
                         SIMDE_FLOAT32_C(  187.14), SIMDE_FLOAT32_C(-1151.43)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -449.73), SIMDE_FLOAT32_C( -350.27),
                         SIMDE_FLOAT32_C( -591.20), SIMDE_FLOAT32_C(  571.44),
                         SIMDE_FLOAT32_C(  787.95), SIMDE_FLOAT32_C(  514.78),
                         SIMDE_FLOAT32_C( -355.32), SIMDE_FLOAT32_C(  545.86)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  341.77), SIMDE_FLOAT32_C( -839.01),
                         SIMDE_FLOAT32_C(   -9.88), SIMDE_FLOAT32_C(  193.01),
                         SIMDE_FLOAT32_C(  871.45), SIMDE_FLOAT32_C( -840.06),
                         SIMDE_FLOAT32_C(  689.52), SIMDE_FLOAT32_C( -902.32)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -107.96), SIMDE_FLOAT32_C(  488.74),
                         SIMDE_FLOAT32_C( -601.08), SIMDE_FLOAT32_C(  378.43),
                         SIMDE_FLOAT32_C( 1659.40), SIMDE_FLOAT32_C( 1354.84),
                         SIMDE_FLOAT32_C(  334.20), SIMDE_FLOAT32_C( 1448.18)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -336.63), SIMDE_FLOAT32_C(  960.76),
                         SIMDE_FLOAT32_C(  657.64), SIMDE_FLOAT32_C( -548.45),
                         SIMDE_FLOAT32_C( -101.48), SIMDE_FLOAT32_C( -271.70),
                         SIMDE_FLOAT32_C(   23.46), SIMDE_FLOAT32_C(  562.46)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -564.95), SIMDE_FLOAT32_C( -418.72),
                         SIMDE_FLOAT32_C( -693.90), SIMDE_FLOAT32_C( -314.48),
                         SIMDE_FLOAT32_C( -760.76), SIMDE_FLOAT32_C(    7.61),
                         SIMDE_FLOAT32_C(  228.32), SIMDE_FLOAT32_C( -230.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -901.58), SIMDE_FLOAT32_C( 1379.48),
                         SIMDE_FLOAT32_C(  -36.26), SIMDE_FLOAT32_C( -233.97),
                         SIMDE_FLOAT32_C( -862.24), SIMDE_FLOAT32_C( -279.31),
                         SIMDE_FLOAT32_C(  251.78), SIMDE_FLOAT32_C(  792.77)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -451.91), SIMDE_FLOAT32_C( -184.14),
                         SIMDE_FLOAT32_C( -772.77), SIMDE_FLOAT32_C( -278.00),
                         SIMDE_FLOAT32_C( -640.89), SIMDE_FLOAT32_C(  -35.35),
                         SIMDE_FLOAT32_C( -518.18), SIMDE_FLOAT32_C( -851.69)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -152.24), SIMDE_FLOAT32_C(  636.58),
                         SIMDE_FLOAT32_C( -808.67), SIMDE_FLOAT32_C(  166.94),
                         SIMDE_FLOAT32_C( -359.45), SIMDE_FLOAT32_C( -888.48),
                         SIMDE_FLOAT32_C( -553.07), SIMDE_FLOAT32_C( -570.58)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -604.15), SIMDE_FLOAT32_C( -820.72),
                         SIMDE_FLOAT32_C(-1581.44), SIMDE_FLOAT32_C( -444.94),
                         SIMDE_FLOAT32_C(-1000.34), SIMDE_FLOAT32_C(  853.13),
                         SIMDE_FLOAT32_C(-1071.25), SIMDE_FLOAT32_C( -281.11)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -3.71), SIMDE_FLOAT32_C( -782.28),
                         SIMDE_FLOAT32_C(  533.36), SIMDE_FLOAT32_C(  848.42),
                         SIMDE_FLOAT32_C(  345.49), SIMDE_FLOAT32_C( -110.04),
                         SIMDE_FLOAT32_C( -550.91), SIMDE_FLOAT32_C(   -5.89)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  186.13), SIMDE_FLOAT32_C( -310.95),
                         SIMDE_FLOAT32_C(  742.56), SIMDE_FLOAT32_C( -943.39),
                         SIMDE_FLOAT32_C( -294.98), SIMDE_FLOAT32_C( -455.35),
                         SIMDE_FLOAT32_C(  262.46), SIMDE_FLOAT32_C(  299.17)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  182.42), SIMDE_FLOAT32_C( -471.33),
                         SIMDE_FLOAT32_C( 1275.92), SIMDE_FLOAT32_C( 1791.81),
                         SIMDE_FLOAT32_C(   50.51), SIMDE_FLOAT32_C(  345.31),
                         SIMDE_FLOAT32_C( -288.45), SIMDE_FLOAT32_C( -305.06)) }
  };

  /* Number of entries in the table above. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    /* Copy the operands out of the table before invoking the function. */
    simde__m256 a = test_vec[i].a;
    simde__m256 b = test_vec[i].b;
    simde__m256 r = simde_mm256_addsub_ps(a, b);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_addsub_pd against eight fixed double-precision cases.
 *
 * Each case stores its operands (a, b) and expected result (r) as vectors
 * built with simde_mm256_set_pd.  Reading the set_pd argument lists from
 * the last argument backwards, the expected values alternate between
 * a - b (last position) and a + b (second-to-last position).
 *
 * The result is compared with simde_assert_m256d_close, passing 1 as its
 * third argument.  Returns 0 once every case has been checked; what
 * happens on a mismatch is decided by the assertion macro. */
static int
test_simde_mm256_addsub_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d b;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  715.06), SIMDE_FLOAT64_C(  168.23),
                         SIMDE_FLOAT64_C(  291.85), SIMDE_FLOAT64_C(  803.77)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -279.74), SIMDE_FLOAT64_C(  960.47),
                         SIMDE_FLOAT64_C( -437.81), SIMDE_FLOAT64_C(  516.31)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  435.32), SIMDE_FLOAT64_C( -792.24),
                         SIMDE_FLOAT64_C( -145.96), SIMDE_FLOAT64_C(  287.46)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -665.97), SIMDE_FLOAT64_C( -119.17),
                         SIMDE_FLOAT64_C(   98.44), SIMDE_FLOAT64_C( -870.79)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  437.26), SIMDE_FLOAT64_C(  621.28),
                         SIMDE_FLOAT64_C(  727.27), SIMDE_FLOAT64_C( -902.73)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -228.71), SIMDE_FLOAT64_C( -740.45),
                         SIMDE_FLOAT64_C(  825.71), SIMDE_FLOAT64_C(   31.94)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  709.10), SIMDE_FLOAT64_C(  130.51),
                         SIMDE_FLOAT64_C( -836.20), SIMDE_FLOAT64_C(  276.45)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -896.75), SIMDE_FLOAT64_C( -185.21),
                         SIMDE_FLOAT64_C( -154.35), SIMDE_FLOAT64_C( -995.14)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -187.65), SIMDE_FLOAT64_C(  315.72),
                         SIMDE_FLOAT64_C( -990.55), SIMDE_FLOAT64_C( 1271.59)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -441.72), SIMDE_FLOAT64_C(  881.45),
                         SIMDE_FLOAT64_C(  512.79), SIMDE_FLOAT64_C( -201.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -410.67), SIMDE_FLOAT64_C(  408.11),
                         SIMDE_FLOAT64_C( -371.59), SIMDE_FLOAT64_C( -135.76)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -852.39), SIMDE_FLOAT64_C(  473.34),
                         SIMDE_FLOAT64_C(  141.20), SIMDE_FLOAT64_C(  -65.24)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  441.86), SIMDE_FLOAT64_C(  221.91),
                         SIMDE_FLOAT64_C(  688.66), SIMDE_FLOAT64_C( -937.21)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -155.81), SIMDE_FLOAT64_C( -426.12),
                         SIMDE_FLOAT64_C( -862.23), SIMDE_FLOAT64_C( -891.31)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  286.05), SIMDE_FLOAT64_C(  648.03),
                         SIMDE_FLOAT64_C( -173.57), SIMDE_FLOAT64_C(  -45.90)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  792.33), SIMDE_FLOAT64_C( -677.48),
                         SIMDE_FLOAT64_C(  -78.97), SIMDE_FLOAT64_C(  595.95)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  615.36), SIMDE_FLOAT64_C( -858.28),
                         SIMDE_FLOAT64_C(  102.87), SIMDE_FLOAT64_C( -380.97)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 1407.69), SIMDE_FLOAT64_C(  180.80),
                         SIMDE_FLOAT64_C(   23.90), SIMDE_FLOAT64_C(  976.92)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -600.69), SIMDE_FLOAT64_C( -769.31),
                         SIMDE_FLOAT64_C(  230.22), SIMDE_FLOAT64_C( -863.68)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  414.00), SIMDE_FLOAT64_C( -144.57),
                         SIMDE_FLOAT64_C(  -43.08), SIMDE_FLOAT64_C(  287.75)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.69), SIMDE_FLOAT64_C( -624.74),
                         SIMDE_FLOAT64_C(  187.14), SIMDE_FLOAT64_C(-1151.43)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -493.40), SIMDE_FLOAT64_C( -257.06),
                         SIMDE_FLOAT64_C( -968.46), SIMDE_FLOAT64_C(  634.36)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.13), SIMDE_FLOAT64_C(  166.99),
                         SIMDE_FLOAT64_C(  125.37), SIMDE_FLOAT64_C( -846.30)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-1171.53), SIMDE_FLOAT64_C( -424.05),
                         SIMDE_FLOAT64_C( -843.09), SIMDE_FLOAT64_C( 1480.66)) }
  };

  /* Number of entries in the table above. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    /* Copy the operands out of the table before invoking the function. */
    simde__m256d a = test_vec[i].a;
    simde__m256d b = test_vec[i].b;
    simde__m256d r = simde_mm256_addsub_pd(a, b);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_and_ps against eight fixed single-precision cases.
 *
 * Each case stores its operands (a, b) and expected result (r) as vectors
 * built with simde_mm256_set_ps.  The expected values are written as
 * two-decimal literals and the comparison goes through
 * simde_assert_m256_close with 1 as its third argument, i.e. the check is
 * an approximate one rather than a bit-exact one (the exact tolerance is
 * defined by that macro).
 *
 * Returns 0 once every case has been checked; what happens on a mismatch
 * is decided by the assertion macro. */
static int
test_simde_mm256_and_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -927.26), SIMDE_FLOAT32_C( -802.03),
                         SIMDE_FLOAT32_C( -266.41), SIMDE_FLOAT32_C(  -50.41),
                         SIMDE_FLOAT32_C( -309.19), SIMDE_FLOAT32_C( -707.19),
                         SIMDE_FLOAT32_C( -220.07), SIMDE_FLOAT32_C(  127.67)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -205.91), SIMDE_FLOAT32_C( -275.07),
                         SIMDE_FLOAT32_C(  -13.57), SIMDE_FLOAT32_C(  990.91),
                         SIMDE_FLOAT32_C( -167.84), SIMDE_FLOAT32_C(  346.85),
                         SIMDE_FLOAT32_C(  124.29), SIMDE_FLOAT32_C(  759.96)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -3.09), SIMDE_FLOAT32_C(   -2.13),
                         SIMDE_FLOAT32_C(   -8.07), SIMDE_FLOAT32_C(    3.03),
                         SIMDE_FLOAT32_C( -130.56), SIMDE_FLOAT32_C(    2.50),
                         SIMDE_FLOAT32_C(   54.02), SIMDE_FLOAT32_C(    2.96)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -39.85), SIMDE_FLOAT32_C(  325.28),
                         SIMDE_FLOAT32_C(  658.09), SIMDE_FLOAT32_C(  797.01),
                         SIMDE_FLOAT32_C( -512.38), SIMDE_FLOAT32_C( -352.93),
                         SIMDE_FLOAT32_C( -357.04), SIMDE_FLOAT32_C( -456.94)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -802.91), SIMDE_FLOAT32_C(  574.48),
                         SIMDE_FLOAT32_C( -277.42), SIMDE_FLOAT32_C(  470.68),
                         SIMDE_FLOAT32_C( -575.30), SIMDE_FLOAT32_C(  -85.17),
                         SIMDE_FLOAT32_C(  928.67), SIMDE_FLOAT32_C(  878.11)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -2.13), SIMDE_FLOAT32_C(    2.04),
                         SIMDE_FLOAT32_C(    2.01), SIMDE_FLOAT32_C(    3.05),
                         SIMDE_FLOAT32_C( -512.25), SIMDE_FLOAT32_C(  -80.17),
                         SIMDE_FLOAT32_C(    2.50), SIMDE_FLOAT32_C(    3.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  758.85), SIMDE_FLOAT32_C(  749.41),
                         SIMDE_FLOAT32_C(  129.33), SIMDE_FLOAT32_C( -389.28),
                         SIMDE_FLOAT32_C( -248.13), SIMDE_FLOAT32_C(  236.41),
                         SIMDE_FLOAT32_C(  416.63), SIMDE_FLOAT32_C( -413.79)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -615.54), SIMDE_FLOAT32_C(  496.18),
                         SIMDE_FLOAT32_C( -485.54), SIMDE_FLOAT32_C( -818.26),
                         SIMDE_FLOAT32_C( -139.14), SIMDE_FLOAT32_C(  955.99),
                         SIMDE_FLOAT32_C(  356.33), SIMDE_FLOAT32_C( -498.79)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  614.53), SIMDE_FLOAT32_C(    2.88),
                         SIMDE_FLOAT32_C(  128.27), SIMDE_FLOAT32_C(   -3.01),
                         SIMDE_FLOAT32_C( -136.13), SIMDE_FLOAT32_C(    3.69),
                         SIMDE_FLOAT32_C(  288.00), SIMDE_FLOAT32_C( -400.79)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -131.10), SIMDE_FLOAT32_C( -368.55),
                         SIMDE_FLOAT32_C(  657.24), SIMDE_FLOAT32_C(  939.62),
                         SIMDE_FLOAT32_C(  -50.98), SIMDE_FLOAT32_C(  313.56),
                         SIMDE_FLOAT32_C( -914.58), SIMDE_FLOAT32_C(  -29.59)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -195.92), SIMDE_FLOAT32_C( -475.65),
                         SIMDE_FLOAT32_C( -524.29), SIMDE_FLOAT32_C(  537.04),
                         SIMDE_FLOAT32_C( -280.43), SIMDE_FLOAT32_C(  379.54),
                         SIMDE_FLOAT32_C(  -38.50), SIMDE_FLOAT32_C( -188.05)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -131.04), SIMDE_FLOAT32_C( -336.52),
                         SIMDE_FLOAT32_C(  512.03), SIMDE_FLOAT32_C(  521.04),
                         SIMDE_FLOAT32_C(  -34.03), SIMDE_FLOAT32_C(  313.54),
                         SIMDE_FLOAT32_C(   -2.00), SIMDE_FLOAT32_C(  -10.75)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  159.88), SIMDE_FLOAT32_C( -559.41),
                         SIMDE_FLOAT32_C(  -99.24), SIMDE_FLOAT32_C( -420.50),
                         SIMDE_FLOAT32_C( -953.72), SIMDE_FLOAT32_C(  849.74),
                         SIMDE_FLOAT32_C(   80.27), SIMDE_FLOAT32_C(  -41.97)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  457.08), SIMDE_FLOAT32_C( -505.60),
                         SIMDE_FLOAT32_C( -390.51), SIMDE_FLOAT32_C( -524.68),
                         SIMDE_FLOAT32_C( -419.12), SIMDE_FLOAT32_C(  955.42),
                         SIMDE_FLOAT32_C(  180.08), SIMDE_FLOAT32_C(   33.67)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  132.50), SIMDE_FLOAT32_C(   -2.14),
                         SIMDE_FLOAT32_C(  -97.13), SIMDE_FLOAT32_C(   -2.03),
                         SIMDE_FLOAT32_C(   -3.00), SIMDE_FLOAT32_C(  785.16),
                         SIMDE_FLOAT32_C(   40.00), SIMDE_FLOAT32_C(   33.66)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  157.14), SIMDE_FLOAT32_C( -391.25),
                         SIMDE_FLOAT32_C( -362.22), SIMDE_FLOAT32_C( -137.98),
                         SIMDE_FLOAT32_C( -303.90), SIMDE_FLOAT32_C(  545.52),
                         SIMDE_FLOAT32_C( -383.06), SIMDE_FLOAT32_C(  973.73)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -973.80), SIMDE_FLOAT32_C(  457.53),
                         SIMDE_FLOAT32_C( -199.04), SIMDE_FLOAT32_C(  856.68),
                         SIMDE_FLOAT32_C( -830.67), SIMDE_FLOAT32_C( -121.42),
                         SIMDE_FLOAT32_C(  772.32), SIMDE_FLOAT32_C(  704.39)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    2.27), SIMDE_FLOAT32_C(  385.00),
                         SIMDE_FLOAT32_C( -133.03), SIMDE_FLOAT32_C(    2.00),
                         SIMDE_FLOAT32_C(   -2.12), SIMDE_FLOAT32_C(    2.00),
                         SIMDE_FLOAT32_C(    2.02), SIMDE_FLOAT32_C(  704.14)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  942.14), SIMDE_FLOAT32_C(  176.96),
                         SIMDE_FLOAT32_C(  525.37), SIMDE_FLOAT32_C(  924.18),
                         SIMDE_FLOAT32_C( -300.50), SIMDE_FLOAT32_C( -450.02),
                         SIMDE_FLOAT32_C(  708.11), SIMDE_FLOAT32_C(  742.76)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  647.68), SIMDE_FLOAT32_C(  632.57),
                         SIMDE_FLOAT32_C( -972.04), SIMDE_FLOAT32_C( -483.76),
                         SIMDE_FLOAT32_C(  274.26), SIMDE_FLOAT32_C(  570.79),
                         SIMDE_FLOAT32_C( -945.60), SIMDE_FLOAT32_C( -519.05)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  646.13), SIMDE_FLOAT32_C(    2.25),
                         SIMDE_FLOAT32_C(  524.04), SIMDE_FLOAT32_C(    3.52),
                         SIMDE_FLOAT32_C(  256.00), SIMDE_FLOAT32_C(    2.00),
                         SIMDE_FLOAT32_C(  640.09), SIMDE_FLOAT32_C(  518.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  361.93), SIMDE_FLOAT32_C( -223.38),
                         SIMDE_FLOAT32_C(  -51.19), SIMDE_FLOAT32_C( -300.56),
                         SIMDE_FLOAT32_C(  363.29), SIMDE_FLOAT32_C(  804.04),
                         SIMDE_FLOAT32_C(   99.54), SIMDE_FLOAT32_C( -622.58)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  446.79), SIMDE_FLOAT32_C(  300.89),
                         SIMDE_FLOAT32_C(  727.13), SIMDE_FLOAT32_C( -671.93),
                         SIMDE_FLOAT32_C(  708.76), SIMDE_FLOAT32_C( -834.24),
                         SIMDE_FLOAT32_C( -881.82), SIMDE_FLOAT32_C(  430.54)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  296.79), SIMDE_FLOAT32_C(  150.38),
                         SIMDE_FLOAT32_C(    2.07), SIMDE_FLOAT32_C(   -2.10),
                         SIMDE_FLOAT32_C(    2.77), SIMDE_FLOAT32_C(  768.03),
                         SIMDE_FLOAT32_C(    3.06), SIMDE_FLOAT32_C(    2.30)) }
  };

  /* Number of entries in the table above. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    /* Copy the operands out of the table before invoking the function. */
    simde__m256 a = test_vec[i].a;
    simde__m256 b = test_vec[i].b;
    simde__m256 r = simde_mm256_and_ps(a, b);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Runs simde_mm256_and_pd over a fixed table of eight (a, b) input pairs and
 * compares each result against the stored vector r using
 * simde_assert_m256d_close with a precision argument of 1.
 * Returns 0 once every table row has been processed. */
static int
test_simde_mm256_and_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d b;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -309.19), SIMDE_FLOAT64_C( -707.19),
                         SIMDE_FLOAT64_C( -220.07), SIMDE_FLOAT64_C(  127.67)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -167.84), SIMDE_FLOAT64_C(  346.85),
                         SIMDE_FLOAT64_C(  124.29), SIMDE_FLOAT64_C(  759.96)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -130.56), SIMDE_FLOAT64_C(    2.50),
                         SIMDE_FLOAT64_C(   54.02), SIMDE_FLOAT64_C(    2.96)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -927.26), SIMDE_FLOAT64_C( -802.03),
                         SIMDE_FLOAT64_C( -266.41), SIMDE_FLOAT64_C(  -50.41)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -205.91), SIMDE_FLOAT64_C( -275.07),
                         SIMDE_FLOAT64_C(  -13.57), SIMDE_FLOAT64_C(  990.91)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -3.09), SIMDE_FLOAT64_C(   -2.13),
                         SIMDE_FLOAT64_C(   -8.07), SIMDE_FLOAT64_C(    3.03)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -512.38), SIMDE_FLOAT64_C( -352.93),
                         SIMDE_FLOAT64_C( -357.04), SIMDE_FLOAT64_C( -456.94)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -575.30), SIMDE_FLOAT64_C(  -85.17),
                         SIMDE_FLOAT64_C(  928.67), SIMDE_FLOAT64_C(  878.11)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -512.25), SIMDE_FLOAT64_C(  -80.17),
                         SIMDE_FLOAT64_C(    2.50), SIMDE_FLOAT64_C(    3.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -39.85), SIMDE_FLOAT64_C(  325.28),
                         SIMDE_FLOAT64_C(  658.09), SIMDE_FLOAT64_C(  797.01)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -802.91), SIMDE_FLOAT64_C(  574.48),
                         SIMDE_FLOAT64_C( -277.42), SIMDE_FLOAT64_C(  470.68)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -2.13), SIMDE_FLOAT64_C(    2.04),
                         SIMDE_FLOAT64_C(    2.01), SIMDE_FLOAT64_C(    3.05)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -248.13), SIMDE_FLOAT64_C(  236.41),
                         SIMDE_FLOAT64_C(  416.63), SIMDE_FLOAT64_C( -413.79)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -139.14), SIMDE_FLOAT64_C(  955.99),
                         SIMDE_FLOAT64_C(  356.33), SIMDE_FLOAT64_C( -498.79)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -136.13), SIMDE_FLOAT64_C(    3.69),
                         SIMDE_FLOAT64_C(  288.00), SIMDE_FLOAT64_C( -400.79)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  758.85), SIMDE_FLOAT64_C(  749.41),
                         SIMDE_FLOAT64_C(  129.33), SIMDE_FLOAT64_C( -389.28)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -615.54), SIMDE_FLOAT64_C(  496.18),
                         SIMDE_FLOAT64_C( -485.54), SIMDE_FLOAT64_C( -818.26)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  614.53), SIMDE_FLOAT64_C(    2.88),
                         SIMDE_FLOAT64_C(  128.27), SIMDE_FLOAT64_C(   -3.01)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -50.98), SIMDE_FLOAT64_C(  313.56),
                         SIMDE_FLOAT64_C( -914.58), SIMDE_FLOAT64_C(  -29.59)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -280.43), SIMDE_FLOAT64_C(  379.54),
                         SIMDE_FLOAT64_C(  -38.50), SIMDE_FLOAT64_C( -188.05)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -34.03), SIMDE_FLOAT64_C(  313.54),
                         SIMDE_FLOAT64_C(   -2.00), SIMDE_FLOAT64_C(  -10.75)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -131.10), SIMDE_FLOAT64_C( -368.55),
                         SIMDE_FLOAT64_C(  657.24), SIMDE_FLOAT64_C(  939.62)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -195.92), SIMDE_FLOAT64_C( -475.65),
                         SIMDE_FLOAT64_C( -524.29), SIMDE_FLOAT64_C(  537.04)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -131.04), SIMDE_FLOAT64_C( -336.52),
                         SIMDE_FLOAT64_C(  512.03), SIMDE_FLOAT64_C(  521.04)) }
  };

  /* Row count is derived from the table itself so the loop tracks its size. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < n_cases) {
    simde__m256d r = simde_mm256_and_pd(test_vec[i].a, test_vec[i].b);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
    i++;
  }

  return 0;
}

static int
test_simde_mm256_andnot_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i b;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C(-1269691626), INT32_C(-1170050076), INT32_C( -309781764), INT32_C( -576457271),
                            INT32_C( -634907762), INT32_C( 2098123667), INT32_C( -562209537), INT32_C(    5131913)),
      simde_mm256_set_epi32(INT32_C( 1938996560), INT32_C( -950945230), INT32_C( 1149254280), INT32_C(   44378753),
                            INT32_C( 2057504516), INT32_C( -993118301), INT32_C( 1739559582), INT32_C( -627102775)),
      simde_mm256_set_epi32(INT32_C( 1132508224), INT32_C( 1158776850), INT32_C(       8192), INT32_C(   33819136),
                            INT32_C(  545457152), INT32_C(-2134891488), INT32_C(  562201088), INT32_C( -628022976)) },
    { simde_mm256_set_epi32(INT32_C( 1623231278), INT32_C( 1012862807), INT32_C(-1490329263), INT32_C( -978121763),
                            INT32_C( 1155479170), INT32_C( 1443672525), INT32_C(-2006927873), INT32_C(-1659310326)),
      simde_mm256_set_epi32(INT32_C( 1899349789), INT32_C( -971300838), INT32_C(-1615418151), INT32_C( 1727488659),
                            INT32_C( -760518716), INT32_C( -855193027), INT32_C(-1052461502), INT32_C( 1027044741)),
      simde_mm256_set_epi32(INT32_C(  288703505), INT32_C(-1040179192), INT32_C(  412388488), INT32_C(  574906370),
                            INT32_C(-1843379900), INT32_C(-1996338640), INT32_C( 1090781696), INT32_C(  539431045)) },
    { simde_mm256_set_epi32(INT32_C(-2122027976), INT32_C( -148145974), INT32_C(    -727916), INT32_C(  -26694351),
                            INT32_C(-1526957699), INT32_C( -878651731), INT32_C( 1530541127), INT32_C(-1559858122)),
      simde_mm256_set_epi32(INT32_C( -534865702), INT32_C(-2025680665), INT32_C(  838358535), INT32_C( 1081018378),
                            INT32_C(-1920205149), INT32_C( -965251235), INT32_C(  386054154), INT32_C(-2050252028)),
      simde_mm256_set_epi32(INT32_C( 1612353730), INT32_C(    4227109), INT32_C(     528899), INT32_C(     458762),
                            INT32_C(  151225986), INT32_C(   72821072), INT32_C(   67143688), INT32_C(   80315136)) },
    { simde_mm256_set_epi32(INT32_C(  677275617), INT32_C(-1590450048), INT32_C( 1228022027), INT32_C(-1869880241),
                            INT32_C( -606167369), INT32_C( 1388808224), INT32_C(  759425545), INT32_C(  885944499)),
      simde_mm256_set_epi32(INT32_C(  148370019), INT32_C(    1263704), INT32_C( -153021241), INT32_C( 1201345211),
                            INT32_C( 1277824171), INT32_C(-1597492935), INT32_C(  875204555), INT32_C(  494146139)),
      simde_mm256_set_epi32(INT32_C(    8491010), INT32_C(      16472), INT32_C(-1228860220), INT32_C( 1192235696),
                            INT32_C(   69206024), INT32_C(-1610075879), INT32_C(  271058370), INT32_C(  154141256)) },
    { simde_mm256_set_epi32(INT32_C(-1186629793), INT32_C(-1939451012), INT32_C( 1901741359), INT32_C(  320110090),
                            INT32_C(  759094695), INT32_C(-2033042315), INT32_C(-1537152402), INT32_C( -430790655)),
      simde_mm256_set_epi32(INT32_C(-1742460754), INT32_C(  244002796), INT32_C( 1060007632), INT32_C( 2113362450),
                            INT32_C( 1987282511), INT32_C(-1021964277), INT32_C(  124383649), INT32_C( -826572412)),
      simde_mm256_set_epi32(INT32_C(    2099360), INT32_C(   42544256), INT32_C(  237251280), INT32_C( 1826816016),
                            INT32_C( 1379992136), INT32_C( 1090781194), INT32_C(   50925953), INT32_C(  145293700)) },
    { simde_mm256_set_epi32(INT32_C( 1671320788), INT32_C(   16189529), INT32_C(-1485817573), INT32_C(  275783232),
                            INT32_C( -537268511), INT32_C(-1218162385), INT32_C(  832178136), INT32_C(-1011542055)),
      simde_mm256_set_epi32(INT32_C( 1761443736), INT32_C(   53908736), INT32_C(-1387734476), INT32_C(  460708168),
                            INT32_C( 1701279267), INT32_C( 1520265677), INT32_C(-1733909786), INT32_C(  106810720)),
      simde_mm256_set_epi32(INT32_C(  140585736), INT32_C(   50369792), INT32_C(  134791716), INT32_C(  184926472),
                            INT32_C(  537268226), INT32_C( 1217996992), INT32_C(-2010865626), INT32_C(   71878688)) },
    { simde_mm256_set_epi32(INT32_C( 1589201791), INT32_C(-1842375346), INT32_C(-2000660080), INT32_C( -325292371),
                            INT32_C(  507717673), INT32_C(-1795722597), INT32_C( -367442910), INT32_C(  -52157671)),
      simde_mm256_set_epi32(INT32_C(  948353791), INT32_C( -947869222), INT32_C( 1848909924), INT32_C( 1620600148),
                            INT32_C(-1789584398), INT32_C(-1352425263), INT32_C( 1123687286), INT32_C( -367624963)),
      simde_mm256_set_epi32(INT32_C(  537309312), INT32_C( 1166026896), INT32_C( 1714692196), INT32_C(        336),
                            INT32_C(-2129390638), INT32_C(  721453120), INT32_C(   14818132), INT32_C(   34757860)) },
    { simde_mm256_set_epi32(INT32_C(  296819835), INT32_C(  519012224), INT32_C( 1292929763), INT32_C( 1826612612),
                            INT32_C(-1166644367), INT32_C(-1218677893), INT32_C( 1596508105), INT32_C(  479196973)),
      simde_mm256_set_epi32(INT32_C(-1798100031), INT32_C( 1081570687), INT32_C( -734752872), INT32_C( 1848654589),
                            INT32_C(-1212254773), INT32_C( 1838985430), INT32_C( 1056167509), INT32_C(-1850230510)),
      simde_mm256_set_epi32(INT32_C(-2076040320), INT32_C( 1074790527), INT32_C(-1876687592), INT32_C(   35658361),
                            INT32_C(   92804234), INT32_C( 1216381060), INT32_C(  550703124), INT32_C(-2127558638)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_castps_si256(simde_mm256_andnot_ps(simde_mm256_castsi256_ps(test_vec[i].a), simde_mm256_castsi256_ps(test_vec[i].b)));
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm256_andnot_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i b;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi64x(INT64_C(-8439082616481350053), INT64_C(-1591722586538286382),
                             INT64_C( 1596940992066035921), INT64_C( 7218980746644065590)),
      simde_mm256_set_epi64x(INT64_C(-2889835723484335944), INT64_C( 5702037989414933855),
                             INT64_C(-7588441850660451968), INT64_C( 5622786757188964496)),
      simde_mm256_set_epi64x(INT64_C( 6126343173960302752), INT64_C(  432527267492795661),
                             INT64_C(-9182839637781772032), INT64_C(  720582821084942464)) },
    { simde_mm256_set_epi64x(INT64_C(-3971123103425330466), INT64_C(  247751845170013697),
                             INT64_C( 5585328925838094706), INT64_C(  530115277368604043)),
      simde_mm256_set_epi64x(INT64_C( 4478919331480512906), INT64_C( 7586040465456902581),
                             INT64_C(-3267189003236240754), INT64_C( 4065656935289916932)),
      simde_mm256_set_epi64x(INT64_C( 3893433775916460288), INT64_C( 7495967802570838452),
                             INT64_C(-7914921412211473780), INT64_C( 4045363244402936324)) },
    { simde_mm256_set_epi64x(INT64_C(-3585412687111992497), INT64_C( 6297904517919445636),
                             INT64_C( 3051626993143041669), INT64_C( 4507652791803794073)),
      simde_mm256_set_epi64x(INT64_C( 7790656180790731512), INT64_C(-2238278921609140434),
                             INT64_C(-3659407193774134076), INT64_C(-7376863833910103250)),
      simde_mm256_set_epi64x(INT64_C( 2306388938282574000), INT64_C(-6876986580769954518),
                             INT64_C(-4240666768871972800), INT64_C(-9142279579591991002)) },
    { simde_mm256_set_epi64x(INT64_C( 7999698065344811916), INT64_C(-7341244500866629440),
                             INT64_C(-7328850488568705697), INT64_C(-2323514350316525032)),
      simde_mm256_set_epi64x(INT64_C( -107804005911866164), INT64_C(  111123727598973975),
                             INT64_C(-1111380546027591655), INT64_C( 4764066650640007854)),
      simde_mm256_set_epi64x(INT64_C(-8034139356564159424), INT64_C(  108159439879631895),
                             INT64_C( 6958347297832099840), INT64_C(    7957580804685990)) },
    { simde_mm256_set_epi64x(INT64_C( 3459048579714364836), INT64_C(-3973289916381214951),
                             INT64_C(-7209230353107463172), INT64_C( 2930204184482970329)),
      simde_mm256_set_epi64x(INT64_C( 3010733025435453292), INT64_C( -745749005863907277),
                             INT64_C(-3524732567674843907), INT64_C(-5826758923945327044)),
      simde_mm256_set_epi64x(INT64_C(  704889912361353800), INT64_C( 3828781103108395042),
                             INT64_C( 4901042599637093377), INT64_C(-8718669257106881500)) },
    { simde_mm256_set_epi64x(INT64_C(-7592832401699875339), INT64_C(-8023654109997292762),
                             INT64_C(-2771476036962446296), INT64_C( 9102393041728593835)),
      simde_mm256_set_epi64x(INT64_C( 8663885080599503900), INT64_C(-7386785027439544145),
                             INT64_C( 8164489019718037552), INT64_C(  750237314916317106)),
      simde_mm256_set_epi64x(INT64_C( 7501880507188158472), INT64_C(  673462032523630729),
                             INT64_C( 2325549012813766672), INT64_C(   11611668710957072)) },
    { simde_mm256_set_epi64x(INT64_C(  845408412650914951), INT64_C(-8878005092884591737),
                             INT64_C( 8700006383250628168), INT64_C( 5164616504794613874)),
      simde_mm256_set_epi64x(INT64_C(-8091532354365363750), INT64_C(-8752462265867882388),
                             INT64_C( 4660987830245455204), INT64_C(-5583031631743087362)),
      simde_mm256_set_epi64x(INT64_C(-8934015756479190696), INT64_C(  144397281535078504),
                             INT64_C(     851058845950244), INT64_C(-5764308427349687156)) },
    { simde_mm256_set_epi64x(INT64_C( 2555908784249146521), INT64_C(-8475938792743727314),
                             INT64_C( 2225136272678261710), INT64_C( -363869634240011329)),
      simde_mm256_set_epi64x(INT64_C(-1884165091148513154), INT64_C(-7810796383227036924),
                             INT64_C(-3384871664970291123), INT64_C( 3413049808639883718)),
      simde_mm256_set_epi64x(INT64_C(-4286845753352634266), INT64_C( 1261025490606501888),
                             INT64_C(-4537798832575469567), INT64_C(  363824553704642624)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_castpd_si256(simde_mm256_andnot_pd(simde_mm256_castsi256_pd(test_vec[i].a), simde_mm256_castsi256_pd(test_vec[i].b)));
    simde_assert_m256i_i64(r, ==, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm256_castps_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    17.89), SIMDE_FLOAT32_C(  -439.16),
                         SIMDE_FLOAT32_C(   198.42), SIMDE_FLOAT32_C(   352.58),
                         SIMDE_FLOAT32_C(   461.89), SIMDE_FLOAT32_C(  -105.28),
                         SIMDE_FLOAT32_C(   143.68), SIMDE_FLOAT32_C(   337.71)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    17.89), SIMDE_FLOAT32_C(  -439.16),
                         SIMDE_FLOAT32_C(   198.42), SIMDE_FLOAT32_C(   352.58),
                         SIMDE_FLOAT32_C(   461.89), SIMDE_FLOAT32_C(  -105.28),
                         SIMDE_FLOAT32_C(   143.68), SIMDE_FLOAT32_C(   337.71)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   104.39), SIMDE_FLOAT32_C(    34.20),
                         SIMDE_FLOAT32_C(   868.43), SIMDE_FLOAT32_C(  -354.71),
                         SIMDE_FLOAT32_C(    71.91), SIMDE_FLOAT32_C(  -620.66),
                         SIMDE_FLOAT32_C(  -727.46), SIMDE_FLOAT32_C(   516.70)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   104.39), SIMDE_FLOAT32_C(    34.20),
                         SIMDE_FLOAT32_C(   868.43), SIMDE_FLOAT32_C(  -354.71),
                         SIMDE_FLOAT32_C(    71.91), SIMDE_FLOAT32_C(  -620.66),
                         SIMDE_FLOAT32_C(  -727.46), SIMDE_FLOAT32_C(   516.70)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    57.24), SIMDE_FLOAT32_C(  -937.39),
                         SIMDE_FLOAT32_C(  -902.27), SIMDE_FLOAT32_C(  -540.76),
                         SIMDE_FLOAT32_C(  -319.95), SIMDE_FLOAT32_C(   472.57),
                         SIMDE_FLOAT32_C(  -514.05), SIMDE_FLOAT32_C(  -395.89)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    57.24), SIMDE_FLOAT32_C(  -937.39),
                         SIMDE_FLOAT32_C(  -902.27), SIMDE_FLOAT32_C(  -540.76),
                         SIMDE_FLOAT32_C(  -319.95), SIMDE_FLOAT32_C(   472.57),
                         SIMDE_FLOAT32_C(  -514.05), SIMDE_FLOAT32_C(  -395.89)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -289.46), SIMDE_FLOAT32_C(  -292.09),
                         SIMDE_FLOAT32_C(   300.75), SIMDE_FLOAT32_C(   515.47),
                         SIMDE_FLOAT32_C(  -443.08), SIMDE_FLOAT32_C(   738.56),
                         SIMDE_FLOAT32_C(   388.31), SIMDE_FLOAT32_C(  -979.02)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -289.46), SIMDE_FLOAT32_C(  -292.09),
                         SIMDE_FLOAT32_C(   300.75), SIMDE_FLOAT32_C(   515.47),
                         SIMDE_FLOAT32_C(  -443.08), SIMDE_FLOAT32_C(   738.56),
                         SIMDE_FLOAT32_C(   388.31), SIMDE_FLOAT32_C(  -979.02)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -416.84), SIMDE_FLOAT32_C(  -145.54),
                         SIMDE_FLOAT32_C(   814.92), SIMDE_FLOAT32_C(  -389.04),
                         SIMDE_FLOAT32_C(   271.28), SIMDE_FLOAT32_C(   795.75),
                         SIMDE_FLOAT32_C(   715.64), SIMDE_FLOAT32_C(  -282.83)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -416.84), SIMDE_FLOAT32_C(  -145.54),
                         SIMDE_FLOAT32_C(   814.92), SIMDE_FLOAT32_C(  -389.04),
                         SIMDE_FLOAT32_C(   271.28), SIMDE_FLOAT32_C(   795.75),
                         SIMDE_FLOAT32_C(   715.64), SIMDE_FLOAT32_C(  -282.83)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    36.85), SIMDE_FLOAT32_C(   358.48),
                         SIMDE_FLOAT32_C(  -119.62), SIMDE_FLOAT32_C(   444.11),
                         SIMDE_FLOAT32_C(   221.60), SIMDE_FLOAT32_C(  -300.30),
                         SIMDE_FLOAT32_C(   -48.26), SIMDE_FLOAT32_C(   969.79)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    36.85), SIMDE_FLOAT32_C(   358.48),
                         SIMDE_FLOAT32_C(  -119.62), SIMDE_FLOAT32_C(   444.11),
                         SIMDE_FLOAT32_C(   221.60), SIMDE_FLOAT32_C(  -300.30),
                         SIMDE_FLOAT32_C(   -48.26), SIMDE_FLOAT32_C(   969.79)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -242.13), SIMDE_FLOAT32_C(  -172.63),
                         SIMDE_FLOAT32_C(  -556.49), SIMDE_FLOAT32_C(  -637.53),
                         SIMDE_FLOAT32_C(    39.04), SIMDE_FLOAT32_C(  -822.45),
                         SIMDE_FLOAT32_C(  -881.86), SIMDE_FLOAT32_C(   639.51)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -242.13), SIMDE_FLOAT32_C(  -172.63),
                         SIMDE_FLOAT32_C(  -556.49), SIMDE_FLOAT32_C(  -637.53),
                         SIMDE_FLOAT32_C(    39.04), SIMDE_FLOAT32_C(  -822.45),
                         SIMDE_FLOAT32_C(  -881.86), SIMDE_FLOAT32_C(   639.51)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   262.21), SIMDE_FLOAT32_C(   189.91),
                         SIMDE_FLOAT32_C(  -147.18), SIMDE_FLOAT32_C(  -378.37),
                         SIMDE_FLOAT32_C(  -474.15), SIMDE_FLOAT32_C(   728.93),
                         SIMDE_FLOAT32_C(  -779.84), SIMDE_FLOAT32_C(  -836.44)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   262.21), SIMDE_FLOAT32_C(   189.91),
                         SIMDE_FLOAT32_C(  -147.18), SIMDE_FLOAT32_C(  -378.37),
                         SIMDE_FLOAT32_C(  -474.15), SIMDE_FLOAT32_C(   728.93),
                         SIMDE_FLOAT32_C(  -779.84), SIMDE_FLOAT32_C(  -836.44)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256 r = simde_mm256_castpd_ps(simde_mm256_castps_pd(test_vec[i].a));
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int
test_simde_mm256_castpd_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  866.38), SIMDE_FLOAT64_C( -294.05),
                         SIMDE_FLOAT64_C( -595.07), SIMDE_FLOAT64_C(   30.82)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  866.38), SIMDE_FLOAT64_C( -294.05),
                         SIMDE_FLOAT64_C( -595.07), SIMDE_FLOAT64_C(   30.82)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  967.90), SIMDE_FLOAT64_C(  598.86),
                         SIMDE_FLOAT64_C( -336.28), SIMDE_FLOAT64_C(  771.72)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  967.90), SIMDE_FLOAT64_C(  598.86),
                         SIMDE_FLOAT64_C( -336.28), SIMDE_FLOAT64_C(  771.72)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -949.57), SIMDE_FLOAT64_C( -900.42),
                         SIMDE_FLOAT64_C(  702.02), SIMDE_FLOAT64_C( -980.34)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -949.57), SIMDE_FLOAT64_C( -900.42),
                         SIMDE_FLOAT64_C(  702.02), SIMDE_FLOAT64_C( -980.34)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  969.39), SIMDE_FLOAT64_C(  513.49),
                         SIMDE_FLOAT64_C(  950.16), SIMDE_FLOAT64_C( -812.08)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  969.39), SIMDE_FLOAT64_C(  513.49),
                         SIMDE_FLOAT64_C(  950.16), SIMDE_FLOAT64_C( -812.08)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -507.62), SIMDE_FLOAT64_C( -207.64),
                         SIMDE_FLOAT64_C(  179.82), SIMDE_FLOAT64_C(   43.82)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -507.62), SIMDE_FLOAT64_C( -207.64),
                         SIMDE_FLOAT64_C(  179.82), SIMDE_FLOAT64_C(   43.82)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  461.29), SIMDE_FLOAT64_C(  530.93),
                         SIMDE_FLOAT64_C(   26.40), SIMDE_FLOAT64_C( -295.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  461.29), SIMDE_FLOAT64_C(  530.93),
                         SIMDE_FLOAT64_C(   26.40), SIMDE_FLOAT64_C( -295.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  195.05), SIMDE_FLOAT64_C( -631.18),
                         SIMDE_FLOAT64_C( -125.11), SIMDE_FLOAT64_C( -657.02)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  195.05), SIMDE_FLOAT64_C( -631.18),
                         SIMDE_FLOAT64_C( -125.11), SIMDE_FLOAT64_C( -657.02)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  839.46), SIMDE_FLOAT64_C( -787.94),
                         SIMDE_FLOAT64_C( -273.33), SIMDE_FLOAT64_C( -261.67)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  839.46), SIMDE_FLOAT64_C( -787.94),
                         SIMDE_FLOAT64_C( -273.33), SIMDE_FLOAT64_C( -261.67)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256d r = simde_mm256_castps_pd(simde_mm256_castpd_ps(test_vec[i].a));
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_castps128_ps256 against eight stored vectors.
 * Both the produced and the expected 256-bit values are converted with
 * simde__m256_to_private, and only their m128[0] members are compared
 * (via simde_assert_m128_equal); the remaining part of the result is not
 * inspected by this test.
 * Returns 0 once every table row has been processed. */
static int
test_simde_mm256_castps128_ps256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 a;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   351.31), SIMDE_FLOAT32_C(   331.36), SIMDE_FLOAT32_C(   112.22), SIMDE_FLOAT32_C(   -15.48)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(   351.31), SIMDE_FLOAT32_C(   331.36),
                         SIMDE_FLOAT32_C(   112.22), SIMDE_FLOAT32_C(   -15.48)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   905.42), SIMDE_FLOAT32_C(  -224.21), SIMDE_FLOAT32_C(   -76.15), SIMDE_FLOAT32_C(   663.18)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(   905.42), SIMDE_FLOAT32_C(  -224.21),
                         SIMDE_FLOAT32_C(   -76.15), SIMDE_FLOAT32_C(   663.18)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   830.23), SIMDE_FLOAT32_C(    -5.95), SIMDE_FLOAT32_C(   918.64), SIMDE_FLOAT32_C(   777.19)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(   830.23), SIMDE_FLOAT32_C(    -5.95),
                         SIMDE_FLOAT32_C(   918.64), SIMDE_FLOAT32_C(   777.19)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -168.85), SIMDE_FLOAT32_C(  -871.57), SIMDE_FLOAT32_C(    40.10), SIMDE_FLOAT32_C(  -558.27)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(  -168.85), SIMDE_FLOAT32_C(  -871.57),
                         SIMDE_FLOAT32_C(    40.10), SIMDE_FLOAT32_C(  -558.27)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -791.93), SIMDE_FLOAT32_C(   293.41), SIMDE_FLOAT32_C(   390.85), SIMDE_FLOAT32_C(   476.42)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(  -791.93), SIMDE_FLOAT32_C(   293.41),
                         SIMDE_FLOAT32_C(   390.85), SIMDE_FLOAT32_C(   476.42)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   481.03), SIMDE_FLOAT32_C(  -319.83), SIMDE_FLOAT32_C(  -437.10), SIMDE_FLOAT32_C(  -775.98)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(   481.03), SIMDE_FLOAT32_C(  -319.83),
                         SIMDE_FLOAT32_C(  -437.10), SIMDE_FLOAT32_C(  -775.98)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   193.26), SIMDE_FLOAT32_C(   702.14), SIMDE_FLOAT32_C(  -820.75), SIMDE_FLOAT32_C(  -158.78)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(   193.26), SIMDE_FLOAT32_C(   702.14),
                         SIMDE_FLOAT32_C(  -820.75), SIMDE_FLOAT32_C(  -158.78)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -995.99), SIMDE_FLOAT32_C(   290.31), SIMDE_FLOAT32_C(  -219.12), SIMDE_FLOAT32_C(  -837.21)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
                         SIMDE_FLOAT32_C(  -995.99), SIMDE_FLOAT32_C(   290.31),
                         SIMDE_FLOAT32_C(  -219.12), SIMDE_FLOAT32_C(  -837.21)) }
  };

  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    const simde__m256 widened = simde_mm256_castps128_ps256(test_vec[i].a);
    simde__m256_private r = simde__m256_to_private(widened);
    simde__m256_private expected = simde__m256_to_private(test_vec[i].r);

    /* Compare only the m128[0] member of each private representation. */
    simde_assert_m128_equal(r.m128[0], expected.m128[0]);
  }

  return 0;
}

/* Checks simde_mm256_castps256_ps128 against eight stored vectors: each
 * 256-bit input a is passed through the cast and the 128-bit result is
 * compared with r using simde_assert_m128_close (precision argument 1).
 * In the table, each r repeats the last four arguments given to the
 * simde_mm256_set_ps call that builds the corresponding a.
 * Returns 0 once every table row has been processed. */
static int
test_simde_mm256_castps256_ps128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m128 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -556.83), SIMDE_FLOAT32_C(   534.45),
                         SIMDE_FLOAT32_C(   421.40), SIMDE_FLOAT32_C(   932.30),
                         SIMDE_FLOAT32_C(   169.92), SIMDE_FLOAT32_C(   399.10),
                         SIMDE_FLOAT32_C(  -742.08), SIMDE_FLOAT32_C(  -830.66)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   169.92), SIMDE_FLOAT32_C(   399.10), SIMDE_FLOAT32_C(  -742.08), SIMDE_FLOAT32_C(  -830.66)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   802.82), SIMDE_FLOAT32_C(   -21.64),
                         SIMDE_FLOAT32_C(  -298.77), SIMDE_FLOAT32_C(  -723.27),
                         SIMDE_FLOAT32_C(    42.85), SIMDE_FLOAT32_C(  -154.70),
                         SIMDE_FLOAT32_C(  -285.18), SIMDE_FLOAT32_C(  -881.89)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(    42.85), SIMDE_FLOAT32_C(  -154.70), SIMDE_FLOAT32_C(  -285.18), SIMDE_FLOAT32_C(  -881.89)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   606.40), SIMDE_FLOAT32_C(   978.13),
                         SIMDE_FLOAT32_C(   281.04), SIMDE_FLOAT32_C(   316.13),
                         SIMDE_FLOAT32_C(     8.74), SIMDE_FLOAT32_C(  -824.14),
                         SIMDE_FLOAT32_C(  -338.77), SIMDE_FLOAT32_C(  -977.64)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(     8.74), SIMDE_FLOAT32_C(  -824.14), SIMDE_FLOAT32_C(  -338.77), SIMDE_FLOAT32_C(  -977.64)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   500.38), SIMDE_FLOAT32_C(  -378.47),
                         SIMDE_FLOAT32_C(  -151.95), SIMDE_FLOAT32_C(  -513.15),
                         SIMDE_FLOAT32_C(  -509.66), SIMDE_FLOAT32_C(   542.95),
                         SIMDE_FLOAT32_C(  -511.97), SIMDE_FLOAT32_C(   606.72)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -509.66), SIMDE_FLOAT32_C(   542.95), SIMDE_FLOAT32_C(  -511.97), SIMDE_FLOAT32_C(   606.72)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -156.57), SIMDE_FLOAT32_C(  -909.62),
                         SIMDE_FLOAT32_C(   457.12), SIMDE_FLOAT32_C(  -549.96),
                         SIMDE_FLOAT32_C(   250.75), SIMDE_FLOAT32_C(  -503.56),
                         SIMDE_FLOAT32_C(  -397.59), SIMDE_FLOAT32_C(   644.59)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   250.75), SIMDE_FLOAT32_C(  -503.56), SIMDE_FLOAT32_C(  -397.59), SIMDE_FLOAT32_C(   644.59)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   692.42), SIMDE_FLOAT32_C(   776.78),
                         SIMDE_FLOAT32_C(  -240.36), SIMDE_FLOAT32_C(  -615.28),
                         SIMDE_FLOAT32_C(  -428.59), SIMDE_FLOAT32_C(   807.96),
                         SIMDE_FLOAT32_C(  -867.86), SIMDE_FLOAT32_C(   511.34)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -428.59), SIMDE_FLOAT32_C(   807.96), SIMDE_FLOAT32_C(  -867.86), SIMDE_FLOAT32_C(   511.34)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   115.07), SIMDE_FLOAT32_C(  -200.05),
                         SIMDE_FLOAT32_C(  -278.17), SIMDE_FLOAT32_C(   321.78),
                         SIMDE_FLOAT32_C(   793.85), SIMDE_FLOAT32_C(   416.18),
                         SIMDE_FLOAT32_C(  -935.48), SIMDE_FLOAT32_C(  -637.83)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   793.85), SIMDE_FLOAT32_C(   416.18), SIMDE_FLOAT32_C(  -935.48), SIMDE_FLOAT32_C(  -637.83)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   951.45), SIMDE_FLOAT32_C(   803.49),
                         SIMDE_FLOAT32_C(  -646.03), SIMDE_FLOAT32_C(  -379.68),
                         SIMDE_FLOAT32_C(   433.22), SIMDE_FLOAT32_C(   128.68),
                         SIMDE_FLOAT32_C(   589.03), SIMDE_FLOAT32_C(   956.87)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   433.22), SIMDE_FLOAT32_C(   128.68), SIMDE_FLOAT32_C(   589.03), SIMDE_FLOAT32_C(   956.87)) }
  };

  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_cases ; i++) {
    const simde__m256 wide = test_vec[i].a;
    simde__m128 r = simde_mm256_castps256_ps128(wide);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int
test_simde_mm256_castps_si256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C(-1649031696), INT32_C(  834872153), INT32_C(  230986620), INT32_C( -480324866),
                            INT32_C( 1237553077), INT32_C(  596539913), INT32_C( -724550399), INT32_C( -685617130)),
      simde_mm256_set_epi32(INT32_C(-1649031696), INT32_C(  834872153), INT32_C(  230986620), INT32_C( -480324866),
                            INT32_C( 1237553077), INT32_C(  596539913), INT32_C( -724550399), INT32_C( -685617130)) },
    { simde_mm256_set_epi32(INT32_C( 2090398598), INT32_C( 2109187943), INT32_C( 2052808539), INT32_C(  748311192),
                            INT32_C( -177311449), INT32_C(  358911508), INT32_C( 1754057382), INT32_C(-1724435543)),
      simde_mm256_set_epi32(INT32_C( 2090398598), INT32_C( 2109187943), INT32_C( 2052808539), INT32_C(  748311192),
                            INT32_C( -177311449), INT32_C(  358911508), INT32_C( 1754057382), INT32_C(-1724435543)) },
    { simde_mm256_set_epi32(INT32_C(-1888707460), INT32_C(  492300795), INT32_C(-1881516103), INT32_C( 1522261816),
                            INT32_C(  516271628), INT32_C( 1619360533), INT32_C(  585952460), INT32_C(-1270838330)),
      simde_mm256_set_epi32(INT32_C(-1888707460), INT32_C(  492300795), INT32_C(-1881516103), INT32_C( 1522261816),
                            INT32_C(  516271628), INT32_C( 1619360533), INT32_C(  585952460), INT32_C(-1270838330)) },
    { simde_mm256_set_epi32(INT32_C(-1165169384), INT32_C(-2002996511), INT32_C(-1387036009), INT32_C(  730682044),
                            INT32_C( -504180431), INT32_C(-1682623046), INT32_C( 1968017036), INT32_C(  283253644)),
      simde_mm256_set_epi32(INT32_C(-1165169384), INT32_C(-2002996511), INT32_C(-1387036009), INT32_C(  730682044),
                            INT32_C( -504180431), INT32_C(-1682623046), INT32_C( 1968017036), INT32_C(  283253644)) },
    { simde_mm256_set_epi32(INT32_C( 1949666143), INT32_C( -888451700), INT32_C(  605916520), INT32_C(-1265057380),
                            INT32_C( 1158984758), INT32_C(  710723273), INT32_C( -342604717), INT32_C(-1218392316)),
      simde_mm256_set_epi32(INT32_C( 1949666143), INT32_C( -888451700), INT32_C(  605916520), INT32_C(-1265057380),
                            INT32_C( 1158984758), INT32_C(  710723273), INT32_C( -342604717), INT32_C(-1218392316)) },
    { simde_mm256_set_epi32(INT32_C(-1870151604), INT32_C(-2002713920), INT32_C(-1131057702), INT32_C(-1611852985),
                            INT32_C( 1725575775), INT32_C( -263968835), INT32_C(   26802813), INT32_C( -641556710)),
      simde_mm256_set_epi32(INT32_C(-1870151604), INT32_C(-2002713920), INT32_C(-1131057702), INT32_C(-1611852985),
                            INT32_C( 1725575775), INT32_C( -263968835), INT32_C(   26802813), INT32_C( -641556710)) },
    { simde_mm256_set_epi32(INT32_C(  938077299), INT32_C(    4161792), INT32_C( 1718084645), INT32_C( 1391219860),
                            INT32_C( 1311036795), INT32_C(  132407700), INT32_C(-1161361885), INT32_C( -462662147)),
      simde_mm256_set_epi32(INT32_C(  938077299), INT32_C(    4161792), INT32_C( 1718084645), INT32_C( 1391219860),
                            INT32_C( 1311036795), INT32_C(  132407700), INT32_C(-1161361885), INT32_C( -462662147)) },
    { simde_mm256_set_epi32(INT32_C(  987097256), INT32_C( -835194619), INT32_C(-1566547652), INT32_C(-1345603026),
                            INT32_C(  138933650), INT32_C(-1430090796), INT32_C(-1310267132), INT32_C( 1931451372)),
      simde_mm256_set_epi32(INT32_C(  987097256), INT32_C( -835194619), INT32_C(-1566547652), INT32_C(-1345603026),
                            INT32_C(  138933650), INT32_C(-1430090796), INT32_C(-1310267132), INT32_C( 1931451372)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_castps_si256(simde_mm256_castsi256_ps(test_vec[i].a));
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm256_castpd_si256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi64x(INT64_C(-6436426043624243132), INT64_C( 2719911931068686329),
                             INT64_C(-3355851641471628446), INT64_C(-4058286728495258453)),
      simde_mm256_set_epi64x(INT64_C(-6436426043624243132), INT64_C( 2719911931068686329),
                             INT64_C(-3355851641471628446), INT64_C(-4058286728495258453)) },
    { simde_mm256_set_epi64x(INT64_C(-6993645949082966147), INT64_C( 4041637144880323460),
                             INT64_C( 2966258866008904789), INT64_C( 2735372768247448487)),
      simde_mm256_set_epi64x(INT64_C(-6993645949082966147), INT64_C( 4041637144880323460),
                             INT64_C( 2966258866008904789), INT64_C( 2735372768247448487)) },
    { simde_mm256_set_epi64x(INT64_C(-6467543300276167050), INT64_C(-7030233167547396539),
                             INT64_C(-5268215840490095714), INT64_C( 8140300440770855984)),
      simde_mm256_set_epi64x(INT64_C(-6467543300276167050), INT64_C(-7030233167547396539),
                             INT64_C(-5268215840490095714), INT64_C( 8140300440770855984)) },
    { simde_mm256_set_epi64x(INT64_C(-3250744318785917277), INT64_C( 2545355707516900387),
                             INT64_C( 2404409761557662509), INT64_C( 3863384403090649322)),
      simde_mm256_set_epi64x(INT64_C(-3250744318785917277), INT64_C( 2545355707516900387),
                             INT64_C( 2404409761557662509), INT64_C( 3863384403090649322)) },
    { simde_mm256_set_epi64x(INT64_C(-4992649395117694343), INT64_C( 2252708120662783492),
                             INT64_C(-3886485865609467666), INT64_C( 4133517733748490879)),
      simde_mm256_set_epi64x(INT64_C(-4992649395117694343), INT64_C( 2252708120662783492),
                             INT64_C(-3886485865609467666), INT64_C( 4133517733748490879)) },
    { simde_mm256_set_epi64x(INT64_C(-7084284413768371436), INT64_C(  727608602759940145),
                             INT64_C( 5594257850626695037), INT64_C(-7304190896383027628)),
      simde_mm256_set_epi64x(INT64_C(-7084284413768371436), INT64_C(  727608602759940145),
                             INT64_C( 5594257850626695037), INT64_C(-7304190896383027628)) },
    { simde_mm256_set_epi64x(INT64_C(-1886107943195258905), INT64_C(-7906247581446835510),
                             INT64_C( 9068725184054777835), INT64_C( 3330105325701476873)),
      simde_mm256_set_epi64x(INT64_C(-1886107943195258905), INT64_C(-7906247581446835510),
                             INT64_C( 9068725184054777835), INT64_C( 3330105325701476873)) },
    { simde_mm256_set_epi64x(INT64_C( 1390912152688035821), INT64_C(-4783191750990221778),
                             INT64_C(-4829331002619468971), INT64_C( 2643188978129753257)),
      simde_mm256_set_epi64x(INT64_C( 1390912152688035821), INT64_C(-4783191750990221778),
                             INT64_C(-4829331002619468971), INT64_C( 2643188978129753257)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_castpd_si256(simde_mm256_castsi256_pd(test_vec[i].a));
    simde_assert_m256i_i64(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm256_castsi128_si256 on a table of 128-bit inputs.
 *
 * Each input is widened to 256 bits and the low 128-bit half of the
 * result is compared with the low 128-bit half of the expected vector
 * (both obtained via simde_mm256_extractf128_si256 with index 0).  The
 * expected vectors carry zeros in their upper half, but that half is
 * never compared here -- presumably because the intrinsic leaves the
 * upper bits undefined.  Returns 0 after the last entry. */
static int
test_simde_mm256_castsi128_si256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a; /* 128-bit input */
    simde__m256i r; /* expected vector; only its low half is checked */
  } test_vec[8] = {
    { simde_mm_set_epi32(INT32_C( 1176995756), INT32_C(-1870675232), INT32_C(  996429243), INT32_C(  550488102)),
      simde_mm256_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0),
                            INT32_C( 1176995756), INT32_C(-1870675232), INT32_C(  996429243), INT32_C(  550488102)) },
    { simde_mm_set_epi32(INT32_C( 1022574086), INT32_C( -246750524), INT32_C(-1886376341), INT32_C(-1870907175)),
      simde_mm256_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0),
                            INT32_C( 1022574086), INT32_C( -246750524), INT32_C(-1886376341), INT32_C(-1870907175)) },
    { simde_mm_set_epi32(INT32_C( 1399644059), INT32_C(-2062431582), INT32_C(  861056404), INT32_C(-1456249685)),
      simde_mm256_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0),
                            INT32_C( 1399644059), INT32_C(-2062431582), INT32_C(  861056404), INT32_C(-1456249685)) },
    { simde_mm_set_epi32(INT32_C(-1357701315), INT32_C( -200201270), INT32_C(-2128732965), INT32_C( -971656840)),
      simde_mm256_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0),
                            INT32_C(-1357701315), INT32_C( -200201270), INT32_C(-2128732965), INT32_C( -971656840)) },
    { simde_mm_set_epi32(INT32_C( 1758025228), INT32_C(   19121992), INT32_C( 1973849856), INT32_C( -609470236)),
      simde_mm256_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0),
                            INT32_C( 1758025228), INT32_C(   19121992), INT32_C( 1973849856), INT32_C( -609470236)) },
    { simde_mm_set_epi32(INT32_C(-1315323340), INT32_C( 1995350243), INT32_C(-1725897434), INT32_C(  791183816)),
      simde_mm256_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0),
                            INT32_C(-1315323340), INT32_C( 1995350243), INT32_C(-1725897434), INT32_C(  791183816)) },
    { simde_mm_set_epi32(INT32_C(-2046705493), INT32_C(  441938624), INT32_C(  -46886380), INT32_C(  606821245)),
      simde_mm256_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0),
                            INT32_C(-2046705493), INT32_C(  441938624), INT32_C(  -46886380), INT32_C(  606821245)) },
    { simde_mm_set_epi32(INT32_C( 1167221394), INT32_C(  739794596), INT32_C(  187704590), INT32_C( 1165304892)),
      simde_mm256_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0),
                            INT32_C( 1167221394), INT32_C(  739794596), INT32_C(  187704590), INT32_C( 1165304892)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    const simde__m256i widened = simde_mm256_castsi128_si256(test_vec[i].a);
    /* compare low halves only */
    simde__m128i r = simde_mm256_extractf128_si256(widened, 0);
    simde__m128i e = simde_mm256_extractf128_si256(test_vec[i].r, 0);
    simde_assert_m128i_equal(r, e);
  }

  return 0;
}

/* Checks simde_mm256_castsi256_si128 on a table of 256-bit inputs.
 *
 * For every entry the expected 128-bit vector holds the four low 32-bit
 * lanes of the input (the last four arguments of simde_mm256_set_epi32);
 * the narrowed result is compared lane by lane against it.  Returns 0
 * after the last entry. */
static int
test_simde_mm256_castsi256_si128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a; /* 256-bit input */
    simde__m128i r; /* expected low 128 bits of the input */
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C( 1033968789), INT32_C(  712909368), INT32_C(  -15382203), INT32_C(  726776461),
                            INT32_C( 1212968394), INT32_C( -910350077), INT32_C(-1401880553), INT32_C(-1640064659)),
      simde_mm_set_epi32(INT32_C( 1212968394), INT32_C( -910350077), INT32_C(-1401880553), INT32_C(-1640064659)) },
    { simde_mm256_set_epi32(INT32_C( -244971373), INT32_C( -839397474), INT32_C(-1281097070), INT32_C( 1259688200),
                            INT32_C(-1678523239), INT32_C(-1335997395), INT32_C( 1104214719), INT32_C(-1646552356)),
      simde_mm_set_epi32(INT32_C(-1678523239), INT32_C(-1335997395), INT32_C( 1104214719), INT32_C(-1646552356)) },
    { simde_mm256_set_epi32(INT32_C( 1339422473), INT32_C( -532071515), INT32_C(-1679156122), INT32_C( -104726847),
                            INT32_C( -189233938), INT32_C(-1476384511), INT32_C(   59015981), INT32_C( -574854746)),
      simde_mm_set_epi32(INT32_C( -189233938), INT32_C(-1476384511), INT32_C(   59015981), INT32_C( -574854746)) },
    { simde_mm256_set_epi32(INT32_C(  104804994), INT32_C(-1602912924), INT32_C(-1184587502), INT32_C( -929055139),
                            INT32_C(-1913020666), INT32_C( 1485870300), INT32_C( -930325282), INT32_C(  971511935)),
      simde_mm_set_epi32(INT32_C(-1913020666), INT32_C( 1485870300), INT32_C( -930325282), INT32_C(  971511935)) },
    { simde_mm256_set_epi32(INT32_C( -640493670), INT32_C( -513373085), INT32_C(  396752088), INT32_C( 1774159809),
                            INT32_C(-1068197323), INT32_C( -727216092), INT32_C( 2046795601), INT32_C( -954579053)),
      simde_mm_set_epi32(INT32_C(-1068197323), INT32_C( -727216092), INT32_C( 2046795601), INT32_C( -954579053)) },
    { simde_mm256_set_epi32(INT32_C(  968938230), INT32_C(  324986947), INT32_C( 1563795037), INT32_C( 1925209729),
                            INT32_C(-1635044296), INT32_C(  685246103), INT32_C( 1765586923), INT32_C( -978308891)),
      simde_mm_set_epi32(INT32_C(-1635044296), INT32_C(  685246103), INT32_C( 1765586923), INT32_C( -978308891)) },
    { simde_mm256_set_epi32(INT32_C(-1695851306), INT32_C(-2116140969), INT32_C(  146847367), INT32_C( -593023293),
                            INT32_C( 1573776318), INT32_C(-1046034616), INT32_C( -645014018), INT32_C( 1014255016)),
      simde_mm_set_epi32(INT32_C( 1573776318), INT32_C(-1046034616), INT32_C( -645014018), INT32_C( 1014255016)) },
    { simde_mm256_set_epi32(INT32_C( 1123223298), INT32_C( 1981751223), INT32_C( 1306799664), INT32_C(-1888445770),
                            INT32_C( 1502796782), INT32_C( 1707347280), INT32_C(  760982117), INT32_C( 1124914897)),
      simde_mm_set_epi32(INT32_C( 1502796782), INT32_C( 1707347280), INT32_C(  760982117), INT32_C( 1124914897)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m128i r;

    r = simde_mm256_castsi256_si128(test_vec[i].a);
    simde_assert_m128i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm256_castsi256_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C(-1649031696), INT32_C(  834872153), INT32_C(  230986620), INT32_C( -480324866),
                            INT32_C( 1237553077), INT32_C(  596539913), INT32_C( -724550399), INT32_C( -685617130)),
      simde_mm256_set_epi32(INT32_C(-1649031696), INT32_C(  834872153), INT32_C(  230986620), INT32_C( -480324866),
                            INT32_C( 1237553077), INT32_C(  596539913), INT32_C( -724550399), INT32_C( -685617130)) },
    { simde_mm256_set_epi32(INT32_C( 2090398598), INT32_C( 2109187943), INT32_C( 2052808539), INT32_C(  748311192),
                            INT32_C( -177311449), INT32_C(  358911508), INT32_C( 1754057382), INT32_C(-1724435543)),
      simde_mm256_set_epi32(INT32_C( 2090398598), INT32_C( 2109187943), INT32_C( 2052808539), INT32_C(  748311192),
                            INT32_C( -177311449), INT32_C(  358911508), INT32_C( 1754057382), INT32_C(-1724435543)) },
    { simde_mm256_set_epi32(INT32_C(-1888707460), INT32_C(  492300795), INT32_C(-1881516103), INT32_C( 1522261816),
                            INT32_C(  516271628), INT32_C( 1619360533), INT32_C(  585952460), INT32_C(-1270838330)),
      simde_mm256_set_epi32(INT32_C(-1888707460), INT32_C(  492300795), INT32_C(-1881516103), INT32_C( 1522261816),
                            INT32_C(  516271628), INT32_C( 1619360533), INT32_C(  585952460), INT32_C(-1270838330)) },
    { simde_mm256_set_epi32(INT32_C(-1165169384), INT32_C(-2002996511), INT32_C(-1387036009), INT32_C(  730682044),
                            INT32_C( -504180431), INT32_C(-1682623046), INT32_C( 1968017036), INT32_C(  283253644)),
      simde_mm256_set_epi32(INT32_C(-1165169384), INT32_C(-2002996511), INT32_C(-1387036009), INT32_C(  730682044),
                            INT32_C( -504180431), INT32_C(-1682623046), INT32_C( 1968017036), INT32_C(  283253644)) },
    { simde_mm256_set_epi32(INT32_C( 1949666143), INT32_C( -888451700), INT32_C(  605916520), INT32_C(-1265057380),
                            INT32_C( 1158984758), INT32_C(  710723273), INT32_C( -342604717), INT32_C(-1218392316)),
      simde_mm256_set_epi32(INT32_C( 1949666143), INT32_C( -888451700), INT32_C(  605916520), INT32_C(-1265057380),
                            INT32_C( 1158984758), INT32_C(  710723273), INT32_C( -342604717), INT32_C(-1218392316)) },
    { simde_mm256_set_epi32(INT32_C(-1870151604), INT32_C(-2002713920), INT32_C(-1131057702), INT32_C(-1611852985),
                            INT32_C( 1725575775), INT32_C( -263968835), INT32_C(   26802813), INT32_C( -641556710)),
      simde_mm256_set_epi32(INT32_C(-1870151604), INT32_C(-2002713920), INT32_C(-1131057702), INT32_C(-1611852985),
                            INT32_C( 1725575775), INT32_C( -263968835), INT32_C(   26802813), INT32_C( -641556710)) },
    { simde_mm256_set_epi32(INT32_C(  938077299), INT32_C(    4161792), INT32_C( 1718084645), INT32_C( 1391219860),
                            INT32_C( 1311036795), INT32_C(  132407700), INT32_C(-1161361885), INT32_C( -462662147)),
      simde_mm256_set_epi32(INT32_C(  938077299), INT32_C(    4161792), INT32_C( 1718084645), INT32_C( 1391219860),
                            INT32_C( 1311036795), INT32_C(  132407700), INT32_C(-1161361885), INT32_C( -462662147)) },
    { simde_mm256_set_epi32(INT32_C(  987097256), INT32_C( -835194619), INT32_C(-1566547652), INT32_C(-1345603026),
                            INT32_C(  138933650), INT32_C(-1430090796), INT32_C(-1310267132), INT32_C( 1931451372)),
      simde_mm256_set_epi32(INT32_C(  987097256), INT32_C( -835194619), INT32_C(-1566547652), INT32_C(-1345603026),
                            INT32_C(  138933650), INT32_C(-1430090796), INT32_C(-1310267132), INT32_C( 1931451372)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_castps_si256(simde_mm256_castsi256_ps(test_vec[i].a));
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm256_castsi256_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi64x(INT64_C(-6436426043624243132), INT64_C( 2719911931068686329),
                             INT64_C(-3355851641471628446), INT64_C(-4058286728495258453)),
      simde_mm256_set_epi64x(INT64_C(-6436426043624243132), INT64_C( 2719911931068686329),
                             INT64_C(-3355851641471628446), INT64_C(-4058286728495258453)) },
    { simde_mm256_set_epi64x(INT64_C(-6993645949082966147), INT64_C( 4041637144880323460),
                             INT64_C( 2966258866008904789), INT64_C( 2735372768247448487)),
      simde_mm256_set_epi64x(INT64_C(-6993645949082966147), INT64_C( 4041637144880323460),
                             INT64_C( 2966258866008904789), INT64_C( 2735372768247448487)) },
    { simde_mm256_set_epi64x(INT64_C(-6467543300276167050), INT64_C(-7030233167547396539),
                             INT64_C(-5268215840490095714), INT64_C( 8140300440770855984)),
      simde_mm256_set_epi64x(INT64_C(-6467543300276167050), INT64_C(-7030233167547396539),
                             INT64_C(-5268215840490095714), INT64_C( 8140300440770855984)) },
    { simde_mm256_set_epi64x(INT64_C(-3250744318785917277), INT64_C( 2545355707516900387),
                             INT64_C( 2404409761557662509), INT64_C( 3863384403090649322)),
      simde_mm256_set_epi64x(INT64_C(-3250744318785917277), INT64_C( 2545355707516900387),
                             INT64_C( 2404409761557662509), INT64_C( 3863384403090649322)) },
    { simde_mm256_set_epi64x(INT64_C(-4992649395117694343), INT64_C( 2252708120662783492),
                             INT64_C(-3886485865609467666), INT64_C( 4133517733748490879)),
      simde_mm256_set_epi64x(INT64_C(-4992649395117694343), INT64_C( 2252708120662783492),
                             INT64_C(-3886485865609467666), INT64_C( 4133517733748490879)) },
    { simde_mm256_set_epi64x(INT64_C(-7084284413768371436), INT64_C(  727608602759940145),
                             INT64_C( 5594257850626695037), INT64_C(-7304190896383027628)),
      simde_mm256_set_epi64x(INT64_C(-7084284413768371436), INT64_C(  727608602759940145),
                             INT64_C( 5594257850626695037), INT64_C(-7304190896383027628)) },
    { simde_mm256_set_epi64x(INT64_C(-1886107943195258905), INT64_C(-7906247581446835510),
                             INT64_C( 9068725184054777835), INT64_C( 3330105325701476873)),
      simde_mm256_set_epi64x(INT64_C(-1886107943195258905), INT64_C(-7906247581446835510),
                             INT64_C( 9068725184054777835), INT64_C( 3330105325701476873)) },
    { simde_mm256_set_epi64x(INT64_C( 1390912152688035821), INT64_C(-4783191750990221778),
                             INT64_C(-4829331002619468971), INT64_C( 2643188978129753257)),
      simde_mm256_set_epi64x(INT64_C( 1390912152688035821), INT64_C(-4783191750990221778),
                             INT64_C(-4829331002619468971), INT64_C( 2643188978129753257)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_castpd_si256(simde_mm256_castsi256_pd(test_vec[i].a));
    simde_assert_m256i_i64(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm256_blend_ps with the fixed immediate 13.
 *
 * 13 is binary 00001101, so bits 0, 2 and 3 are set.  In every table
 * entry the expected vector takes lanes 0, 2 and 3 from b and lanes 1
 * and 4..7 from a (simde_mm256_set_ps lists lanes from 7 down to 0).
 * Results are compared with simde_assert_m256_close, passing 1 as its
 * third argument.  Returns 0 after the last entry. */
static int
test_simde_mm256_blend_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a; /* first source */
    simde__m256 b; /* second source */
    simde__m256 r; /* expected blend of a and b */
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   61.35), SIMDE_FLOAT32_C(  540.33),
                         SIMDE_FLOAT32_C( -888.48), SIMDE_FLOAT32_C(  570.09),
                         SIMDE_FLOAT32_C(  312.02), SIMDE_FLOAT32_C( -960.46),
                         SIMDE_FLOAT32_C( -440.55), SIMDE_FLOAT32_C( -796.55)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -286.42), SIMDE_FLOAT32_C( -835.51),
                         SIMDE_FLOAT32_C(  177.42), SIMDE_FLOAT32_C(  142.03),
                         SIMDE_FLOAT32_C(  501.36), SIMDE_FLOAT32_C( -894.74),
                         SIMDE_FLOAT32_C( -798.77), SIMDE_FLOAT32_C(  511.25)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   61.35), SIMDE_FLOAT32_C(  540.33),
                         SIMDE_FLOAT32_C( -888.48), SIMDE_FLOAT32_C(  570.09),
                         SIMDE_FLOAT32_C(  501.36), SIMDE_FLOAT32_C( -894.74),
                         SIMDE_FLOAT32_C( -440.55), SIMDE_FLOAT32_C(  511.25)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  783.21), SIMDE_FLOAT32_C(  251.09),
                         SIMDE_FLOAT32_C( -929.85), SIMDE_FLOAT32_C( -378.97),
                         SIMDE_FLOAT32_C(  496.83), SIMDE_FLOAT32_C( -643.84),
                         SIMDE_FLOAT32_C( -785.45), SIMDE_FLOAT32_C( -676.85)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  375.42), SIMDE_FLOAT32_C( -689.69),
                         SIMDE_FLOAT32_C(  240.54), SIMDE_FLOAT32_C( -955.13),
                         SIMDE_FLOAT32_C(   82.52), SIMDE_FLOAT32_C(  210.36),
                         SIMDE_FLOAT32_C(  621.75), SIMDE_FLOAT32_C( -780.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  783.21), SIMDE_FLOAT32_C(  251.09),
                         SIMDE_FLOAT32_C( -929.85), SIMDE_FLOAT32_C( -378.97),
                         SIMDE_FLOAT32_C(   82.52), SIMDE_FLOAT32_C(  210.36),
                         SIMDE_FLOAT32_C( -785.45), SIMDE_FLOAT32_C( -780.72)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -747.80), SIMDE_FLOAT32_C( -376.86),
                         SIMDE_FLOAT32_C(  238.38), SIMDE_FLOAT32_C( -668.84),
                         SIMDE_FLOAT32_C(  238.09), SIMDE_FLOAT32_C(  936.53),
                         SIMDE_FLOAT32_C( -693.41), SIMDE_FLOAT32_C( -381.56)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   83.85), SIMDE_FLOAT32_C(  559.24),
                         SIMDE_FLOAT32_C( -896.35), SIMDE_FLOAT32_C(  225.46),
                         SIMDE_FLOAT32_C( -243.15), SIMDE_FLOAT32_C( -714.74),
                         SIMDE_FLOAT32_C(  388.91), SIMDE_FLOAT32_C(  608.15)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -747.80), SIMDE_FLOAT32_C( -376.86),
                         SIMDE_FLOAT32_C(  238.38), SIMDE_FLOAT32_C( -668.84),
                         SIMDE_FLOAT32_C( -243.15), SIMDE_FLOAT32_C( -714.74),
                         SIMDE_FLOAT32_C( -693.41), SIMDE_FLOAT32_C(  608.15)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    1.67), SIMDE_FLOAT32_C( -361.23),
                         SIMDE_FLOAT32_C(  362.97), SIMDE_FLOAT32_C( -860.62),
                         SIMDE_FLOAT32_C(  518.00), SIMDE_FLOAT32_C(  985.53),
                         SIMDE_FLOAT32_C(  -40.74), SIMDE_FLOAT32_C(  246.28)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  236.67), SIMDE_FLOAT32_C( -133.38),
                         SIMDE_FLOAT32_C( -240.09), SIMDE_FLOAT32_C(  681.13),
                         SIMDE_FLOAT32_C( -437.53), SIMDE_FLOAT32_C(  645.53),
                         SIMDE_FLOAT32_C(  472.51), SIMDE_FLOAT32_C(   30.02)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    1.67), SIMDE_FLOAT32_C( -361.23),
                         SIMDE_FLOAT32_C(  362.97), SIMDE_FLOAT32_C( -860.62),
                         SIMDE_FLOAT32_C( -437.53), SIMDE_FLOAT32_C(  645.53),
                         SIMDE_FLOAT32_C(  -40.74), SIMDE_FLOAT32_C(   30.02)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -759.07), SIMDE_FLOAT32_C(  240.96),
                         SIMDE_FLOAT32_C( -743.41), SIMDE_FLOAT32_C( -766.95),
                         SIMDE_FLOAT32_C( -733.55), SIMDE_FLOAT32_C( -798.68),
                         SIMDE_FLOAT32_C( -189.75), SIMDE_FLOAT32_C( -424.58)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -396.91), SIMDE_FLOAT32_C(  509.13),
                         SIMDE_FLOAT32_C(  462.02), SIMDE_FLOAT32_C(  520.45),
                         SIMDE_FLOAT32_C(  948.24), SIMDE_FLOAT32_C(  730.18),
                         SIMDE_FLOAT32_C( -709.02), SIMDE_FLOAT32_C( -858.64)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -759.07), SIMDE_FLOAT32_C(  240.96),
                         SIMDE_FLOAT32_C( -743.41), SIMDE_FLOAT32_C( -766.95),
                         SIMDE_FLOAT32_C(  948.24), SIMDE_FLOAT32_C(  730.18),
                         SIMDE_FLOAT32_C( -189.75), SIMDE_FLOAT32_C( -858.64)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  624.31), SIMDE_FLOAT32_C(  375.12),
                         SIMDE_FLOAT32_C(  629.27), SIMDE_FLOAT32_C(  901.24),
                         SIMDE_FLOAT32_C( -896.96), SIMDE_FLOAT32_C( -769.47),
                         SIMDE_FLOAT32_C(  452.93), SIMDE_FLOAT32_C( -251.17)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -148.06), SIMDE_FLOAT32_C( -474.61),
                         SIMDE_FLOAT32_C( -146.01), SIMDE_FLOAT32_C( -955.67),
                         SIMDE_FLOAT32_C(  931.60), SIMDE_FLOAT32_C( -773.07),
                         SIMDE_FLOAT32_C(  626.99), SIMDE_FLOAT32_C(  431.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  624.31), SIMDE_FLOAT32_C(  375.12),
                         SIMDE_FLOAT32_C(  629.27), SIMDE_FLOAT32_C(  901.24),
                         SIMDE_FLOAT32_C(  931.60), SIMDE_FLOAT32_C( -773.07),
                         SIMDE_FLOAT32_C(  452.93), SIMDE_FLOAT32_C(  431.72)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -812.40), SIMDE_FLOAT32_C( -551.18),
                         SIMDE_FLOAT32_C(  389.71), SIMDE_FLOAT32_C(  667.74),
                         SIMDE_FLOAT32_C(   -2.37), SIMDE_FLOAT32_C( -129.53),
                         SIMDE_FLOAT32_C(  860.16), SIMDE_FLOAT32_C(  760.95)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -172.95), SIMDE_FLOAT32_C( -591.71),
                         SIMDE_FLOAT32_C( -137.29), SIMDE_FLOAT32_C( -689.63),
                         SIMDE_FLOAT32_C(  644.48), SIMDE_FLOAT32_C(  859.03),
                         SIMDE_FLOAT32_C( -963.16), SIMDE_FLOAT32_C( -158.52)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -812.40), SIMDE_FLOAT32_C( -551.18),
                         SIMDE_FLOAT32_C(  389.71), SIMDE_FLOAT32_C(  667.74),
                         SIMDE_FLOAT32_C(  644.48), SIMDE_FLOAT32_C(  859.03),
                         SIMDE_FLOAT32_C(  860.16), SIMDE_FLOAT32_C( -158.52)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   53.03), SIMDE_FLOAT32_C( -493.76),
                         SIMDE_FLOAT32_C(  623.37), SIMDE_FLOAT32_C(  296.00),
                         SIMDE_FLOAT32_C( -416.71), SIMDE_FLOAT32_C( -539.89),
                         SIMDE_FLOAT32_C(  210.88), SIMDE_FLOAT32_C(  585.18)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -34.37), SIMDE_FLOAT32_C( -267.21),
                         SIMDE_FLOAT32_C(  411.37), SIMDE_FLOAT32_C( -265.51),
                         SIMDE_FLOAT32_C(  345.42), SIMDE_FLOAT32_C(  252.46),
                         SIMDE_FLOAT32_C(  286.48), SIMDE_FLOAT32_C( -858.99)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   53.03), SIMDE_FLOAT32_C( -493.76),
                         SIMDE_FLOAT32_C(  623.37), SIMDE_FLOAT32_C(  296.00),
                         SIMDE_FLOAT32_C(  345.42), SIMDE_FLOAT32_C(  252.46),
                         SIMDE_FLOAT32_C(  210.88), SIMDE_FLOAT32_C( -858.99)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    const simde__m256 src_a = test_vec[i].a;
    const simde__m256 src_b = test_vec[i].b;
    /* the immediate stays a literal: it selects lanes 0, 2 and 3 from src_b */
    simde__m256 r = simde_mm256_blend_ps(src_a, src_b, 13);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_blend_pd with the fixed immediate 13.
 *
 * 13 is binary 1101, so bits 0, 2 and 3 are set.  In every table entry
 * the expected vector takes lanes 0, 2 and 3 from b and lane 1 from a
 * (simde_mm256_set_pd lists lanes from 3 down to 0).  Results are
 * compared with simde_assert_m256d_close, passing 1 as its third
 * argument.  Returns 0 after the last entry. */
static int
test_simde_mm256_blend_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a; /* first source */
    simde__m256d b; /* second source */
    simde__m256d r; /* expected blend of a and b */
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  983.61), SIMDE_FLOAT64_C(  -51.56),
                         SIMDE_FLOAT64_C(  561.13), SIMDE_FLOAT64_C( -977.17)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  311.03), SIMDE_FLOAT64_C( -876.87),
                         SIMDE_FLOAT64_C(   15.56), SIMDE_FLOAT64_C(  821.58)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  311.03), SIMDE_FLOAT64_C( -876.87),
                         SIMDE_FLOAT64_C(  561.13), SIMDE_FLOAT64_C(  821.58)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  486.15), SIMDE_FLOAT64_C( -809.80),
                         SIMDE_FLOAT64_C( -134.49), SIMDE_FLOAT64_C( -709.30)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -588.52), SIMDE_FLOAT64_C( -823.37),
                         SIMDE_FLOAT64_C( -436.62), SIMDE_FLOAT64_C( -938.83)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -588.52), SIMDE_FLOAT64_C( -823.37),
                         SIMDE_FLOAT64_C( -134.49), SIMDE_FLOAT64_C( -938.83)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   10.97), SIMDE_FLOAT64_C( -837.09),
                         SIMDE_FLOAT64_C( -238.93), SIMDE_FLOAT64_C( -927.62)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -150.59), SIMDE_FLOAT64_C(  428.13),
                         SIMDE_FLOAT64_C(  655.11), SIMDE_FLOAT64_C(  -28.81)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -150.59), SIMDE_FLOAT64_C(  428.13),
                         SIMDE_FLOAT64_C( -238.93), SIMDE_FLOAT64_C(  -28.81)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -691.52), SIMDE_FLOAT64_C( -756.72),
                         SIMDE_FLOAT64_C(   69.02), SIMDE_FLOAT64_C( -249.34)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -857.57), SIMDE_FLOAT64_C( -720.61),
                         SIMDE_FLOAT64_C(  529.12), SIMDE_FLOAT64_C(  813.95)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -857.57), SIMDE_FLOAT64_C( -720.61),
                         SIMDE_FLOAT64_C(   69.02), SIMDE_FLOAT64_C(  813.95)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  743.57), SIMDE_FLOAT64_C(  671.72),
                         SIMDE_FLOAT64_C(  747.66), SIMDE_FLOAT64_C(  592.11)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -559.29), SIMDE_FLOAT64_C(  529.63),
                         SIMDE_FLOAT64_C(  121.55), SIMDE_FLOAT64_C( -352.32)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -559.29), SIMDE_FLOAT64_C(  529.63),
                         SIMDE_FLOAT64_C(  747.66), SIMDE_FLOAT64_C( -352.32)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -466.05), SIMDE_FLOAT64_C( -621.64),
                         SIMDE_FLOAT64_C(  113.70), SIMDE_FLOAT64_C( -906.12)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -919.48), SIMDE_FLOAT64_C(  972.84),
                         SIMDE_FLOAT64_C(  378.79), SIMDE_FLOAT64_C( -196.68)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -919.48), SIMDE_FLOAT64_C(  972.84),
                         SIMDE_FLOAT64_C(  113.70), SIMDE_FLOAT64_C( -196.68)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -749.13), SIMDE_FLOAT64_C(   30.92),
                         SIMDE_FLOAT64_C(  753.62), SIMDE_FLOAT64_C( -864.28)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -997.13), SIMDE_FLOAT64_C(  675.64),
                         SIMDE_FLOAT64_C( -135.93), SIMDE_FLOAT64_C( -647.89)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -997.13), SIMDE_FLOAT64_C(  675.64),
                         SIMDE_FLOAT64_C(  753.62), SIMDE_FLOAT64_C( -647.89)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  757.64), SIMDE_FLOAT64_C(  119.62),
                         SIMDE_FLOAT64_C(  682.14), SIMDE_FLOAT64_C( -348.74)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  467.61), SIMDE_FLOAT64_C(  532.40),
                         SIMDE_FLOAT64_C(  959.59), SIMDE_FLOAT64_C( -392.58)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  467.61), SIMDE_FLOAT64_C(  532.40),
                         SIMDE_FLOAT64_C(  682.14), SIMDE_FLOAT64_C( -392.58)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    const simde__m256d src_a = test_vec[i].a;
    const simde__m256d src_b = test_vec[i].b;
    /* the immediate stays a literal: it selects lanes 0, 2 and 3 from src_b */
    simde__m256d r = simde_mm256_blend_pd(src_a, src_b, 13);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_blendv_ps() against a fixed table of vectors.
 *
 * Every table row holds the two sources (a, b), the selector (mask) and
 * the expected result (r).  In each row the expected lane equals the lane
 * of b wherever the matching mask lane is negative, and the lane of a
 * otherwise.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm256_blendv_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m256 mask;
    simde__m256 r;
  } test_vec[] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -169.19), SIMDE_FLOAT32_C( -303.51),
                         SIMDE_FLOAT32_C(  280.62), SIMDE_FLOAT32_C(  971.56),
                         SIMDE_FLOAT32_C(  558.62), SIMDE_FLOAT32_C(  244.31),
                         SIMDE_FLOAT32_C( -482.20), SIMDE_FLOAT32_C(  526.92)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  779.01), SIMDE_FLOAT32_C( -628.61),
                         SIMDE_FLOAT32_C( -781.26), SIMDE_FLOAT32_C( -923.79),
                         SIMDE_FLOAT32_C( -624.75), SIMDE_FLOAT32_C( -481.19),
                         SIMDE_FLOAT32_C(  750.60), SIMDE_FLOAT32_C(  693.30)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  392.97), SIMDE_FLOAT32_C(  752.07),
                         SIMDE_FLOAT32_C(  -74.68), SIMDE_FLOAT32_C( -769.29),
                         SIMDE_FLOAT32_C(  600.30), SIMDE_FLOAT32_C( -577.83),
                         SIMDE_FLOAT32_C(  257.89), SIMDE_FLOAT32_C( -759.37)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -169.19), SIMDE_FLOAT32_C( -303.51),
                         SIMDE_FLOAT32_C( -781.26), SIMDE_FLOAT32_C( -923.79),
                         SIMDE_FLOAT32_C(  558.62), SIMDE_FLOAT32_C( -481.19),
                         SIMDE_FLOAT32_C( -482.20), SIMDE_FLOAT32_C(  693.30)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  436.77), SIMDE_FLOAT32_C(  265.17),
                         SIMDE_FLOAT32_C( -598.85), SIMDE_FLOAT32_C( -424.56),
                         SIMDE_FLOAT32_C(  -24.79), SIMDE_FLOAT32_C( -558.99),
                         SIMDE_FLOAT32_C( -299.03), SIMDE_FLOAT32_C( -367.92)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  434.09), SIMDE_FLOAT32_C(   46.59),
                         SIMDE_FLOAT32_C(   85.98), SIMDE_FLOAT32_C( -164.97),
                         SIMDE_FLOAT32_C(   72.68), SIMDE_FLOAT32_C( -140.26),
                         SIMDE_FLOAT32_C(  458.69), SIMDE_FLOAT32_C(  804.02)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  696.57), SIMDE_FLOAT32_C(  799.50),
                         SIMDE_FLOAT32_C(  216.00), SIMDE_FLOAT32_C(  812.94),
                         SIMDE_FLOAT32_C(  321.91), SIMDE_FLOAT32_C(  497.67),
                         SIMDE_FLOAT32_C( -321.87), SIMDE_FLOAT32_C(  -96.28)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  436.77), SIMDE_FLOAT32_C(  265.17),
                         SIMDE_FLOAT32_C( -598.85), SIMDE_FLOAT32_C( -424.56),
                         SIMDE_FLOAT32_C(  -24.79), SIMDE_FLOAT32_C( -558.99),
                         SIMDE_FLOAT32_C(  458.69), SIMDE_FLOAT32_C(  804.02)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  593.15), SIMDE_FLOAT32_C(  822.85),
                         SIMDE_FLOAT32_C( -843.43), SIMDE_FLOAT32_C( -486.43),
                         SIMDE_FLOAT32_C(  259.42), SIMDE_FLOAT32_C( -708.30),
                         SIMDE_FLOAT32_C( -398.61), SIMDE_FLOAT32_C(  689.88)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -402.96), SIMDE_FLOAT32_C(  346.21),
                         SIMDE_FLOAT32_C( -865.06), SIMDE_FLOAT32_C(  330.41),
                         SIMDE_FLOAT32_C(  355.72), SIMDE_FLOAT32_C( -380.53),
                         SIMDE_FLOAT32_C(  702.28), SIMDE_FLOAT32_C(    6.18)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -441.80), SIMDE_FLOAT32_C(  453.07),
                         SIMDE_FLOAT32_C( -312.81), SIMDE_FLOAT32_C(  655.80),
                         SIMDE_FLOAT32_C( -443.61), SIMDE_FLOAT32_C( -292.20),
                         SIMDE_FLOAT32_C( -429.77), SIMDE_FLOAT32_C(  815.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -402.96), SIMDE_FLOAT32_C(  822.85),
                         SIMDE_FLOAT32_C( -865.06), SIMDE_FLOAT32_C( -486.43),
                         SIMDE_FLOAT32_C(  355.72), SIMDE_FLOAT32_C( -380.53),
                         SIMDE_FLOAT32_C(  702.28), SIMDE_FLOAT32_C(  689.88)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  627.12), SIMDE_FLOAT32_C( -903.61),
                         SIMDE_FLOAT32_C(  802.66), SIMDE_FLOAT32_C(  393.79),
                         SIMDE_FLOAT32_C( -634.24), SIMDE_FLOAT32_C(  782.19),
                         SIMDE_FLOAT32_C(  964.94), SIMDE_FLOAT32_C( -554.43)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -249.23), SIMDE_FLOAT32_C( -358.11),
                         SIMDE_FLOAT32_C(   69.71), SIMDE_FLOAT32_C(  774.95),
                         SIMDE_FLOAT32_C( -447.84), SIMDE_FLOAT32_C( -947.94),
                         SIMDE_FLOAT32_C( -908.85), SIMDE_FLOAT32_C(  -21.47)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  241.23), SIMDE_FLOAT32_C(   27.04),
                         SIMDE_FLOAT32_C(  341.47), SIMDE_FLOAT32_C(  482.33),
                         SIMDE_FLOAT32_C(  411.77), SIMDE_FLOAT32_C( -282.69),
                         SIMDE_FLOAT32_C(  915.57), SIMDE_FLOAT32_C( -213.96)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  627.12), SIMDE_FLOAT32_C( -903.61),
                         SIMDE_FLOAT32_C(  802.66), SIMDE_FLOAT32_C(  393.79),
                         SIMDE_FLOAT32_C( -634.24), SIMDE_FLOAT32_C( -947.94),
                         SIMDE_FLOAT32_C(  964.94), SIMDE_FLOAT32_C(  -21.47)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  365.56), SIMDE_FLOAT32_C(  297.70),
                         SIMDE_FLOAT32_C( -723.56), SIMDE_FLOAT32_C(  -52.07),
                         SIMDE_FLOAT32_C(  692.93), SIMDE_FLOAT32_C( -882.05),
                         SIMDE_FLOAT32_C( -424.36), SIMDE_FLOAT32_C( -366.57)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  744.52), SIMDE_FLOAT32_C(  387.36),
                         SIMDE_FLOAT32_C( -311.40), SIMDE_FLOAT32_C( -280.68),
                         SIMDE_FLOAT32_C(  556.91), SIMDE_FLOAT32_C(  703.77),
                         SIMDE_FLOAT32_C( -828.92), SIMDE_FLOAT32_C(  893.13)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -152.86), SIMDE_FLOAT32_C(  793.25),
                         SIMDE_FLOAT32_C(  667.69), SIMDE_FLOAT32_C(  940.95),
                         SIMDE_FLOAT32_C( -273.21), SIMDE_FLOAT32_C(  148.61),
                         SIMDE_FLOAT32_C(  420.18), SIMDE_FLOAT32_C(    4.30)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  744.52), SIMDE_FLOAT32_C(  297.70),
                         SIMDE_FLOAT32_C( -723.56), SIMDE_FLOAT32_C(  -52.07),
                         SIMDE_FLOAT32_C(  556.91), SIMDE_FLOAT32_C( -882.05),
                         SIMDE_FLOAT32_C( -424.36), SIMDE_FLOAT32_C( -366.57)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  806.04), SIMDE_FLOAT32_C(  998.56),
                         SIMDE_FLOAT32_C(  954.81), SIMDE_FLOAT32_C( -105.93),
                         SIMDE_FLOAT32_C(  810.39), SIMDE_FLOAT32_C( -451.40),
                         SIMDE_FLOAT32_C( -991.41), SIMDE_FLOAT32_C(   24.70)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   69.57), SIMDE_FLOAT32_C(  -71.31),
                         SIMDE_FLOAT32_C( -379.77), SIMDE_FLOAT32_C( -507.58),
                         SIMDE_FLOAT32_C( -931.37), SIMDE_FLOAT32_C( -271.48),
                         SIMDE_FLOAT32_C(  709.92), SIMDE_FLOAT32_C( -442.85)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -608.56), SIMDE_FLOAT32_C( -319.60),
                         SIMDE_FLOAT32_C( -930.98), SIMDE_FLOAT32_C( -628.59),
                         SIMDE_FLOAT32_C(  898.10), SIMDE_FLOAT32_C( -782.18),
                         SIMDE_FLOAT32_C( -846.42), SIMDE_FLOAT32_C(  513.23)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   69.57), SIMDE_FLOAT32_C(  -71.31),
                         SIMDE_FLOAT32_C( -379.77), SIMDE_FLOAT32_C( -507.58),
                         SIMDE_FLOAT32_C(  810.39), SIMDE_FLOAT32_C( -271.48),
                         SIMDE_FLOAT32_C(  709.92), SIMDE_FLOAT32_C(   24.70)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -598.65), SIMDE_FLOAT32_C( -864.46),
                         SIMDE_FLOAT32_C( -182.16), SIMDE_FLOAT32_C(  855.39),
                         SIMDE_FLOAT32_C( -689.66), SIMDE_FLOAT32_C(  116.31),
                         SIMDE_FLOAT32_C( -552.00), SIMDE_FLOAT32_C(  962.19)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    2.01), SIMDE_FLOAT32_C(  601.06),
                         SIMDE_FLOAT32_C(  276.21), SIMDE_FLOAT32_C( -129.29),
                         SIMDE_FLOAT32_C( -199.59), SIMDE_FLOAT32_C( -345.44),
                         SIMDE_FLOAT32_C( -185.67), SIMDE_FLOAT32_C(  900.58)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  857.48), SIMDE_FLOAT32_C( -480.62),
                         SIMDE_FLOAT32_C( -406.99), SIMDE_FLOAT32_C( -422.80),
                         SIMDE_FLOAT32_C(    1.49), SIMDE_FLOAT32_C(  102.14),
                         SIMDE_FLOAT32_C(  113.98), SIMDE_FLOAT32_C( -405.64)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -598.65), SIMDE_FLOAT32_C(  601.06),
                         SIMDE_FLOAT32_C(  276.21), SIMDE_FLOAT32_C( -129.29),
                         SIMDE_FLOAT32_C( -689.66), SIMDE_FLOAT32_C(  116.31),
                         SIMDE_FLOAT32_C( -552.00), SIMDE_FLOAT32_C(  900.58)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  890.83), SIMDE_FLOAT32_C(  201.56),
                         SIMDE_FLOAT32_C(  104.24), SIMDE_FLOAT32_C(  496.38),
                         SIMDE_FLOAT32_C(  607.57), SIMDE_FLOAT32_C(  285.01),
                         SIMDE_FLOAT32_C(  501.29), SIMDE_FLOAT32_C( -590.78)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -643.60), SIMDE_FLOAT32_C(  256.83),
                         SIMDE_FLOAT32_C(  140.60), SIMDE_FLOAT32_C(  204.90),
                         SIMDE_FLOAT32_C(  371.07), SIMDE_FLOAT32_C( -744.00),
                         SIMDE_FLOAT32_C(  751.14), SIMDE_FLOAT32_C(  233.26)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -384.25), SIMDE_FLOAT32_C( -588.66),
                         SIMDE_FLOAT32_C( -531.19), SIMDE_FLOAT32_C( -520.11),
                         SIMDE_FLOAT32_C(  648.35), SIMDE_FLOAT32_C( -584.03),
                         SIMDE_FLOAT32_C(  700.38), SIMDE_FLOAT32_C(   21.17)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -643.60), SIMDE_FLOAT32_C(  256.83),
                         SIMDE_FLOAT32_C(  140.60), SIMDE_FLOAT32_C(  204.90),
                         SIMDE_FLOAT32_C(  607.57), SIMDE_FLOAT32_C( -744.00),
                         SIMDE_FLOAT32_C(  501.29), SIMDE_FLOAT32_C( -590.78)) }
  };
  const size_t n_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_vectors ; ++i) {
    /* Pull the inputs out of the row so the call under test reads plainly. */
    const simde__m256 src_a = test_vec[i].a;
    const simde__m256 src_b = test_vec[i].b;
    const simde__m256 selector = test_vec[i].mask;

    simde__m256 r = simde_mm256_blendv_ps(src_a, src_b, selector);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_blendv_pd() against a fixed table of vectors.
 *
 * Every table row holds the two sources (a, b), the selector (mask) and
 * the expected result (r).  In each row the expected lane equals the lane
 * of b wherever the matching mask lane is negative, and the lane of a
 * otherwise.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm256_blendv_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d b;
    simde__m256d mask;
    simde__m256d r;
  } test_vec[] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -587.29), SIMDE_FLOAT64_C(  745.99),
                         SIMDE_FLOAT64_C(  660.01), SIMDE_FLOAT64_C(  -72.44)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  307.98), SIMDE_FLOAT64_C(  879.25),
                         SIMDE_FLOAT64_C(  340.44), SIMDE_FLOAT64_C( -338.42)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -599.03), SIMDE_FLOAT64_C(  269.37),
                         SIMDE_FLOAT64_C( -940.99), SIMDE_FLOAT64_C( -383.55)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  307.98), SIMDE_FLOAT64_C(  745.99),
                         SIMDE_FLOAT64_C(  340.44), SIMDE_FLOAT64_C( -338.42)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -369.37), SIMDE_FLOAT64_C(  888.66),
                         SIMDE_FLOAT64_C( -159.55), SIMDE_FLOAT64_C( -869.53)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  649.46), SIMDE_FLOAT64_C(  886.19),
                         SIMDE_FLOAT64_C(  926.89), SIMDE_FLOAT64_C( -697.40)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -515.74), SIMDE_FLOAT64_C( -918.64),
                         SIMDE_FLOAT64_C(  131.75), SIMDE_FLOAT64_C(  581.75)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  649.46), SIMDE_FLOAT64_C(  886.19),
                         SIMDE_FLOAT64_C( -159.55), SIMDE_FLOAT64_C( -869.53)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -900.47), SIMDE_FLOAT64_C(  409.14),
                         SIMDE_FLOAT64_C( -799.12), SIMDE_FLOAT64_C( -260.50)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -10.16), SIMDE_FLOAT64_C(  623.74),
                         SIMDE_FLOAT64_C( -915.24), SIMDE_FLOAT64_C( -491.31)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  804.78), SIMDE_FLOAT64_C( -317.20),
                         SIMDE_FLOAT64_C( -335.85), SIMDE_FLOAT64_C( -779.77)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -900.47), SIMDE_FLOAT64_C(  623.74),
                         SIMDE_FLOAT64_C( -915.24), SIMDE_FLOAT64_C( -491.31)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -87.84), SIMDE_FLOAT64_C(  244.36),
                         SIMDE_FLOAT64_C(  -17.33), SIMDE_FLOAT64_C(  496.74)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  176.35), SIMDE_FLOAT64_C(  303.26),
                         SIMDE_FLOAT64_C( -414.16), SIMDE_FLOAT64_C(  -98.44)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -385.65), SIMDE_FLOAT64_C( -192.42),
                         SIMDE_FLOAT64_C(  392.65), SIMDE_FLOAT64_C( -902.76)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  176.35), SIMDE_FLOAT64_C(  303.26),
                         SIMDE_FLOAT64_C(  -17.33), SIMDE_FLOAT64_C(  -98.44)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -793.35), SIMDE_FLOAT64_C(    6.53),
                         SIMDE_FLOAT64_C(  858.41), SIMDE_FLOAT64_C(  175.72)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  376.10), SIMDE_FLOAT64_C( -410.32),
                         SIMDE_FLOAT64_C(  -49.60), SIMDE_FLOAT64_C( -434.06)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -290.24), SIMDE_FLOAT64_C(  223.04),
                         SIMDE_FLOAT64_C(  738.63), SIMDE_FLOAT64_C( -193.43)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  376.10), SIMDE_FLOAT64_C(    6.53),
                         SIMDE_FLOAT64_C(  858.41), SIMDE_FLOAT64_C( -434.06)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -894.04), SIMDE_FLOAT64_C( -968.20),
                         SIMDE_FLOAT64_C(  146.09), SIMDE_FLOAT64_C( -741.36)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -570.76), SIMDE_FLOAT64_C( -340.64),
                         SIMDE_FLOAT64_C(  593.54), SIMDE_FLOAT64_C( -684.62)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  155.79), SIMDE_FLOAT64_C(  975.56),
                         SIMDE_FLOAT64_C(  939.33), SIMDE_FLOAT64_C(  615.78)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -894.04), SIMDE_FLOAT64_C( -968.20),
                         SIMDE_FLOAT64_C(  146.09), SIMDE_FLOAT64_C( -741.36)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -512.44), SIMDE_FLOAT64_C(  657.99),
                         SIMDE_FLOAT64_C(  888.55), SIMDE_FLOAT64_C(  863.80)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  344.43), SIMDE_FLOAT64_C(  994.17),
                         SIMDE_FLOAT64_C( -142.41), SIMDE_FLOAT64_C( -388.31)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -738.41), SIMDE_FLOAT64_C(  935.62),
                         SIMDE_FLOAT64_C( -743.51), SIMDE_FLOAT64_C(  -41.25)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  344.43), SIMDE_FLOAT64_C(  657.99),
                         SIMDE_FLOAT64_C( -142.41), SIMDE_FLOAT64_C( -388.31)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -178.00), SIMDE_FLOAT64_C( -981.39),
                         SIMDE_FLOAT64_C( -631.33), SIMDE_FLOAT64_C(  518.52)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  592.14), SIMDE_FLOAT64_C(  -27.22),
                         SIMDE_FLOAT64_C(  736.38), SIMDE_FLOAT64_C(  579.20)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  893.09), SIMDE_FLOAT64_C(  120.96),
                         SIMDE_FLOAT64_C(  910.10), SIMDE_FLOAT64_C( -128.61)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -178.00), SIMDE_FLOAT64_C( -981.39),
                         SIMDE_FLOAT64_C( -631.33), SIMDE_FLOAT64_C(  579.20)) }
  };
  const size_t n_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_vectors ; ++i) {
    /* Pull the inputs out of the row so the call under test reads plainly. */
    const simde__m256d src_a = test_vec[i].a;
    const simde__m256d src_b = test_vec[i].b;
    const simde__m256d selector = test_vec[i].mask;

    simde__m256d r = simde_mm256_blendv_pd(src_a, src_b, selector);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_broadcast_ps() against a fixed table of vectors.
 *
 * Each row supplies four floats (a), which are handed to the function as a
 * single simde__m128, and the eight floats expected back (r): the same
 * four values repeated in the low and high halves.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm256_broadcast_ps (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    /* Exactly one 128-bit source vector: four floats, all initialised below. */
    const simde_float32 a[4];
    const simde_float32 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(   430.96), SIMDE_FLOAT32_C(   742.21), SIMDE_FLOAT32_C(    80.10), SIMDE_FLOAT32_C(  -808.38) },
      { SIMDE_FLOAT32_C(   430.96), SIMDE_FLOAT32_C(   742.21), SIMDE_FLOAT32_C(    80.10), SIMDE_FLOAT32_C(  -808.38),
        SIMDE_FLOAT32_C(   430.96), SIMDE_FLOAT32_C(   742.21), SIMDE_FLOAT32_C(    80.10), SIMDE_FLOAT32_C(  -808.38) } },
    { { SIMDE_FLOAT32_C(   -52.11), SIMDE_FLOAT32_C(  -124.82), SIMDE_FLOAT32_C(   257.00), SIMDE_FLOAT32_C(   583.90) },
      { SIMDE_FLOAT32_C(   -52.11), SIMDE_FLOAT32_C(  -124.82), SIMDE_FLOAT32_C(   257.00), SIMDE_FLOAT32_C(   583.90),
        SIMDE_FLOAT32_C(   -52.11), SIMDE_FLOAT32_C(  -124.82), SIMDE_FLOAT32_C(   257.00), SIMDE_FLOAT32_C(   583.90) } },
    { { SIMDE_FLOAT32_C(  -353.01), SIMDE_FLOAT32_C(   791.63), SIMDE_FLOAT32_C(   304.14), SIMDE_FLOAT32_C(  -431.18) },
      { SIMDE_FLOAT32_C(  -353.01), SIMDE_FLOAT32_C(   791.63), SIMDE_FLOAT32_C(   304.14), SIMDE_FLOAT32_C(  -431.18),
        SIMDE_FLOAT32_C(  -353.01), SIMDE_FLOAT32_C(   791.63), SIMDE_FLOAT32_C(   304.14), SIMDE_FLOAT32_C(  -431.18) } },
    { { SIMDE_FLOAT32_C(   839.02), SIMDE_FLOAT32_C(  -532.53), SIMDE_FLOAT32_C(    12.86), SIMDE_FLOAT32_C(  -518.51) },
      { SIMDE_FLOAT32_C(   839.02), SIMDE_FLOAT32_C(  -532.53), SIMDE_FLOAT32_C(    12.86), SIMDE_FLOAT32_C(  -518.51),
        SIMDE_FLOAT32_C(   839.02), SIMDE_FLOAT32_C(  -532.53), SIMDE_FLOAT32_C(    12.86), SIMDE_FLOAT32_C(  -518.51) } },
    { { SIMDE_FLOAT32_C(   815.19), SIMDE_FLOAT32_C(   919.67), SIMDE_FLOAT32_C(  -404.62), SIMDE_FLOAT32_C(  -140.25) },
      { SIMDE_FLOAT32_C(   815.19), SIMDE_FLOAT32_C(   919.67), SIMDE_FLOAT32_C(  -404.62), SIMDE_FLOAT32_C(  -140.25),
        SIMDE_FLOAT32_C(   815.19), SIMDE_FLOAT32_C(   919.67), SIMDE_FLOAT32_C(  -404.62), SIMDE_FLOAT32_C(  -140.25) } },
    { { SIMDE_FLOAT32_C(   487.65), SIMDE_FLOAT32_C(  -341.39), SIMDE_FLOAT32_C(  -448.94), SIMDE_FLOAT32_C(  -588.75) },
      { SIMDE_FLOAT32_C(   487.65), SIMDE_FLOAT32_C(  -341.39), SIMDE_FLOAT32_C(  -448.94), SIMDE_FLOAT32_C(  -588.75),
        SIMDE_FLOAT32_C(   487.65), SIMDE_FLOAT32_C(  -341.39), SIMDE_FLOAT32_C(  -448.94), SIMDE_FLOAT32_C(  -588.75) } },
    { { SIMDE_FLOAT32_C(    22.82), SIMDE_FLOAT32_C(  -871.28), SIMDE_FLOAT32_C(   241.67), SIMDE_FLOAT32_C(   474.50) },
      { SIMDE_FLOAT32_C(    22.82), SIMDE_FLOAT32_C(  -871.28), SIMDE_FLOAT32_C(   241.67), SIMDE_FLOAT32_C(   474.50),
        SIMDE_FLOAT32_C(    22.82), SIMDE_FLOAT32_C(  -871.28), SIMDE_FLOAT32_C(   241.67), SIMDE_FLOAT32_C(   474.50) } },
    { { SIMDE_FLOAT32_C(  -594.42), SIMDE_FLOAT32_C(  -935.66), SIMDE_FLOAT32_C(  -297.52), SIMDE_FLOAT32_C(   836.54) },
      { SIMDE_FLOAT32_C(  -594.42), SIMDE_FLOAT32_C(  -935.66), SIMDE_FLOAT32_C(  -297.52), SIMDE_FLOAT32_C(   836.54),
        SIMDE_FLOAT32_C(  -594.42), SIMDE_FLOAT32_C(  -935.66), SIMDE_FLOAT32_C(  -297.52), SIMDE_FLOAT32_C(   836.54) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    /* The table stores plain floats; reinterpret the row's input as the
     * simde__m128 pointer the function takes. */
    simde__m128 const* a_ = SIMDE_ALIGN_CAST(simde__m128 const*, test_vec[i].a);
    simde__m256 r = simde_mm256_broadcast_ps(a_);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}

/* Exercises simde_mm256_broadcast_pd() against a fixed table of vectors.
 *
 * Each row supplies two doubles (a), which are handed to the function as a
 * single simde__m128d, and the four doubles expected back (r): the same
 * pair repeated in the low and high halves.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm256_broadcast_pd (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 a[2];
    const simde_float64 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(  -644.23), SIMDE_FLOAT64_C(   202.59) },
      { SIMDE_FLOAT64_C(  -644.23), SIMDE_FLOAT64_C(   202.59), SIMDE_FLOAT64_C(  -644.23), SIMDE_FLOAT64_C(   202.59) } },
    { { SIMDE_FLOAT64_C(    46.64), SIMDE_FLOAT64_C(  -251.29) },
      { SIMDE_FLOAT64_C(    46.64), SIMDE_FLOAT64_C(  -251.29), SIMDE_FLOAT64_C(    46.64), SIMDE_FLOAT64_C(  -251.29) } },
    { { SIMDE_FLOAT64_C(  -358.76), SIMDE_FLOAT64_C(  -455.49) },
      { SIMDE_FLOAT64_C(  -358.76), SIMDE_FLOAT64_C(  -455.49), SIMDE_FLOAT64_C(  -358.76), SIMDE_FLOAT64_C(  -455.49) } },
    { { SIMDE_FLOAT64_C(   621.72), SIMDE_FLOAT64_C(  -738.99) },
      { SIMDE_FLOAT64_C(   621.72), SIMDE_FLOAT64_C(  -738.99), SIMDE_FLOAT64_C(   621.72), SIMDE_FLOAT64_C(  -738.99) } },
    { { SIMDE_FLOAT64_C(   354.14), SIMDE_FLOAT64_C(  -365.61) },
      { SIMDE_FLOAT64_C(   354.14), SIMDE_FLOAT64_C(  -365.61), SIMDE_FLOAT64_C(   354.14), SIMDE_FLOAT64_C(  -365.61) } },
    { { SIMDE_FLOAT64_C(     5.77), SIMDE_FLOAT64_C(  -199.92) },
      { SIMDE_FLOAT64_C(     5.77), SIMDE_FLOAT64_C(  -199.92), SIMDE_FLOAT64_C(     5.77), SIMDE_FLOAT64_C(  -199.92) } },
    { { SIMDE_FLOAT64_C(   814.08), SIMDE_FLOAT64_C(  -186.66) },
      { SIMDE_FLOAT64_C(   814.08), SIMDE_FLOAT64_C(  -186.66), SIMDE_FLOAT64_C(   814.08), SIMDE_FLOAT64_C(  -186.66) } },
    { { SIMDE_FLOAT64_C(  -805.67), SIMDE_FLOAT64_C(  -248.07) },
      { SIMDE_FLOAT64_C(  -805.67), SIMDE_FLOAT64_C(  -248.07), SIMDE_FLOAT64_C(  -805.67), SIMDE_FLOAT64_C(  -248.07) } }
  };
  const size_t n_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_vectors ; ++i) {
    /* The table stores plain doubles; reinterpret the row's input as the
     * simde__m128d pointer the function takes. */
    const simde__m128d* src = SIMDE_ALIGN_CAST(const simde__m128d*, test_vec[i].a);
    simde__m256d r = simde_mm256_broadcast_pd(src);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}

/* Exercises simde_mm256_broadcast_sd() against a fixed table of vectors.
 *
 * Each row pairs one scalar double (a) with the expected vector (r), whose
 * four lanes all hold that same value.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm256_broadcast_sd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float64 a;
    simde__m256d r;
  } test_vec[] = {
    { SIMDE_FLOAT64_C(  800.84),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  800.84), SIMDE_FLOAT64_C(  800.84),
                         SIMDE_FLOAT64_C(  800.84), SIMDE_FLOAT64_C(  800.84)) },
    { SIMDE_FLOAT64_C(  700.06),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  700.06), SIMDE_FLOAT64_C(  700.06),
                         SIMDE_FLOAT64_C(  700.06), SIMDE_FLOAT64_C(  700.06)) },
    { SIMDE_FLOAT64_C( -801.66),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -801.66), SIMDE_FLOAT64_C( -801.66),
                         SIMDE_FLOAT64_C( -801.66), SIMDE_FLOAT64_C( -801.66)) },
    { SIMDE_FLOAT64_C( -941.38),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -941.38), SIMDE_FLOAT64_C( -941.38),
                         SIMDE_FLOAT64_C( -941.38), SIMDE_FLOAT64_C( -941.38)) },
    { SIMDE_FLOAT64_C( -346.77),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -346.77), SIMDE_FLOAT64_C( -346.77),
                         SIMDE_FLOAT64_C( -346.77), SIMDE_FLOAT64_C( -346.77)) },
    { SIMDE_FLOAT64_C( -833.73),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -833.73), SIMDE_FLOAT64_C( -833.73),
                         SIMDE_FLOAT64_C( -833.73), SIMDE_FLOAT64_C( -833.73)) },
    { SIMDE_FLOAT64_C(  315.88),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  315.88), SIMDE_FLOAT64_C(  315.88),
                         SIMDE_FLOAT64_C(  315.88), SIMDE_FLOAT64_C(  315.88)) },
    { SIMDE_FLOAT64_C( -868.73),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -868.73), SIMDE_FLOAT64_C( -868.73),
                         SIMDE_FLOAT64_C( -868.73), SIMDE_FLOAT64_C( -868.73)) }
  };
  const size_t n_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_vectors ; ++i) {
    /* The function reads its scalar through a pointer. */
    const simde_float64* scalar = &test_vec[i].a;
    simde__m256d r = simde_mm256_broadcast_sd(scalar);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm_broadcast_ss() against a fixed table of vectors.
 *
 * Each row pairs one scalar float (a) with the expected 128-bit vector (r),
 * whose four lanes all hold that same value.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm_broadcast_ss(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float32 a;
    simde__m128 r;
  } test_vec[] = {
    { SIMDE_FLOAT32_C(  137.82),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  137.82), SIMDE_FLOAT32_C(  137.82), SIMDE_FLOAT32_C(  137.82), SIMDE_FLOAT32_C(  137.82)) },
    { SIMDE_FLOAT32_C( -118.58),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -118.58), SIMDE_FLOAT32_C( -118.58), SIMDE_FLOAT32_C( -118.58), SIMDE_FLOAT32_C( -118.58)) },
    { SIMDE_FLOAT32_C(  963.02),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  963.02), SIMDE_FLOAT32_C(  963.02), SIMDE_FLOAT32_C(  963.02), SIMDE_FLOAT32_C(  963.02)) },
    { SIMDE_FLOAT32_C(  515.85),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  515.85), SIMDE_FLOAT32_C(  515.85), SIMDE_FLOAT32_C(  515.85), SIMDE_FLOAT32_C(  515.85)) },
    { SIMDE_FLOAT32_C(  110.78),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  110.78), SIMDE_FLOAT32_C(  110.78), SIMDE_FLOAT32_C(  110.78), SIMDE_FLOAT32_C(  110.78)) },
    { SIMDE_FLOAT32_C( -190.98),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -190.98), SIMDE_FLOAT32_C( -190.98), SIMDE_FLOAT32_C( -190.98), SIMDE_FLOAT32_C( -190.98)) },
    { SIMDE_FLOAT32_C( -429.63),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -429.63), SIMDE_FLOAT32_C( -429.63), SIMDE_FLOAT32_C( -429.63), SIMDE_FLOAT32_C( -429.63)) },
    { SIMDE_FLOAT32_C( -924.63),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -924.63), SIMDE_FLOAT32_C( -924.63), SIMDE_FLOAT32_C( -924.63), SIMDE_FLOAT32_C( -924.63)) }
  };
  const size_t n_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_vectors ; ++i) {
    /* The function reads its scalar through a pointer. */
    const simde_float32* scalar = &test_vec[i].a;
    simde__m128 r = simde_mm_broadcast_ss(scalar);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_broadcast_ss() against a fixed table of vectors.
 *
 * Each row pairs one scalar float (a) with the expected 256-bit vector (r),
 * whose eight lanes all hold that same value.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm256_broadcast_ss(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float32 a;
    simde__m256 r;
  } test_vec[] = {
    { SIMDE_FLOAT32_C( -970.00),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00),
                         SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00),
                         SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00),
                         SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00)) },
    { SIMDE_FLOAT32_C(  425.08),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  425.08), SIMDE_FLOAT32_C(  425.08),
                         SIMDE_FLOAT32_C(  425.08), SIMDE_FLOAT32_C(  425.08),
                         SIMDE_FLOAT32_C(  425.08), SIMDE_FLOAT32_C(  425.08),
                         SIMDE_FLOAT32_C(  425.08), SIMDE_FLOAT32_C(  425.08)) },
    { SIMDE_FLOAT32_C(  814.32),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  814.32), SIMDE_FLOAT32_C(  814.32),
                         SIMDE_FLOAT32_C(  814.32), SIMDE_FLOAT32_C(  814.32),
                         SIMDE_FLOAT32_C(  814.32), SIMDE_FLOAT32_C(  814.32),
                         SIMDE_FLOAT32_C(  814.32), SIMDE_FLOAT32_C(  814.32)) },
    { SIMDE_FLOAT32_C(  309.83),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  309.83), SIMDE_FLOAT32_C(  309.83),
                         SIMDE_FLOAT32_C(  309.83), SIMDE_FLOAT32_C(  309.83),
                         SIMDE_FLOAT32_C(  309.83), SIMDE_FLOAT32_C(  309.83),
                         SIMDE_FLOAT32_C(  309.83), SIMDE_FLOAT32_C(  309.83)) },
    { SIMDE_FLOAT32_C( -410.17),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17),
                         SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17),
                         SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17),
                         SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17)) },
    { SIMDE_FLOAT32_C( -592.37),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37),
                         SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37),
                         SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37),
                         SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37)) },
    { SIMDE_FLOAT32_C(  297.30),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  297.30), SIMDE_FLOAT32_C(  297.30),
                         SIMDE_FLOAT32_C(  297.30), SIMDE_FLOAT32_C(  297.30),
                         SIMDE_FLOAT32_C(  297.30), SIMDE_FLOAT32_C(  297.30),
                         SIMDE_FLOAT32_C(  297.30), SIMDE_FLOAT32_C(  297.30)) },
    { SIMDE_FLOAT32_C( -549.85),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85),
                         SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85),
                         SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85),
                         SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85)) }
  };
  const size_t n_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_vectors ; ++i) {
    /* The function reads its scalar through a pointer. */
    const simde_float32* scalar = &test_vec[i].a;
    simde__m256 r = simde_mm256_broadcast_ss(scalar);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_castpd128_pd256() against a fixed table of vectors.
 *
 * Each row pairs a 128-bit input (a) with a 256-bit reference (r) whose low
 * half repeats the input.  Only the low 128-bit halves of the result and
 * the reference are compared; the upper half of the result is never
 * inspected.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm256_castpd128_pd256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d a;
    simde__m256d r;
  } test_vec[] = {
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -698.37), SIMDE_FLOAT64_C(  516.77)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C( -698.37), SIMDE_FLOAT64_C(  516.77)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -509.42), SIMDE_FLOAT64_C( -285.35)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C( -509.42), SIMDE_FLOAT64_C( -285.35)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -919.57), SIMDE_FLOAT64_C(  938.94)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C( -919.57), SIMDE_FLOAT64_C(  938.94)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(   39.71), SIMDE_FLOAT64_C(  227.66)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(   39.71), SIMDE_FLOAT64_C(  227.66)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  548.32), SIMDE_FLOAT64_C( -120.08)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(  548.32), SIMDE_FLOAT64_C( -120.08)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -962.85), SIMDE_FLOAT64_C(  234.42)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C( -962.85), SIMDE_FLOAT64_C(  234.42)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.16), SIMDE_FLOAT64_C( -985.25)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C( -939.16), SIMDE_FLOAT64_C( -985.25)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  302.61), SIMDE_FLOAT64_C(  350.72)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(  302.61), SIMDE_FLOAT64_C(  350.72)) }
  };
  const size_t n_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_vectors ; ++i) {
    const simde__m256d widened = simde_mm256_castpd128_pd256(test_vec[i].a);

    /* Go through the private view so the low half can be picked out. */
    simde__m256d_private r = simde__m256d_to_private(widened);
    simde__m256d_private expected = simde__m256d_to_private(test_vec[i].r);
    simde_assert_m128d_equal(r.m128d[0], expected.m128d[0]);
  }

  return 0;
}

/* Exercises simde_mm256_castpd256_pd128() against a fixed table of vectors.
 *
 * Each row pairs a 256-bit input (a) with the expected 128-bit result (r),
 * which repeats the last two arguments given to simde_mm256_set_pd() for
 * the input.  Result and reference are both reinterpreted as integer
 * vectors and compared with the integer equality assertion.
 *
 * Returns 0 once every row has been checked; a failing comparison is
 * handled by the assertion macro.
 */
static int
test_simde_mm256_castpd256_pd128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m128d r;
  } test_vec[] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -956.85), SIMDE_FLOAT64_C(  625.41),
                         SIMDE_FLOAT64_C(  728.85), SIMDE_FLOAT64_C(  239.74)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  728.85), SIMDE_FLOAT64_C(  239.74)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -642.31), SIMDE_FLOAT64_C( -953.04),
                         SIMDE_FLOAT64_C( -288.66), SIMDE_FLOAT64_C(  999.01)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -288.66), SIMDE_FLOAT64_C(  999.01)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -330.74), SIMDE_FLOAT64_C(  875.72),
                         SIMDE_FLOAT64_C( -137.28), SIMDE_FLOAT64_C( -787.08)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -137.28), SIMDE_FLOAT64_C( -787.08)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -616.04), SIMDE_FLOAT64_C( -762.33),
                         SIMDE_FLOAT64_C(  806.25), SIMDE_FLOAT64_C( -621.65)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  806.25), SIMDE_FLOAT64_C( -621.65)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -28.93), SIMDE_FLOAT64_C(  468.91),
                         SIMDE_FLOAT64_C(  242.39), SIMDE_FLOAT64_C(   -4.32)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  242.39), SIMDE_FLOAT64_C(   -4.32)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -635.10), SIMDE_FLOAT64_C( -479.80),
                         SIMDE_FLOAT64_C(  479.34), SIMDE_FLOAT64_C(  994.78)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  479.34), SIMDE_FLOAT64_C(  994.78)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  839.27), SIMDE_FLOAT64_C( -846.55),
                         SIMDE_FLOAT64_C( -287.23), SIMDE_FLOAT64_C(  498.33)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -287.23), SIMDE_FLOAT64_C(  498.33)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -68.67), SIMDE_FLOAT64_C(  956.25),
                         SIMDE_FLOAT64_C(  462.89), SIMDE_FLOAT64_C( -555.47)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  462.89), SIMDE_FLOAT64_C( -555.47)) }
  };
  const size_t n_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_vectors ; ++i) {
    const simde__m256d wide = test_vec[i].a;
    simde__m128d r = simde_mm256_castpd256_pd128(wide);
    simde_assert_m128i_equal(simde_mm_castpd_si128(r), simde_mm_castpd_si128(test_vec[i].r));
  }

  return 0;
}

/* Runs simde_mm256_ceil_pd over a fixed table of double-precision vectors and
 * compares every result with the vector stored next to its input. */
static int
test_simde_mm256_ceil_pd(SIMDE_MUNIT_TEST_ARGS) {
  /* a: argument handed to simde_mm256_ceil_pd.
   * r: vector the result is compared against (each lane of a rounded up to a whole number). */
  const struct {
    simde__m256d a;
    simde__m256d r;
  } test_vec[] = {
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-242.41), SIMDE_FLOAT64_C(-377.59), SIMDE_FLOAT64_C(787.73), SIMDE_FLOAT64_C(903.22)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-242.00), SIMDE_FLOAT64_C(-377.00), SIMDE_FLOAT64_C(788.00), SIMDE_FLOAT64_C(904.00))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-851.63), SIMDE_FLOAT64_C(-168.29), SIMDE_FLOAT64_C(-47.72), SIMDE_FLOAT64_C(-227.89)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-851.00), SIMDE_FLOAT64_C(-168.00), SIMDE_FLOAT64_C(-47.00), SIMDE_FLOAT64_C(-227.00))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(922.71), SIMDE_FLOAT64_C(-494.40), SIMDE_FLOAT64_C(-263.96), SIMDE_FLOAT64_C(-353.64)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(923.00), SIMDE_FLOAT64_C(-494.00), SIMDE_FLOAT64_C(-263.00), SIMDE_FLOAT64_C(-353.00))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-611.84), SIMDE_FLOAT64_C(512.63), SIMDE_FLOAT64_C(-238.35), SIMDE_FLOAT64_C(-170.16)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-611.00), SIMDE_FLOAT64_C(513.00), SIMDE_FLOAT64_C(-238.00), SIMDE_FLOAT64_C(-170.00))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(816.43), SIMDE_FLOAT64_C(815.17), SIMDE_FLOAT64_C(214.52), SIMDE_FLOAT64_C(-660.09)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(817.00), SIMDE_FLOAT64_C(816.00), SIMDE_FLOAT64_C(215.00), SIMDE_FLOAT64_C(-660.00))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(434.90), SIMDE_FLOAT64_C(54.02), SIMDE_FLOAT64_C(-447.58), SIMDE_FLOAT64_C(766.46)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(435.00), SIMDE_FLOAT64_C(55.00), SIMDE_FLOAT64_C(-447.00), SIMDE_FLOAT64_C(767.00))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(372.78), SIMDE_FLOAT64_C(-135.62), SIMDE_FLOAT64_C(715.18), SIMDE_FLOAT64_C(-737.69)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(373.00), SIMDE_FLOAT64_C(-135.00), SIMDE_FLOAT64_C(716.00), SIMDE_FLOAT64_C(-737.00))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(491.75), SIMDE_FLOAT64_C(481.14), SIMDE_FLOAT64_C(571.31), SIMDE_FLOAT64_C(426.99)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(492.00), SIMDE_FLOAT64_C(482.00), SIMDE_FLOAT64_C(572.00), SIMDE_FLOAT64_C(427.00))
    }
  };
  const size_t n_vecs = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vecs ; i++) {
    simde__m256d r = simde_mm256_ceil_pd(test_vec[i].a);
    /* The trailing 1 is the closeness argument of the assertion helper
     * (presumably a decimal precision -- confirm against the test headers). */
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Runs simde_mm256_ceil_ps over a fixed table of single-precision vectors and
 * compares every result with the vector stored next to its input. */
static int
test_simde_mm256_ceil_ps(SIMDE_MUNIT_TEST_ARGS) {
  /* a: argument handed to simde_mm256_ceil_ps.
   * r: vector the result is compared against (each lane of a rounded up to a whole number). */
  const struct {
    simde__m256 a;
    simde__m256 r;
  } test_vec[] = {
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(719.50), SIMDE_FLOAT32_C(423.42), SIMDE_FLOAT32_C(-325.80), SIMDE_FLOAT32_C(-7.65),
                         SIMDE_FLOAT32_C(549.35), SIMDE_FLOAT32_C(88.23), SIMDE_FLOAT32_C(442.11), SIMDE_FLOAT32_C(103.18)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(720.00), SIMDE_FLOAT32_C(424.00), SIMDE_FLOAT32_C(-325.00), SIMDE_FLOAT32_C(-7.00),
                         SIMDE_FLOAT32_C(550.00), SIMDE_FLOAT32_C(89.00), SIMDE_FLOAT32_C(443.00), SIMDE_FLOAT32_C(104.00))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(295.26), SIMDE_FLOAT32_C(174.52), SIMDE_FLOAT32_C(976.35), SIMDE_FLOAT32_C(-556.97),
                         SIMDE_FLOAT32_C(-188.36), SIMDE_FLOAT32_C(-888.83), SIMDE_FLOAT32_C(-89.34), SIMDE_FLOAT32_C(743.04)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(296.00), SIMDE_FLOAT32_C(175.00), SIMDE_FLOAT32_C(977.00), SIMDE_FLOAT32_C(-556.00),
                         SIMDE_FLOAT32_C(-188.00), SIMDE_FLOAT32_C(-888.00), SIMDE_FLOAT32_C(-89.00), SIMDE_FLOAT32_C(744.00))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-846.44), SIMDE_FLOAT32_C(768.02), SIMDE_FLOAT32_C(217.87), SIMDE_FLOAT32_C(724.14),
                         SIMDE_FLOAT32_C(-751.28), SIMDE_FLOAT32_C(377.99), SIMDE_FLOAT32_C(-892.77), SIMDE_FLOAT32_C(-779.41)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-846.00), SIMDE_FLOAT32_C(769.00), SIMDE_FLOAT32_C(218.00), SIMDE_FLOAT32_C(725.00),
                         SIMDE_FLOAT32_C(-751.00), SIMDE_FLOAT32_C(378.00), SIMDE_FLOAT32_C(-892.00), SIMDE_FLOAT32_C(-779.00))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-291.21), SIMDE_FLOAT32_C(-433.62), SIMDE_FLOAT32_C(331.96), SIMDE_FLOAT32_C(13.15),
                         SIMDE_FLOAT32_C(-6.69), SIMDE_FLOAT32_C(-467.28), SIMDE_FLOAT32_C(-722.45), SIMDE_FLOAT32_C(-121.36)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-291.00), SIMDE_FLOAT32_C(-433.00), SIMDE_FLOAT32_C(332.00), SIMDE_FLOAT32_C(14.00),
                         SIMDE_FLOAT32_C(-6.00), SIMDE_FLOAT32_C(-467.00), SIMDE_FLOAT32_C(-722.00), SIMDE_FLOAT32_C(-121.00))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(337.66), SIMDE_FLOAT32_C(332.63), SIMDE_FLOAT32_C(164.76), SIMDE_FLOAT32_C(401.70),
                         SIMDE_FLOAT32_C(-359.22), SIMDE_FLOAT32_C(-704.77), SIMDE_FLOAT32_C(780.49), SIMDE_FLOAT32_C(-605.11)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(338.00), SIMDE_FLOAT32_C(333.00), SIMDE_FLOAT32_C(165.00), SIMDE_FLOAT32_C(402.00),
                         SIMDE_FLOAT32_C(-359.00), SIMDE_FLOAT32_C(-704.00), SIMDE_FLOAT32_C(781.00), SIMDE_FLOAT32_C(-605.00))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-670.88), SIMDE_FLOAT32_C(812.79), SIMDE_FLOAT32_C(-668.93), SIMDE_FLOAT32_C(476.98),
                         SIMDE_FLOAT32_C(590.12), SIMDE_FLOAT32_C(1.22), SIMDE_FLOAT32_C(-683.68), SIMDE_FLOAT32_C(-789.77)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-670.00), SIMDE_FLOAT32_C(813.00), SIMDE_FLOAT32_C(-668.00), SIMDE_FLOAT32_C(477.00),
                         SIMDE_FLOAT32_C(591.00), SIMDE_FLOAT32_C(2.00), SIMDE_FLOAT32_C(-683.00), SIMDE_FLOAT32_C(-789.00))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-872.42), SIMDE_FLOAT32_C(-77.05), SIMDE_FLOAT32_C(-381.51), SIMDE_FLOAT32_C(-862.58),
                         SIMDE_FLOAT32_C(-846.15), SIMDE_FLOAT32_C(-734.49), SIMDE_FLOAT32_C(-50.68), SIMDE_FLOAT32_C(512.52)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-872.00), SIMDE_FLOAT32_C(-77.00), SIMDE_FLOAT32_C(-381.00), SIMDE_FLOAT32_C(-862.00),
                         SIMDE_FLOAT32_C(-846.00), SIMDE_FLOAT32_C(-734.00), SIMDE_FLOAT32_C(-50.00), SIMDE_FLOAT32_C(513.00))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-195.22), SIMDE_FLOAT32_C(-18.78), SIMDE_FLOAT32_C(479.49), SIMDE_FLOAT32_C(552.41),
                         SIMDE_FLOAT32_C(445.93), SIMDE_FLOAT32_C(-70.46), SIMDE_FLOAT32_C(-477.54), SIMDE_FLOAT32_C(557.19)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-195.00), SIMDE_FLOAT32_C(-18.00), SIMDE_FLOAT32_C(480.00), SIMDE_FLOAT32_C(553.00),
                         SIMDE_FLOAT32_C(446.00), SIMDE_FLOAT32_C(-70.00), SIMDE_FLOAT32_C(-477.00), SIMDE_FLOAT32_C(558.00))
    }
  };
  const size_t n_vecs = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vecs ; i++) {
    simde__m256 r = simde_mm256_ceil_ps(test_vec[i].a);
    /* The trailing 1 is the closeness argument of the assertion helper
     * (presumably a decimal precision -- confirm against the test headers). */
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

#if !defined(SIMDE_FAST_MATH)
// The tests guarded by this #if are compiled out when SIMDE_FAST_MATH is defined.
// They could be re-enabled if test cases without NaN arguments or results are added,
// but then only those NaN-free cases may be run in FAST_MATH mode.

// A simde_float64 built by copying the bits of ~UINT64_C(0) (all ones) through
// u64_to_f64, i.e. a value whose 64-bit representation has every bit set.
#define SIMDE_F64_ALL_SET (u64_to_f64(~UINT64_C(0)))

static int
test_simde_mm_cmp_pd (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const simde_float64 a[2];
    const simde_float64 b[2];
    const int64_t r[2];
  } test_vec[] = {
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   164.71),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   882.73), SIMDE_FLOAT64_C(  -344.73) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -800.96) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   243.90), SIMDE_FLOAT64_C(  -564.72) },
      { SIMDE_FLOAT64_C(   243.90), SIMDE_FLOAT64_C(  -564.72) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   549.90),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -694.78), SIMDE_FLOAT64_C(  -529.80) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -529.80) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   371.38), SIMDE_FLOAT64_C(   419.05) },
      { SIMDE_FLOAT64_C(   -26.85), SIMDE_FLOAT64_C(   419.05) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   291.86),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -600.70), SIMDE_FLOAT64_C(  -543.43) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -717.97) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -518.93), SIMDE_FLOAT64_C(   604.78) },
      { SIMDE_FLOAT64_C(   -63.79), SIMDE_FLOAT64_C(   604.78) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -796.62),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -619.43), SIMDE_FLOAT64_C(   753.28) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   753.28) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   939.97), SIMDE_FLOAT64_C(   777.74) },
      { SIMDE_FLOAT64_C(   939.97), SIMDE_FLOAT64_C(   777.74) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   269.88),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   -40.49), SIMDE_FLOAT64_C(  -438.25) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   358.81) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   640.83), SIMDE_FLOAT64_C(  -869.84) },
      { SIMDE_FLOAT64_C(   640.83), SIMDE_FLOAT64_C(  -878.10) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   846.86),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   393.99), SIMDE_FLOAT64_C(  -949.75) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -949.75) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -539.66), SIMDE_FLOAT64_C(  -972.99) },
      { SIMDE_FLOAT64_C(  -539.66), SIMDE_FLOAT64_C(  -599.69) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   711.66),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   926.28), SIMDE_FLOAT64_C(   -18.45) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -114.21) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -755.40), SIMDE_FLOAT64_C(  -438.38) },
      { SIMDE_FLOAT64_C(  -108.46), SIMDE_FLOAT64_C(   885.43) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -992.67),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -521.45), SIMDE_FLOAT64_C(   854.19) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   854.19) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   647.10), SIMDE_FLOAT64_C(   707.97) },
      { SIMDE_FLOAT64_C(   647.10), SIMDE_FLOAT64_C(   707.97) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -492.25),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   789.40), SIMDE_FLOAT64_C(  -780.59) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -780.59) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -398.53), SIMDE_FLOAT64_C(  -255.74) },
      { SIMDE_FLOAT64_C(  -398.53), SIMDE_FLOAT64_C(  -153.93) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -268.50),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(    71.15), SIMDE_FLOAT64_C(  -261.18) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   549.71) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   422.25), SIMDE_FLOAT64_C(  -502.55) },
      { SIMDE_FLOAT64_C(   762.12), SIMDE_FLOAT64_C(    69.35) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   176.79),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -760.86), SIMDE_FLOAT64_C(   684.54) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -971.46) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   744.23), SIMDE_FLOAT64_C(   104.92) },
      { SIMDE_FLOAT64_C(   744.23), SIMDE_FLOAT64_C(   104.92) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   191.77),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -429.74), SIMDE_FLOAT64_C(   923.27) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   923.27) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   191.12), SIMDE_FLOAT64_C(   255.10) },
      { SIMDE_FLOAT64_C(   191.12), SIMDE_FLOAT64_C(   255.10) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   682.72),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -571.20), SIMDE_FLOAT64_C(  -140.49) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -140.49) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -303.52), SIMDE_FLOAT64_C(  -551.99) },
      { SIMDE_FLOAT64_C(  -303.52), SIMDE_FLOAT64_C(  -551.99) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -213.59),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -130.74), SIMDE_FLOAT64_C(   978.19) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   978.19) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(    80.92), SIMDE_FLOAT64_C(  -436.46) },
      { SIMDE_FLOAT64_C(    80.92), SIMDE_FLOAT64_C(  -436.46) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -114.58),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   455.52), SIMDE_FLOAT64_C(  -431.86) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -431.86) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -447.74), SIMDE_FLOAT64_C(   971.71) },
      { SIMDE_FLOAT64_C(  -194.02), SIMDE_FLOAT64_C(   248.74) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   689.45),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   359.04), SIMDE_FLOAT64_C(  -524.13) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -524.13) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   667.81), SIMDE_FLOAT64_C(  -644.49) },
      { SIMDE_FLOAT64_C(   641.82), SIMDE_FLOAT64_C(  -251.27) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(    20.77),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -581.01), SIMDE_FLOAT64_C(   906.19) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   906.19) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   758.83), SIMDE_FLOAT64_C(   901.99) },
      { SIMDE_FLOAT64_C(   758.83), SIMDE_FLOAT64_C(  -688.91) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   559.83),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -722.67), SIMDE_FLOAT64_C(   249.29) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   249.29) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   864.66), SIMDE_FLOAT64_C(  -820.79) },
      { SIMDE_FLOAT64_C(   572.75), SIMDE_FLOAT64_C(  -820.79) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -718.81),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -856.64), SIMDE_FLOAT64_C(   301.97) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -437.64) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -563.13), SIMDE_FLOAT64_C(   682.49) },
      { SIMDE_FLOAT64_C(  -563.13), SIMDE_FLOAT64_C(   682.49) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -493.20),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   675.38), SIMDE_FLOAT64_C(  -933.37) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   952.71) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   589.07), SIMDE_FLOAT64_C(    41.08) },
      { SIMDE_FLOAT64_C(   589.07), SIMDE_FLOAT64_C(    41.08) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   -13.81),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -554.21), SIMDE_FLOAT64_C(   267.39) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   267.39) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   151.51), SIMDE_FLOAT64_C(   777.51) },
      { SIMDE_FLOAT64_C(   151.51), SIMDE_FLOAT64_C(   588.38) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   784.09),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   842.80), SIMDE_FLOAT64_C(  -709.11) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   518.19) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   470.90), SIMDE_FLOAT64_C(   673.44) },
      { SIMDE_FLOAT64_C(  -728.10), SIMDE_FLOAT64_C(   673.44) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -486.30),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   161.60), SIMDE_FLOAT64_C(   499.90) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   607.39) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -803.46), SIMDE_FLOAT64_C(   336.63) },
      { SIMDE_FLOAT64_C(  -803.46), SIMDE_FLOAT64_C(   348.05) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   -63.57),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -561.66), SIMDE_FLOAT64_C(  -279.48) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -718.86) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   799.33), SIMDE_FLOAT64_C(   368.93) },
      { SIMDE_FLOAT64_C(   799.33), SIMDE_FLOAT64_C(   270.23) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -669.80),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -422.21), SIMDE_FLOAT64_C(  -156.10) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -156.10) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   346.78), SIMDE_FLOAT64_C(   111.08) },
      { SIMDE_FLOAT64_C(  -719.85), SIMDE_FLOAT64_C(   543.32) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -108.63),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -221.58), SIMDE_FLOAT64_C(   827.80) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   827.80) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   497.90), SIMDE_FLOAT64_C(   559.73) },
      { SIMDE_FLOAT64_C(   875.87), SIMDE_FLOAT64_C(   297.23) },
      { -INT64_C(                   1),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -432.54),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(  -101.22), SIMDE_FLOAT64_C(  -102.34) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -102.34) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   215.96), SIMDE_FLOAT64_C(  -914.64) },
      { SIMDE_FLOAT64_C(   215.96), SIMDE_FLOAT64_C(  -914.64) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -893.94),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   660.33), SIMDE_FLOAT64_C(    -2.57) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(    -2.57) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   655.50), SIMDE_FLOAT64_C(   373.55) },
      { SIMDE_FLOAT64_C(    96.60), SIMDE_FLOAT64_C(   153.39) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(  -549.37),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   223.84), SIMDE_FLOAT64_C(    18.09) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(    18.09) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   599.19), SIMDE_FLOAT64_C(   657.31) },
      { SIMDE_FLOAT64_C(   599.19), SIMDE_FLOAT64_C(   657.31) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   377.88),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -136.97), SIMDE_FLOAT64_C(   483.94) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   483.94) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   -37.90), SIMDE_FLOAT64_C(   306.60) },
      { SIMDE_FLOAT64_C(   131.24), SIMDE_FLOAT64_C(  -382.41) },
      {  INT64_C(                   0), -INT64_C(                   1) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   770.99),             SIMDE_MATH_NAN },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(   200.30), SIMDE_FLOAT64_C(  -778.38) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -778.38) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { { SIMDE_FLOAT64_C(  -453.24), SIMDE_FLOAT64_C(   155.45) },
      { SIMDE_FLOAT64_C(  -130.76), SIMDE_FLOAT64_C(   155.45) },
      {  INT64_C(                   0),  INT64_C(                   0) } },
    { {             SIMDE_MATH_NAN,             SIMDE_MATH_NAN },
      { SIMDE_FLOAT64_C(   -38.91),             SIMDE_MATH_NAN },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(   631.48), SIMDE_FLOAT64_C(  -661.03) },
      {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -505.49) },
      { -INT64_C(                   1), -INT64_C(                   1) } },
    { { SIMDE_FLOAT64_C(    17.87), SIMDE_FLOAT64_C(  -695.71) },
      { SIMDE_FLOAT64_C(    17.87), SIMDE_FLOAT64_C(   979.97) },
      { -INT64_C(                   1), -INT64_C(                   1) } }
  };

  simde__m128d a, b;
  simde__m128i r;

  a = simde_mm_loadu_pd(test_vec[(0 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(0 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(0 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(0 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(0 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(0 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(0 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(0 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(0 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(1 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(1 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(1 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(1 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(1 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(1 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(1 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(1 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(1 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(2 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(2 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(2 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(2 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(2 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(2 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(2 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(2 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(2 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(3 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(3 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(3 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(3 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(3 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(3 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(3 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(3 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(3 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(4 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(4 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(4 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(4 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(4 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(4 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(4 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(4 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(4 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(5 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(5 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(5 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(5 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(5 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(5 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(5 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(5 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(5 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(6 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(6 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(6 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(6 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(6 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(6 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(6 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(6 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(6 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(7 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(7 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(7 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(7 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(7 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(7 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(7 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(7 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(7 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(8 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(8 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(8 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(8 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(8 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(8 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(8 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(8 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(8 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(9 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(9 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(9 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(9 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(9 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(9 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(9 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(9 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(9 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(10 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(10 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(10 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(10 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(10 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(10 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(10 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(10 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(10 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(11 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(11 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(11 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(11 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(11 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(11 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(11 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(11 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(11 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(12 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(12 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(12 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(12 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(12 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(12 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(12 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(12 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(12 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(13 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(13 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(13 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(13 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(13 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(13 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(13 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(13 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(13 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(14 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(14 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(14 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(14 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(14 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(14 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(14 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(14 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(14 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(15 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(15 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(15 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(15 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(15 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(15 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(15 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(15 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(15 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(16 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(16 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(16 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(16 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(16 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(16 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(16 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(16 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(16 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(17 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(17 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(17 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(17 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(17 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(17 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(17 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(17 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(17 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(18 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(18 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(18 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(18 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(18 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(18 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(18 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(18 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(18 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(19 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(19 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(19 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(19 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(19 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(19 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(19 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(19 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(19 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(20 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(20 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(20 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(20 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(20 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(20 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(20 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(20 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(20 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(21 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(21 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(21 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(21 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(21 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(21 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(21 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(21 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(21 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(22 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(22 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(22 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(22 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(22 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(22 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(22 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(22 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(22 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(23 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(23 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(23 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(23 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(23 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(23 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(23 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(23 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(23 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(24 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(24 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(24 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(24 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(24 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(24 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(24 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(24 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(24 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(25 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(25 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(25 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(25 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(25 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(25 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(25 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(25 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(25 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(26 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(26 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(26 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(26 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(26 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(26 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(26 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(26 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(26 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(27 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(27 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(27 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(27 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(27 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(27 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(27 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(27 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(27 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(28 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(28 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(28 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(28 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(28 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(28 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(28 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(28 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(28 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(29 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(29 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(29 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(29 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(29 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(29 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(29 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(29 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(29 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(30 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(30 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(30 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(30 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(30 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(30 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(30 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(30 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(30 * 3) + 2].r));

  a = simde_mm_loadu_pd(test_vec[(31 * 3) + 0].a);
  b = simde_mm_loadu_pd(test_vec[(31 * 3) + 0].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(31 * 3) + 0].r));
  a = simde_mm_loadu_pd(test_vec[(31 * 3) + 1].a);
  b = simde_mm_loadu_pd(test_vec[(31 * 3) + 1].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(31 * 3) + 1].r));
  a = simde_mm_loadu_pd(test_vec[(31 * 3) + 2].a);
  b = simde_mm_loadu_pd(test_vec[(31 * 3) + 2].b);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[(31 * 3) + 2].r));

  return 0;
#else
  fputc('\n', stdout);

  const simde__m128d nanv = simde_mm_set1_pd(SIMDE_MATH_NAN);

  simde__m128d a, b;
  simde__m128i r;

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = nanv;
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blend_pd(b, nanv, 2);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  b = simde_mm_blend_pd(b, nanv, 1);
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
  b = simde_mm_blendv_pd(a, b, simde_mm_cmplt_pd(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
  r = simde_mm_castpd_si128(simde_mm_cmp_pd(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);

  return 1;
#endif
}

static int
test_simde_mm_cmp_ps (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const simde_float32 a[4];
    const simde_float32 b[4];
    const int32_t r[4];
  } test_vec[] = {
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -662.94), SIMDE_FLOAT32_C(  -757.48) },
      { SIMDE_FLOAT32_C(  -220.18),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   251.42) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(  -900.58), SIMDE_FLOAT32_C(    81.89), SIMDE_FLOAT32_C(  -267.28), SIMDE_FLOAT32_C(   661.06) },
      { SIMDE_FLOAT32_C(  -653.20), SIMDE_FLOAT32_C(   -78.79), SIMDE_FLOAT32_C(  -892.67), SIMDE_FLOAT32_C(  -584.84) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   297.65), SIMDE_FLOAT32_C(   487.28) },
      { SIMDE_FLOAT32_C(  -798.26),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   -18.44) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(  -401.27), SIMDE_FLOAT32_C(  -655.55), SIMDE_FLOAT32_C(   977.58), SIMDE_FLOAT32_C(  -975.82) },
      { SIMDE_FLOAT32_C(  -401.27), SIMDE_FLOAT32_C(  -655.55), SIMDE_FLOAT32_C(  -718.54), SIMDE_FLOAT32_C(  -975.82) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -933.61), SIMDE_FLOAT32_C(  -889.36) },
      { SIMDE_FLOAT32_C(     3.02),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -222.06) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -577.67), SIMDE_FLOAT32_C(  -111.59), SIMDE_FLOAT32_C(   576.86), SIMDE_FLOAT32_C(  -525.02) },
      { SIMDE_FLOAT32_C(  -577.67), SIMDE_FLOAT32_C(  -111.59), SIMDE_FLOAT32_C(   201.14), SIMDE_FLOAT32_C(  -552.62) },
      { -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   375.61), SIMDE_FLOAT32_C(  -294.32) },
      { SIMDE_FLOAT32_C(   401.26),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   702.59) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(  -331.47), SIMDE_FLOAT32_C(  -343.02), SIMDE_FLOAT32_C(   -84.35), SIMDE_FLOAT32_C(    62.09) },
      { SIMDE_FLOAT32_C(  -201.62), SIMDE_FLOAT32_C(  -343.02), SIMDE_FLOAT32_C(   -84.35), SIMDE_FLOAT32_C(    62.09) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -634.69), SIMDE_FLOAT32_C(   180.58) },
      { SIMDE_FLOAT32_C(  -964.81),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   180.58) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   -31.54), SIMDE_FLOAT32_C(  -460.60), SIMDE_FLOAT32_C(    59.69), SIMDE_FLOAT32_C(  -896.85) },
      { SIMDE_FLOAT32_C(   -31.54), SIMDE_FLOAT32_C(  -460.60), SIMDE_FLOAT32_C(  -759.14), SIMDE_FLOAT32_C(   631.22) },
      {  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   549.00), SIMDE_FLOAT32_C(  -593.33) },
      { SIMDE_FLOAT32_C(   584.95),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   620.14) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   526.79), SIMDE_FLOAT32_C(  -807.91), SIMDE_FLOAT32_C(   693.90), SIMDE_FLOAT32_C(   116.69) },
      { SIMDE_FLOAT32_C(   -40.92), SIMDE_FLOAT32_C(  -807.91), SIMDE_FLOAT32_C(  -526.22), SIMDE_FLOAT32_C(   116.69) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(     9.71), SIMDE_FLOAT32_C(   936.28) },
      { SIMDE_FLOAT32_C(   428.05),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   936.28) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(    58.26), SIMDE_FLOAT32_C(  -503.28), SIMDE_FLOAT32_C(   258.80), SIMDE_FLOAT32_C(   199.80) },
      { SIMDE_FLOAT32_C(   186.64), SIMDE_FLOAT32_C(  -503.28), SIMDE_FLOAT32_C(   258.80), SIMDE_FLOAT32_C(   199.80) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   138.23), SIMDE_FLOAT32_C(   402.97) },
      { SIMDE_FLOAT32_C(   -12.82),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -756.24) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(   494.19), SIMDE_FLOAT32_C(  -555.40), SIMDE_FLOAT32_C(  -366.05), SIMDE_FLOAT32_C(   547.91) },
      { SIMDE_FLOAT32_C(   494.19), SIMDE_FLOAT32_C(  -193.94), SIMDE_FLOAT32_C(   186.57), SIMDE_FLOAT32_C(   316.42) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   202.87), SIMDE_FLOAT32_C(  -243.49) },
      { SIMDE_FLOAT32_C(  -495.11),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -243.49) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -725.65), SIMDE_FLOAT32_C(  -784.49), SIMDE_FLOAT32_C(   418.04), SIMDE_FLOAT32_C(   714.81) },
      { SIMDE_FLOAT32_C(  -993.32), SIMDE_FLOAT32_C(  -784.49), SIMDE_FLOAT32_C(   418.04), SIMDE_FLOAT32_C(   500.87) },
      {  INT32_C(           0), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   617.26), SIMDE_FLOAT32_C(   326.12) },
      { SIMDE_FLOAT32_C(  -319.28),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   326.12) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(  -948.58), SIMDE_FLOAT32_C(   909.32), SIMDE_FLOAT32_C(   141.54), SIMDE_FLOAT32_C(   388.32) },
      { SIMDE_FLOAT32_C(    14.75), SIMDE_FLOAT32_C(   909.32), SIMDE_FLOAT32_C(   141.54), SIMDE_FLOAT32_C(   289.10) },
      { -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -701.36), SIMDE_FLOAT32_C(   621.41) },
      { SIMDE_FLOAT32_C(   578.73),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -172.16) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   672.99), SIMDE_FLOAT32_C(   969.52), SIMDE_FLOAT32_C(   449.08), SIMDE_FLOAT32_C(   739.56) },
      { SIMDE_FLOAT32_C(   672.99), SIMDE_FLOAT32_C(   969.52), SIMDE_FLOAT32_C(  -149.98), SIMDE_FLOAT32_C(    39.36) },
      { -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   604.38), SIMDE_FLOAT32_C(   358.23) },
      { SIMDE_FLOAT32_C(   -29.08),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   358.23) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   802.05), SIMDE_FLOAT32_C(  -906.68), SIMDE_FLOAT32_C(   827.87), SIMDE_FLOAT32_C(   384.81) },
      { SIMDE_FLOAT32_C(   802.05), SIMDE_FLOAT32_C(  -906.68), SIMDE_FLOAT32_C(   827.87), SIMDE_FLOAT32_C(   305.82) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   978.65), SIMDE_FLOAT32_C(   789.10) },
      { SIMDE_FLOAT32_C(  -511.42),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -545.90) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -682.95), SIMDE_FLOAT32_C(   968.33), SIMDE_FLOAT32_C(   -82.82), SIMDE_FLOAT32_C(  -487.42) },
      { SIMDE_FLOAT32_C(  -682.95), SIMDE_FLOAT32_C(   968.33), SIMDE_FLOAT32_C(   -82.82), SIMDE_FLOAT32_C(  -774.97) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -879.80), SIMDE_FLOAT32_C(   273.68) },
      { SIMDE_FLOAT32_C(   322.33),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -155.92) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -402.06), SIMDE_FLOAT32_C(  -262.44), SIMDE_FLOAT32_C(   402.03), SIMDE_FLOAT32_C(   994.54) },
      { SIMDE_FLOAT32_C(  -402.06), SIMDE_FLOAT32_C(  -262.44), SIMDE_FLOAT32_C(   402.03), SIMDE_FLOAT32_C(   994.54) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   830.70), SIMDE_FLOAT32_C(   549.04) },
      { SIMDE_FLOAT32_C(  -870.94),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   549.04) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   649.20), SIMDE_FLOAT32_C(   -61.96), SIMDE_FLOAT32_C(   898.25), SIMDE_FLOAT32_C(  -289.82) },
      { SIMDE_FLOAT32_C(   649.20), SIMDE_FLOAT32_C(   -61.96), SIMDE_FLOAT32_C(   898.25), SIMDE_FLOAT32_C(  -289.82) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -521.30), SIMDE_FLOAT32_C(   457.21) },
      { SIMDE_FLOAT32_C(   363.45),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   730.93) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -665.89), SIMDE_FLOAT32_C(   643.39), SIMDE_FLOAT32_C(  -876.32), SIMDE_FLOAT32_C(    32.78) },
      { SIMDE_FLOAT32_C(  -665.89), SIMDE_FLOAT32_C(   643.39), SIMDE_FLOAT32_C(  -913.05), SIMDE_FLOAT32_C(   776.22) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -355.97), SIMDE_FLOAT32_C(   785.63) },
      { SIMDE_FLOAT32_C(   204.67),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   785.63) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(   734.32), SIMDE_FLOAT32_C(   999.27), SIMDE_FLOAT32_C(   544.76), SIMDE_FLOAT32_C(  -722.45) },
      { SIMDE_FLOAT32_C(  -107.15), SIMDE_FLOAT32_C(   999.27), SIMDE_FLOAT32_C(   -50.17), SIMDE_FLOAT32_C(   226.96) },
      {  INT32_C(           0), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -628.65), SIMDE_FLOAT32_C(   709.03) },
      { SIMDE_FLOAT32_C(   922.25),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -151.49) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   635.67), SIMDE_FLOAT32_C(  -864.89), SIMDE_FLOAT32_C(   686.11), SIMDE_FLOAT32_C(  -913.95) },
      { SIMDE_FLOAT32_C(  -885.24), SIMDE_FLOAT32_C(  -864.89), SIMDE_FLOAT32_C(   899.88), SIMDE_FLOAT32_C(  -913.95) },
      {  INT32_C(           0),  INT32_C(           0), -INT32_C(           1),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -258.42), SIMDE_FLOAT32_C(  -262.52) },
      { SIMDE_FLOAT32_C(  -730.63),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -262.52) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -438.30), SIMDE_FLOAT32_C(  -593.28), SIMDE_FLOAT32_C(  -471.62), SIMDE_FLOAT32_C(   563.96) },
      { SIMDE_FLOAT32_C(  -438.30), SIMDE_FLOAT32_C(  -593.28), SIMDE_FLOAT32_C(  -471.62), SIMDE_FLOAT32_C(   563.96) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   261.48), SIMDE_FLOAT32_C(  -298.71) },
      { SIMDE_FLOAT32_C(  -257.54),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(    11.83) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   958.81), SIMDE_FLOAT32_C(    81.92), SIMDE_FLOAT32_C(  -313.24), SIMDE_FLOAT32_C(   306.80) },
      { SIMDE_FLOAT32_C(  -214.12), SIMDE_FLOAT32_C(    81.92), SIMDE_FLOAT32_C(   -13.33), SIMDE_FLOAT32_C(   347.58) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   862.86), SIMDE_FLOAT32_C(   975.22) },
      { SIMDE_FLOAT32_C(  -860.57),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   975.22) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(  -962.78), SIMDE_FLOAT32_C(   136.15), SIMDE_FLOAT32_C(  -687.08), SIMDE_FLOAT32_C(  -498.21) },
      { SIMDE_FLOAT32_C(  -962.78), SIMDE_FLOAT32_C(   136.15), SIMDE_FLOAT32_C(  -687.08), SIMDE_FLOAT32_C(  -498.21) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   277.62), SIMDE_FLOAT32_C(   145.85) },
      { SIMDE_FLOAT32_C(   860.99),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   145.85) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -179.51), SIMDE_FLOAT32_C(   901.42), SIMDE_FLOAT32_C(   229.97), SIMDE_FLOAT32_C(   379.12) },
      { SIMDE_FLOAT32_C(  -179.51), SIMDE_FLOAT32_C(  -849.80), SIMDE_FLOAT32_C(   229.97), SIMDE_FLOAT32_C(   379.12) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   432.45), SIMDE_FLOAT32_C(  -800.48) },
      { SIMDE_FLOAT32_C(    55.37),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -719.93) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   596.33), SIMDE_FLOAT32_C(   122.73), SIMDE_FLOAT32_C(   537.20), SIMDE_FLOAT32_C(  -532.09) },
      { SIMDE_FLOAT32_C(   596.33), SIMDE_FLOAT32_C(  -949.00), SIMDE_FLOAT32_C(   478.94), SIMDE_FLOAT32_C(   502.89) },
      {  INT32_C(           0), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(    90.71), SIMDE_FLOAT32_C(  -854.55) },
      { SIMDE_FLOAT32_C(  -444.65),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   610.72) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(   749.60), SIMDE_FLOAT32_C(   939.79), SIMDE_FLOAT32_C(  -324.86), SIMDE_FLOAT32_C(  -112.76) },
      { SIMDE_FLOAT32_C(   749.60), SIMDE_FLOAT32_C(   939.79), SIMDE_FLOAT32_C(  -324.86), SIMDE_FLOAT32_C(  -112.76) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   409.07), SIMDE_FLOAT32_C(   426.48) },
      { SIMDE_FLOAT32_C(   474.06),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -541.86) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   200.39), SIMDE_FLOAT32_C(  -870.97), SIMDE_FLOAT32_C(  -980.63), SIMDE_FLOAT32_C(   586.50) },
      { SIMDE_FLOAT32_C(   200.39), SIMDE_FLOAT32_C(   829.61), SIMDE_FLOAT32_C(  -980.63), SIMDE_FLOAT32_C(   586.50) },
      { -INT32_C(           1),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -940.36), SIMDE_FLOAT32_C(   981.56) },
      { SIMDE_FLOAT32_C(    32.67),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   981.56) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   587.88), SIMDE_FLOAT32_C(   862.64), SIMDE_FLOAT32_C(  -581.86), SIMDE_FLOAT32_C(   441.17) },
      { SIMDE_FLOAT32_C(   587.88), SIMDE_FLOAT32_C(   862.64), SIMDE_FLOAT32_C(  -581.86), SIMDE_FLOAT32_C(  -916.84) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(    74.20), SIMDE_FLOAT32_C(   468.19) },
      { SIMDE_FLOAT32_C(  -692.04),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   468.19) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -884.98), SIMDE_FLOAT32_C(   -64.97), SIMDE_FLOAT32_C(   946.97), SIMDE_FLOAT32_C(  -524.01) },
      { SIMDE_FLOAT32_C(  -884.98), SIMDE_FLOAT32_C(   880.17), SIMDE_FLOAT32_C(   946.97), SIMDE_FLOAT32_C(   819.72) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -291.26), SIMDE_FLOAT32_C(    26.62) },
      { SIMDE_FLOAT32_C(   -20.09),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -321.30) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { { SIMDE_FLOAT32_C(   136.45), SIMDE_FLOAT32_C(  -268.56), SIMDE_FLOAT32_C(   403.33), SIMDE_FLOAT32_C(   544.01) },
      { SIMDE_FLOAT32_C(   136.45), SIMDE_FLOAT32_C(  -549.38), SIMDE_FLOAT32_C(   403.33), SIMDE_FLOAT32_C(   544.01) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   437.20), SIMDE_FLOAT32_C(  -445.80) },
      { SIMDE_FLOAT32_C(  -188.61),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   132.62) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -733.08), SIMDE_FLOAT32_C(   423.54), SIMDE_FLOAT32_C(   467.42), SIMDE_FLOAT32_C(  -703.44) },
      { SIMDE_FLOAT32_C(   762.03), SIMDE_FLOAT32_C(   614.31), SIMDE_FLOAT32_C(  -365.84), SIMDE_FLOAT32_C(  -703.44) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   212.03), SIMDE_FLOAT32_C(   873.75) },
      { SIMDE_FLOAT32_C(  -511.90),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   169.34) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(   626.53), SIMDE_FLOAT32_C(   -96.69), SIMDE_FLOAT32_C(   970.66), SIMDE_FLOAT32_C(   435.34) },
      { SIMDE_FLOAT32_C(   626.53), SIMDE_FLOAT32_C(   -96.69), SIMDE_FLOAT32_C(   970.66), SIMDE_FLOAT32_C(   435.34) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -786.07), SIMDE_FLOAT32_C(   651.55) },
      { SIMDE_FLOAT32_C(  -973.04),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -484.94) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(   763.63), SIMDE_FLOAT32_C(   880.05), SIMDE_FLOAT32_C(   610.45), SIMDE_FLOAT32_C(   344.92) },
      { SIMDE_FLOAT32_C(  -191.52), SIMDE_FLOAT32_C(   333.99), SIMDE_FLOAT32_C(   610.45), SIMDE_FLOAT32_C(   344.92) },
      { -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -912.11), SIMDE_FLOAT32_C(   836.93) },
      { SIMDE_FLOAT32_C(  -716.49),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   836.93) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { { SIMDE_FLOAT32_C(  -977.25), SIMDE_FLOAT32_C(  -883.83), SIMDE_FLOAT32_C(   974.68), SIMDE_FLOAT32_C(  -894.09) },
      { SIMDE_FLOAT32_C(  -298.87), SIMDE_FLOAT32_C(  -636.50), SIMDE_FLOAT32_C(   974.68), SIMDE_FLOAT32_C(  -894.09) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } }
  };

  simde__m128 a, b;
  simde__m128i r;

  a = simde_mm_loadu_ps(test_vec[(0 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(0 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(0 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(0 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(0 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(0 * 2) + 1].r));

  /* SIMDE_CMP_LT_OS, entry (1 * 2) + 0: the inputs and the expected mask
   * must all be read from the same test_vec entry.  The expected result was
   * previously taken from entry (0 * 2) + 0, which belongs to the
   * SIMDE_CMP_EQ_OQ check. */
  a = simde_mm_loadu_ps(test_vec[(1 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(1 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(1 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(1 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(1 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(1 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(2 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(2 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(2 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(2 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(2 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(2 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(3 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(3 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(3 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(3 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(3 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(3 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(4 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(4 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(4 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(4 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(4 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(4 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(5 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(5 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(5 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(5 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(5 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(5 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(6 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(6 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(6 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(6 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(6 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(6 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(7 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(7 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(7 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(7 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(7 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(7 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(8 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(8 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(8 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(8 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(8 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(8 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(9 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(9 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(9 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(9 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(9 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(9 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(10 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(10 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(10 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(10 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(10 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(10 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(11 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(11 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(11 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(11 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(11 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(11 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(12 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(12 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(12 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(12 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(12 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(12 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(13 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(13 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(13 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(13 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(13 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(13 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(14 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(14 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(14 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(14 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(14 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(14 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(15 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(15 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(15 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(15 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(15 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(15 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(16 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(16 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(16 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(16 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(16 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(16 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(17 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(17 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(17 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(17 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(17 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(17 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(18 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(18 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(18 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(18 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(18 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(18 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(19 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(19 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(19 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(19 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(19 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(19 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(20 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(20 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(20 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(20 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(20 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(20 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(21 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(21 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(21 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(21 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(21 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(21 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(22 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(22 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(22 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(22 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(22 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(22 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(23 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(23 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(23 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(23 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(23 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(23 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(24 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(24 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(24 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(24 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(24 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(24 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(25 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(25 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(25 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(25 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(25 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(25 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(26 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(26 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(26 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(26 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(26 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(26 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(27 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(27 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(27 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(27 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(27 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(27 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(28 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(28 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(28 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(28 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(28 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(28 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(29 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(29 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(29 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(29 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(29 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(29 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(30 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(30 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(30 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(30 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(30 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(30 * 2) + 1].r));

  a = simde_mm_loadu_ps(test_vec[(31 * 2) + 0].a);
  b = simde_mm_loadu_ps(test_vec[(31 * 2) + 0].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(31 * 2) + 0].r));

  a = simde_mm_loadu_ps(test_vec[(31 * 2) + 1].a);
  b = simde_mm_loadu_ps(test_vec[(31 * 2) + 1].b);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[(31 * 2) + 1].r));

  return 0;
#else
  fputc('\n', stdout);

  const simde__m128 nanv = simde_mm_set1_ps(SIMDE_MATH_NANF);

  simde__m128 a, b;
  simde__m128i r;

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  a = simde_mm_blend_ps(a, nanv, 3);
  b = simde_mm_blend_ps(b, nanv, 6);
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm_blendv_ps(a, b, simde_mm_cmplt_ps(simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
  r = simde_mm_castps_si128(simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);

  return 1;
#endif
}

static int
test_simde_mm_cmp_sd(SIMDE_MUNIT_TEST_ARGS) {
  simde__m128d a, b, r, e;

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  107.30), SIMDE_FLOAT64_C( -206.83));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  787.17), SIMDE_FLOAT64_C( -721.13));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  107.30), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 0);
  simde_assert_m128d_equal(r, e);

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(   33.46), SIMDE_FLOAT64_C(  248.77));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -730.30), SIMDE_FLOAT64_C(  751.84));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(   33.46), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 1);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  -53.40), SIMDE_FLOAT64_C(   23.60));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -939.31), SIMDE_FLOAT64_C( -627.35));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  -53.40), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 2);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -636.30), SIMDE_FLOAT64_C( -129.84));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  230.46), SIMDE_FLOAT64_C( -440.12));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -636.30), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 3);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  961.48), SIMDE_FLOAT64_C(  556.89));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  374.50), SIMDE_FLOAT64_C(  904.56));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  961.48), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 4);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -605.79), SIMDE_FLOAT64_C( -288.15));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -656.14), SIMDE_FLOAT64_C(  374.06));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -605.79), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 5);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -592.25), SIMDE_FLOAT64_C( -155.18));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -130.80), SIMDE_FLOAT64_C(  432.83));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -592.25), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 6);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -431.55), SIMDE_FLOAT64_C(  636.53));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -452.34), SIMDE_FLOAT64_C( -728.36));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -431.55), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 7);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -183.31), SIMDE_FLOAT64_C( -243.73));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  442.30), SIMDE_FLOAT64_C(  450.60));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -183.31), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 8);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -210.29), SIMDE_FLOAT64_C(  -50.46));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -734.21), SIMDE_FLOAT64_C(  273.75));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -210.29), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 9);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -186.57), SIMDE_FLOAT64_C( -822.86));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -462.18), SIMDE_FLOAT64_C( -425.13));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -186.57), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 10);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  968.62), SIMDE_FLOAT64_C( -745.37));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -464.28), SIMDE_FLOAT64_C(  713.42));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  968.62), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 11);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  671.94), SIMDE_FLOAT64_C( -405.59));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -842.74), SIMDE_FLOAT64_C( -483.02));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  671.94), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 12);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -228.63), SIMDE_FLOAT64_C(  298.91));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -708.14), SIMDE_FLOAT64_C(  189.31));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -228.63), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 13);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -797.77), SIMDE_FLOAT64_C( -286.96));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  288.87), SIMDE_FLOAT64_C(  398.76));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -797.77), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 14);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -248.44), SIMDE_FLOAT64_C(  191.43));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  602.04), SIMDE_FLOAT64_C(  999.35));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -248.44), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 15);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -614.65), SIMDE_FLOAT64_C(  963.28));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -385.61), SIMDE_FLOAT64_C(  770.12));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -614.65), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 16);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -717.70), SIMDE_FLOAT64_C(  750.92));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  618.97), SIMDE_FLOAT64_C(  676.03));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -717.70), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 17);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  561.50), SIMDE_FLOAT64_C(  549.62));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -454.02), SIMDE_FLOAT64_C( -961.18));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  561.50), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 18);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -480.89), SIMDE_FLOAT64_C(  -68.38));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  293.48), SIMDE_FLOAT64_C(  459.12));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -480.89), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 19);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -600.32), SIMDE_FLOAT64_C( -105.54));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -623.63), SIMDE_FLOAT64_C(  235.12));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -600.32), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 20);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  548.84), SIMDE_FLOAT64_C(  411.69));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  824.49), SIMDE_FLOAT64_C( -866.20));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  548.84), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 21);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  201.44), SIMDE_FLOAT64_C(  276.75));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -301.93), SIMDE_FLOAT64_C( -238.56));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  201.44), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 22);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  250.23), SIMDE_FLOAT64_C( -604.81));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  557.49), SIMDE_FLOAT64_C(  137.99));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  250.23), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 23);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -235.15), SIMDE_FLOAT64_C( -121.76));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -394.35), SIMDE_FLOAT64_C(  272.69));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -235.15), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 24);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  119.18), SIMDE_FLOAT64_C(  423.89));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  610.02), SIMDE_FLOAT64_C( -437.27));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  119.18), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 25);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -156.34), SIMDE_FLOAT64_C( -571.13));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -665.54), SIMDE_FLOAT64_C(  -18.98));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -156.34), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 26);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  685.87), SIMDE_FLOAT64_C( -600.86));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -804.08), SIMDE_FLOAT64_C( -631.16));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  685.87), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 27);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  959.19), SIMDE_FLOAT64_C( -478.47));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -520.61), SIMDE_FLOAT64_C( -214.50));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  959.19), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 28);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -527.73), SIMDE_FLOAT64_C( -211.69));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  356.74), SIMDE_FLOAT64_C(  955.71));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -527.73), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 29);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -426.25), SIMDE_FLOAT64_C( -493.55));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  886.66), SIMDE_FLOAT64_C(  569.52));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -426.25), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm_cmp_sd(a, b, 30);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  a = simde_mm_set_pd(SIMDE_FLOAT64_C(  981.35), SIMDE_FLOAT64_C( -586.10));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C(  775.81), SIMDE_FLOAT64_C(  631.37));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C(  981.35), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, 31);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  return 0;
}

/* Exercises simde_mm_cmp_ss once for every comparison immediate 0..31.
 *
 * Each case lives in its own scope and follows the same recipe:
 *   - lhs / rhs are the two operands; the last argument of
 *     simde_mm_set_ps is lane 0, the only lane that is compared.
 *   - e is the expected result: the three upper lanes of lhs, with lane 0
 *     replaced (through simde_mm_move_ss) by either simde_mm_setzero_ps()
 *     when the predicate is false or simde_x_mm_setone_ps() when it is
 *     true.  Whatever was placed in lane 0 of e beforehand is discarded.
 *   - r is the actual result, compared to e bit-for-bit as four u32 lanes.
 *
 * The predicate names in the per-case comments follow Intel's _CMP_*
 * numbering.  The immediate is always written as a literal integer.
 *
 * Reaching the end of the function returns 0. */
static int
test_simde_mm_cmp_ss(SIMDE_MUNIT_TEST_ARGS) {
  { /* 0, EQ_OQ: -451.19 == -847.97 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(   29.86), SIMDE_FLOAT32_C( -506.56), SIMDE_FLOAT32_C(   52.70), SIMDE_FLOAT32_C( -451.19));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -773.77), SIMDE_FLOAT32_C(  947.93), SIMDE_FLOAT32_C( -234.67), SIMDE_FLOAT32_C( -847.97));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(   29.86), SIMDE_FLOAT32_C( -506.56), SIMDE_FLOAT32_C(   52.70), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 0);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 1, LT_OS: 158.71 < -282.67 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -133.43), SIMDE_FLOAT32_C(  949.13), SIMDE_FLOAT32_C(  326.28), SIMDE_FLOAT32_C(  158.71));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  684.48), SIMDE_FLOAT32_C(  677.57), SIMDE_FLOAT32_C( -960.66), SIMDE_FLOAT32_C( -282.67));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -133.43), SIMDE_FLOAT32_C(  949.13), SIMDE_FLOAT32_C(  326.28), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 1);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 2, LE_OS: 733.89 <= 78.74 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  193.40), SIMDE_FLOAT32_C(  779.62), SIMDE_FLOAT32_C( -982.70), SIMDE_FLOAT32_C(  733.89));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  820.93), SIMDE_FLOAT32_C(   29.11), SIMDE_FLOAT32_C( -999.26), SIMDE_FLOAT32_C(   78.74));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  193.40), SIMDE_FLOAT32_C(  779.62), SIMDE_FLOAT32_C( -982.70), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 2);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 3, UNORD_Q: neither 502.59 nor 596.93 is NaN, so false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  253.28), SIMDE_FLOAT32_C(  166.31), SIMDE_FLOAT32_C(  346.10), SIMDE_FLOAT32_C(  502.59));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  807.03), SIMDE_FLOAT32_C(  802.13), SIMDE_FLOAT32_C(  411.74), SIMDE_FLOAT32_C(  596.93));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  253.28), SIMDE_FLOAT32_C(  166.31), SIMDE_FLOAT32_C(  346.10), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 3);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 4, NEQ_UQ: -573.74 != 55.32 is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  820.59), SIMDE_FLOAT32_C(  257.56), SIMDE_FLOAT32_C( -468.51), SIMDE_FLOAT32_C( -573.74));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -952.67), SIMDE_FLOAT32_C( -547.39), SIMDE_FLOAT32_C(   82.21), SIMDE_FLOAT32_C(   55.32));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  820.59), SIMDE_FLOAT32_C(  257.56), SIMDE_FLOAT32_C( -468.51), SIMDE_FLOAT32_C( -573.74));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 4);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 5, NLT_US: !(933.34 < -401.55) is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -856.96), SIMDE_FLOAT32_C(  380.95), SIMDE_FLOAT32_C(  354.16), SIMDE_FLOAT32_C(  933.34));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  937.78), SIMDE_FLOAT32_C( -846.91), SIMDE_FLOAT32_C(  481.96), SIMDE_FLOAT32_C( -401.55));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -856.96), SIMDE_FLOAT32_C(  380.95), SIMDE_FLOAT32_C(  354.16), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 5);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 6, NLE_US: !(919.68 <= -612.04) is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  701.49), SIMDE_FLOAT32_C(  203.70), SIMDE_FLOAT32_C( -473.49), SIMDE_FLOAT32_C(  919.68));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  917.48), SIMDE_FLOAT32_C(  496.48), SIMDE_FLOAT32_C( -380.99), SIMDE_FLOAT32_C( -612.04));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  701.49), SIMDE_FLOAT32_C(  203.70), SIMDE_FLOAT32_C( -473.49), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 6);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 7, ORD_Q: -489.47 and 753.56 are both non-NaN, so true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -495.27), SIMDE_FLOAT32_C(    2.61), SIMDE_FLOAT32_C(  190.46), SIMDE_FLOAT32_C( -489.47));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  733.40), SIMDE_FLOAT32_C( -262.53), SIMDE_FLOAT32_C( -250.54), SIMDE_FLOAT32_C(  753.56));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -495.27), SIMDE_FLOAT32_C(    2.61), SIMDE_FLOAT32_C(  190.46), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 7);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 8, EQ_UQ: -678.55 == -435.68 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -295.58), SIMDE_FLOAT32_C(  686.48), SIMDE_FLOAT32_C( -715.68), SIMDE_FLOAT32_C( -678.55));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  -64.78), SIMDE_FLOAT32_C( -994.32), SIMDE_FLOAT32_C(  819.61), SIMDE_FLOAT32_C( -435.68));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -295.58), SIMDE_FLOAT32_C(  686.48), SIMDE_FLOAT32_C( -715.68), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 8);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 9, NGE_US: !(-359.83 >= 476.51) is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -446.36), SIMDE_FLOAT32_C( -630.25), SIMDE_FLOAT32_C(  895.61), SIMDE_FLOAT32_C( -359.83));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -712.74), SIMDE_FLOAT32_C( -683.20), SIMDE_FLOAT32_C( -684.03), SIMDE_FLOAT32_C(  476.51));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -446.36), SIMDE_FLOAT32_C( -630.25), SIMDE_FLOAT32_C(  895.61), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 9);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 10, NGT_US: !(-73.76 > 945.10) is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  453.59), SIMDE_FLOAT32_C( -704.68), SIMDE_FLOAT32_C(  968.54), SIMDE_FLOAT32_C(  -73.76));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -793.56), SIMDE_FLOAT32_C( -626.26), SIMDE_FLOAT32_C(  371.33), SIMDE_FLOAT32_C(  945.10));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  453.59), SIMDE_FLOAT32_C( -704.68), SIMDE_FLOAT32_C(  968.54), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 10);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 11, FALSE_OQ: always false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  747.32), SIMDE_FLOAT32_C(  722.71), SIMDE_FLOAT32_C(   10.11), SIMDE_FLOAT32_C( -589.46));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -358.96), SIMDE_FLOAT32_C( -216.71), SIMDE_FLOAT32_C( -367.08), SIMDE_FLOAT32_C(  571.81));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  747.32), SIMDE_FLOAT32_C(  722.71), SIMDE_FLOAT32_C(   10.11), SIMDE_FLOAT32_C( -589.46));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 11);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 12, NEQ_OQ: 101.30 != -994.97 is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -393.04), SIMDE_FLOAT32_C( -521.21), SIMDE_FLOAT32_C(  315.85), SIMDE_FLOAT32_C(  101.30));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -254.13), SIMDE_FLOAT32_C(  380.76), SIMDE_FLOAT32_C(  862.95), SIMDE_FLOAT32_C( -994.97));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -393.04), SIMDE_FLOAT32_C( -521.21), SIMDE_FLOAT32_C(  315.85), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 12);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 13, GE_OS: -853.08 >= -786.81 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -429.51), SIMDE_FLOAT32_C(  950.04), SIMDE_FLOAT32_C(  770.94), SIMDE_FLOAT32_C( -853.08));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  178.64), SIMDE_FLOAT32_C(  -50.79), SIMDE_FLOAT32_C(  741.69), SIMDE_FLOAT32_C( -786.81));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -429.51), SIMDE_FLOAT32_C(  950.04), SIMDE_FLOAT32_C(  770.94), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 13);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 14, GT_OS: 505.23 > 342.00 is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  499.30), SIMDE_FLOAT32_C(  807.95), SIMDE_FLOAT32_C( -410.68), SIMDE_FLOAT32_C(  505.23));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -114.66), SIMDE_FLOAT32_C(  865.01), SIMDE_FLOAT32_C( -665.39), SIMDE_FLOAT32_C(  342.00));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  499.30), SIMDE_FLOAT32_C(  807.95), SIMDE_FLOAT32_C( -410.68), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 14);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 15, TRUE_UQ: always true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  744.11), SIMDE_FLOAT32_C(  103.80), SIMDE_FLOAT32_C(  230.08), SIMDE_FLOAT32_C( -784.93));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -640.30), SIMDE_FLOAT32_C(  690.88), SIMDE_FLOAT32_C( -782.01), SIMDE_FLOAT32_C( -779.01));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  744.11), SIMDE_FLOAT32_C(  103.80), SIMDE_FLOAT32_C(  230.08), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 15);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 16, EQ_OS: -552.02 == -655.54 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  937.00), SIMDE_FLOAT32_C( -237.56), SIMDE_FLOAT32_C(  614.04), SIMDE_FLOAT32_C( -552.02));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  636.71), SIMDE_FLOAT32_C(  821.24), SIMDE_FLOAT32_C(  385.34), SIMDE_FLOAT32_C( -655.54));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  937.00), SIMDE_FLOAT32_C( -237.56), SIMDE_FLOAT32_C(  614.04), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 16);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 17, LT_OQ: 182.80 < -149.67 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -840.84), SIMDE_FLOAT32_C( -184.65), SIMDE_FLOAT32_C(  856.01), SIMDE_FLOAT32_C(  182.80));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  -54.26), SIMDE_FLOAT32_C(  831.01), SIMDE_FLOAT32_C( -693.60), SIMDE_FLOAT32_C( -149.67));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -840.84), SIMDE_FLOAT32_C( -184.65), SIMDE_FLOAT32_C(  856.01), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 17);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 18, LE_OQ: 90.38 <= 34.39 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  422.72), SIMDE_FLOAT32_C( -740.98), SIMDE_FLOAT32_C( -971.04), SIMDE_FLOAT32_C(   90.38));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  787.23), SIMDE_FLOAT32_C( -946.13), SIMDE_FLOAT32_C(  562.60), SIMDE_FLOAT32_C(   34.39));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  422.72), SIMDE_FLOAT32_C( -740.98), SIMDE_FLOAT32_C( -971.04), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 18);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 19, UNORD_S: neither 371.64 nor -574.41 is NaN, so false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  379.97), SIMDE_FLOAT32_C(  252.47), SIMDE_FLOAT32_C(  573.41), SIMDE_FLOAT32_C(  371.64));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(   86.67), SIMDE_FLOAT32_C(  230.06), SIMDE_FLOAT32_C(  816.36), SIMDE_FLOAT32_C( -574.41));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  379.97), SIMDE_FLOAT32_C(  252.47), SIMDE_FLOAT32_C(  573.41), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 19);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 20, NEQ_US: 567.23 != 105.90 is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( -409.94), SIMDE_FLOAT32_C(  339.05), SIMDE_FLOAT32_C(  567.23));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  548.12), SIMDE_FLOAT32_C(  482.27), SIMDE_FLOAT32_C( -877.01), SIMDE_FLOAT32_C(  105.90));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( -409.94), SIMDE_FLOAT32_C(  339.05), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 20);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 21, NLT_UQ: !(215.63 < -935.31) is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  443.93), SIMDE_FLOAT32_C(  879.02), SIMDE_FLOAT32_C(  280.77), SIMDE_FLOAT32_C(  215.63));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  858.68), SIMDE_FLOAT32_C(  507.37), SIMDE_FLOAT32_C(  274.86), SIMDE_FLOAT32_C( -935.31));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  443.93), SIMDE_FLOAT32_C(  879.02), SIMDE_FLOAT32_C(  280.77), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 21);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 22, NLE_UQ: !(620.00 <= -880.53) is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  417.98), SIMDE_FLOAT32_C(  304.47), SIMDE_FLOAT32_C( -370.04), SIMDE_FLOAT32_C(  620.00));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -491.67), SIMDE_FLOAT32_C(  756.54), SIMDE_FLOAT32_C( -538.71), SIMDE_FLOAT32_C( -880.53));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  417.98), SIMDE_FLOAT32_C(  304.47), SIMDE_FLOAT32_C( -370.04), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 22);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 23, ORD_S: 913.75 and -586.31 are both non-NaN, so true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -778.82), SIMDE_FLOAT32_C( -277.37), SIMDE_FLOAT32_C( -561.10), SIMDE_FLOAT32_C(  913.75));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -799.75), SIMDE_FLOAT32_C(  322.89), SIMDE_FLOAT32_C(  168.49), SIMDE_FLOAT32_C( -586.31));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -778.82), SIMDE_FLOAT32_C( -277.37), SIMDE_FLOAT32_C( -561.10), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 23);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 24, EQ_US: 577.01 == -668.68 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(   19.63), SIMDE_FLOAT32_C( -796.59), SIMDE_FLOAT32_C(  829.80), SIMDE_FLOAT32_C(  577.01));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  825.83), SIMDE_FLOAT32_C( -702.38), SIMDE_FLOAT32_C(  909.63), SIMDE_FLOAT32_C( -668.68));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(   19.63), SIMDE_FLOAT32_C( -796.59), SIMDE_FLOAT32_C(  829.80), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 24);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 25, NGE_UQ: !(-374.29 >= -304.33) is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -935.55), SIMDE_FLOAT32_C( -906.06), SIMDE_FLOAT32_C(   23.18), SIMDE_FLOAT32_C( -374.29));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  903.69), SIMDE_FLOAT32_C( -930.74), SIMDE_FLOAT32_C( -354.90), SIMDE_FLOAT32_C( -304.33));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -935.55), SIMDE_FLOAT32_C( -906.06), SIMDE_FLOAT32_C(   23.18), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 25);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 26, NGT_UQ: !(-348.94 > -491.97) is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -730.13), SIMDE_FLOAT32_C( -258.50), SIMDE_FLOAT32_C( -873.85), SIMDE_FLOAT32_C( -348.94));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(   29.53), SIMDE_FLOAT32_C(  827.18), SIMDE_FLOAT32_C(  334.24), SIMDE_FLOAT32_C( -491.97));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -730.13), SIMDE_FLOAT32_C( -258.50), SIMDE_FLOAT32_C( -873.85), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 26);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 27, FALSE_OS: always false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -381.04), SIMDE_FLOAT32_C(  669.32), SIMDE_FLOAT32_C( -574.70), SIMDE_FLOAT32_C(  440.24));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -837.63), SIMDE_FLOAT32_C( -877.92), SIMDE_FLOAT32_C( -226.69), SIMDE_FLOAT32_C( -557.27));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -381.04), SIMDE_FLOAT32_C(  669.32), SIMDE_FLOAT32_C( -574.70), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 27);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 28, NEQ_OS: 238.84 != -967.04 is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C(  239.21), SIMDE_FLOAT32_C(  527.22), SIMDE_FLOAT32_C(  489.56), SIMDE_FLOAT32_C(  238.84));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -364.20), SIMDE_FLOAT32_C(  722.98), SIMDE_FLOAT32_C(  475.77), SIMDE_FLOAT32_C( -967.04));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C(  239.21), SIMDE_FLOAT32_C(  527.22), SIMDE_FLOAT32_C(  489.56), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 28);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 29, GE_OQ: -131.87 >= -330.60 is true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -308.58), SIMDE_FLOAT32_C( -108.66), SIMDE_FLOAT32_C(  857.88), SIMDE_FLOAT32_C( -131.87));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -505.11), SIMDE_FLOAT32_C( -213.47), SIMDE_FLOAT32_C(   71.14), SIMDE_FLOAT32_C( -330.60));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -308.58), SIMDE_FLOAT32_C( -108.66), SIMDE_FLOAT32_C(  857.88), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 29);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 30, GT_OQ: -546.68 > 109.49 is false. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -236.42), SIMDE_FLOAT32_C(  925.60), SIMDE_FLOAT32_C(  252.25), SIMDE_FLOAT32_C( -546.68));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C(   67.98), SIMDE_FLOAT32_C( -613.65), SIMDE_FLOAT32_C( -165.27), SIMDE_FLOAT32_C(  109.49));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -236.42), SIMDE_FLOAT32_C(  925.60), SIMDE_FLOAT32_C(  252.25), SIMDE_FLOAT32_C(    0.00));
    e = simde_mm_move_ss(e, simde_mm_setzero_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 30);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  { /* 31, TRUE_US: always true. */
    simde__m128 lhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -851.42), SIMDE_FLOAT32_C(  716.28), SIMDE_FLOAT32_C(  257.21), SIMDE_FLOAT32_C(  191.16));
    simde__m128 rhs = simde_mm_set_ps(SIMDE_FLOAT32_C( -758.42), SIMDE_FLOAT32_C(  731.61), SIMDE_FLOAT32_C(   23.45), SIMDE_FLOAT32_C(  503.57));
    simde__m128 e = simde_mm_set_ps(SIMDE_FLOAT32_C( -851.42), SIMDE_FLOAT32_C(  716.28), SIMDE_FLOAT32_C(  257.21), SIMDE_FLOAT32_C(     0.0));
    e = simde_mm_move_ss(e, simde_x_mm_setone_ps());
    simde__m128 r = simde_mm_cmp_ss(lhs, rhs, 31);
    simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e));
  }

  return 0;
}

static int
test_simde_mm256_cmp_pd(SIMDE_MUNIT_TEST_ARGS) {
  simde__m256d a, b, r, e;

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -357.57), SIMDE_FLOAT64_C(  765.93),
                         SIMDE_FLOAT64_C( -550.14), SIMDE_FLOAT64_C(  -22.41));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  868.56), SIMDE_FLOAT64_C(  688.68),
                         SIMDE_FLOAT64_C( -724.59), SIMDE_FLOAT64_C(  334.75));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 0);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -54.73), SIMDE_FLOAT64_C(   28.08),
                         SIMDE_FLOAT64_C(   97.90), SIMDE_FLOAT64_C(  892.29));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -838.35), SIMDE_FLOAT64_C(  389.60),
                         SIMDE_FLOAT64_C( -784.06), SIMDE_FLOAT64_C( -852.24));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET,
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 1);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -763.82), SIMDE_FLOAT64_C( -666.98),
                         SIMDE_FLOAT64_C(   95.45), SIMDE_FLOAT64_C(  511.10));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(   72.91), SIMDE_FLOAT64_C(  842.59),
                         SIMDE_FLOAT64_C( -301.64), SIMDE_FLOAT64_C(  977.39));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 2);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -891.91), SIMDE_FLOAT64_C( -127.40),
                         SIMDE_FLOAT64_C(  463.49), SIMDE_FLOAT64_C(  177.91));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -341.50), SIMDE_FLOAT64_C(  153.72),
                         SIMDE_FLOAT64_C(  151.04), SIMDE_FLOAT64_C( -348.13));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 3);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  100.63), SIMDE_FLOAT64_C(  228.90),
                         SIMDE_FLOAT64_C( -642.19), SIMDE_FLOAT64_C( -977.08));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  565.32), SIMDE_FLOAT64_C( -745.60),
                         SIMDE_FLOAT64_C( -937.14), SIMDE_FLOAT64_C( -396.84));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 4);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -698.69), SIMDE_FLOAT64_C(  381.53),
                         SIMDE_FLOAT64_C( -995.38), SIMDE_FLOAT64_C(  904.84));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  610.68), SIMDE_FLOAT64_C(  607.60),
                         SIMDE_FLOAT64_C(  346.14), SIMDE_FLOAT64_C( -567.77));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 5);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -32.88), SIMDE_FLOAT64_C(  456.08),
                         SIMDE_FLOAT64_C( -158.08), SIMDE_FLOAT64_C( -924.19));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -46.81), SIMDE_FLOAT64_C( -380.35),
                         SIMDE_FLOAT64_C(  820.23), SIMDE_FLOAT64_C( -250.91));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 6);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -875.29), SIMDE_FLOAT64_C(   57.11),
                         SIMDE_FLOAT64_C(  260.25), SIMDE_FLOAT64_C(  164.20));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  715.03), SIMDE_FLOAT64_C(  526.68),
                         SIMDE_FLOAT64_C(  724.10), SIMDE_FLOAT64_C( -661.45));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 7);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -59.93), SIMDE_FLOAT64_C(  531.74),
                         SIMDE_FLOAT64_C(  694.87), SIMDE_FLOAT64_C(  114.93));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  415.34), SIMDE_FLOAT64_C(  611.46),
                         SIMDE_FLOAT64_C( -279.38), SIMDE_FLOAT64_C(  402.62));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 8);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  414.18), SIMDE_FLOAT64_C( -763.39),
                         SIMDE_FLOAT64_C(  386.06), SIMDE_FLOAT64_C(  874.65));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  544.80), SIMDE_FLOAT64_C(  381.68),
                         SIMDE_FLOAT64_C(  466.15), SIMDE_FLOAT64_C( -212.81));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 9);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  970.77), SIMDE_FLOAT64_C( -757.81),
                         SIMDE_FLOAT64_C(  907.57), SIMDE_FLOAT64_C(  981.95));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  537.33), SIMDE_FLOAT64_C( -552.73),
                         SIMDE_FLOAT64_C( -335.84), SIMDE_FLOAT64_C(  263.81));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET,
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 10);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  792.31), SIMDE_FLOAT64_C(  400.04),
                         SIMDE_FLOAT64_C( -788.48), SIMDE_FLOAT64_C(  167.61));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  212.33), SIMDE_FLOAT64_C(  296.89),
                         SIMDE_FLOAT64_C(  622.33), SIMDE_FLOAT64_C( -766.53));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 11);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  758.22), SIMDE_FLOAT64_C( -663.32),
                         SIMDE_FLOAT64_C( -999.81), SIMDE_FLOAT64_C(  133.54));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -236.40), SIMDE_FLOAT64_C(  458.49),
                         SIMDE_FLOAT64_C( -796.87), SIMDE_FLOAT64_C(  971.44));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 12);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -918.24), SIMDE_FLOAT64_C( -553.29),
                         SIMDE_FLOAT64_C(  709.03), SIMDE_FLOAT64_C(  -42.30));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -249.29), SIMDE_FLOAT64_C( -863.89),
                         SIMDE_FLOAT64_C(  838.41), SIMDE_FLOAT64_C( -285.41));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET,
                         SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 13);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -668.93), SIMDE_FLOAT64_C( -420.18),
                         SIMDE_FLOAT64_C(  785.36), SIMDE_FLOAT64_C( -788.63));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  111.55), SIMDE_FLOAT64_C( -222.13),
                         SIMDE_FLOAT64_C( -579.35), SIMDE_FLOAT64_C( -996.45));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 14);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  841.84), SIMDE_FLOAT64_C( -686.81),
                         SIMDE_FLOAT64_C( -199.31), SIMDE_FLOAT64_C(  982.01));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  421.52), SIMDE_FLOAT64_C(  488.69),
                         SIMDE_FLOAT64_C(  995.06), SIMDE_FLOAT64_C( -730.80));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 15);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -95.24), SIMDE_FLOAT64_C(  253.40),
                         SIMDE_FLOAT64_C( -815.08), SIMDE_FLOAT64_C(  358.42));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -399.53), SIMDE_FLOAT64_C( -710.61),
                         SIMDE_FLOAT64_C( -422.64), SIMDE_FLOAT64_C( -148.83));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 16);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(   77.77), SIMDE_FLOAT64_C(  698.58),
                         SIMDE_FLOAT64_C(  -27.60), SIMDE_FLOAT64_C(  435.81));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -150.12), SIMDE_FLOAT64_C( -751.03),
                         SIMDE_FLOAT64_C( -597.97), SIMDE_FLOAT64_C( -937.82));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 17);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -838.78), SIMDE_FLOAT64_C(   93.35),
                         SIMDE_FLOAT64_C( -825.83), SIMDE_FLOAT64_C( -323.02));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -282.39), SIMDE_FLOAT64_C(  572.90),
                         SIMDE_FLOAT64_C( -581.23), SIMDE_FLOAT64_C(   32.08));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 18);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -807.33), SIMDE_FLOAT64_C(  664.63),
                         SIMDE_FLOAT64_C(  982.61), SIMDE_FLOAT64_C(   63.27));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  946.39), SIMDE_FLOAT64_C(  207.32),
                         SIMDE_FLOAT64_C(   -9.66), SIMDE_FLOAT64_C(   11.76));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 19);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -738.69), SIMDE_FLOAT64_C( -322.11),
                         SIMDE_FLOAT64_C( -163.93), SIMDE_FLOAT64_C( -138.57));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  684.55), SIMDE_FLOAT64_C( -319.23),
                         SIMDE_FLOAT64_C(  930.19), SIMDE_FLOAT64_C(  517.01));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 20);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -76.84), SIMDE_FLOAT64_C(  457.06),
                         SIMDE_FLOAT64_C(  575.12), SIMDE_FLOAT64_C(  845.68));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  233.22), SIMDE_FLOAT64_C( -237.00),
                         SIMDE_FLOAT64_C( -964.93), SIMDE_FLOAT64_C(  750.37));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 21);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  199.98), SIMDE_FLOAT64_C(  741.05),
                         SIMDE_FLOAT64_C( -723.44), SIMDE_FLOAT64_C(  323.27));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  103.44), SIMDE_FLOAT64_C( -854.52),
                         SIMDE_FLOAT64_C(  244.92), SIMDE_FLOAT64_C(  486.47));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 22);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -576.37), SIMDE_FLOAT64_C(  750.85),
                         SIMDE_FLOAT64_C(  434.13), SIMDE_FLOAT64_C(  344.29));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -968.83), SIMDE_FLOAT64_C(  577.41),
                         SIMDE_FLOAT64_C(  995.59), SIMDE_FLOAT64_C(  750.10));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 23);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  526.25), SIMDE_FLOAT64_C(  -57.74),
                         SIMDE_FLOAT64_C( -432.94), SIMDE_FLOAT64_C(  882.68));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C( -495.53), SIMDE_FLOAT64_C( -602.01),
                         SIMDE_FLOAT64_C( -925.63), SIMDE_FLOAT64_C(  123.17));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 24);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -567.21), SIMDE_FLOAT64_C(  600.16),
                         SIMDE_FLOAT64_C( -766.87), SIMDE_FLOAT64_C(   11.16));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  695.58), SIMDE_FLOAT64_C(  482.48),
                         SIMDE_FLOAT64_C(  350.48), SIMDE_FLOAT64_C( -969.97));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(    0.00),
                         SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 25);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -85.80), SIMDE_FLOAT64_C(  500.17),
                         SIMDE_FLOAT64_C(  916.37), SIMDE_FLOAT64_C(  398.15));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  745.00), SIMDE_FLOAT64_C( -144.13),
                         SIMDE_FLOAT64_C( -516.66), SIMDE_FLOAT64_C(  995.75));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 26);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  546.73), SIMDE_FLOAT64_C( -603.02),
                         SIMDE_FLOAT64_C( -971.83), SIMDE_FLOAT64_C(  389.90));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  776.68), SIMDE_FLOAT64_C( -130.82),
                         SIMDE_FLOAT64_C(  580.30), SIMDE_FLOAT64_C(  704.29));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00),
                         SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(    0.00));
  r = simde_mm256_cmp_pd(a, b, 27);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C( -164.05), SIMDE_FLOAT64_C(  409.22),
                         SIMDE_FLOAT64_C( -602.22), SIMDE_FLOAT64_C(  375.71));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  363.61), SIMDE_FLOAT64_C( -315.81),
                         SIMDE_FLOAT64_C( -199.39), SIMDE_FLOAT64_C(  806.44));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 28);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  200.12), SIMDE_FLOAT64_C(  648.82),
                         SIMDE_FLOAT64_C(  -75.31), SIMDE_FLOAT64_C(  801.78));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  431.61), SIMDE_FLOAT64_C(  123.43),
                         SIMDE_FLOAT64_C(  753.52), SIMDE_FLOAT64_C( -346.75));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET,
                         SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 29);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -27.55), SIMDE_FLOAT64_C(  895.94),
                         SIMDE_FLOAT64_C(  742.64), SIMDE_FLOAT64_C(  -59.01));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  812.89), SIMDE_FLOAT64_C( -405.22),
                         SIMDE_FLOAT64_C(  782.32), SIMDE_FLOAT64_C( -131.42));
  e = simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET,
                         SIMDE_FLOAT64_C(    0.00), SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 30);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  a = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -68.17), SIMDE_FLOAT64_C( -246.45),
                         SIMDE_FLOAT64_C(   32.69), SIMDE_FLOAT64_C( -878.59));
  b = simde_mm256_set_pd(SIMDE_FLOAT64_C(  -82.57), SIMDE_FLOAT64_C(  930.53),
                         SIMDE_FLOAT64_C( -591.17), SIMDE_FLOAT64_C( -164.90));
  e = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET,
                         SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  r = simde_mm256_cmp_pd(a, b, 31);
  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e));

  return 0;
}


static int
test_simde_mm256_cmp_ps (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const simde_float32 a[8];
    const simde_float32 b[8];
    const int32_t r[8];
  } test_vec[] = {
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -149.05), SIMDE_FLOAT32_C(  -893.98),
        SIMDE_FLOAT32_C(  -696.89), SIMDE_FLOAT32_C(   685.79), SIMDE_FLOAT32_C(  -307.00), SIMDE_FLOAT32_C(   627.21) },
      { SIMDE_FLOAT32_C(   279.01),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -893.98),
        SIMDE_FLOAT32_C(  -696.89), SIMDE_FLOAT32_C(   685.79), SIMDE_FLOAT32_C(  -307.00), SIMDE_FLOAT32_C(   627.21) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   718.94), SIMDE_FLOAT32_C(   731.02),
        SIMDE_FLOAT32_C(  -671.96), SIMDE_FLOAT32_C(  -928.83), SIMDE_FLOAT32_C(   835.13), SIMDE_FLOAT32_C(   607.05) },
      { SIMDE_FLOAT32_C(  -160.16),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   731.02),
        SIMDE_FLOAT32_C(  -671.96), SIMDE_FLOAT32_C(  -928.83), SIMDE_FLOAT32_C(   835.13), SIMDE_FLOAT32_C(   607.05) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -606.93), SIMDE_FLOAT32_C(  -171.52),
        SIMDE_FLOAT32_C(  -117.33), SIMDE_FLOAT32_C(  -255.25), SIMDE_FLOAT32_C(   953.12), SIMDE_FLOAT32_C(   722.51) },
      { SIMDE_FLOAT32_C(   256.36),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -171.52),
        SIMDE_FLOAT32_C(  -117.33), SIMDE_FLOAT32_C(  -255.25), SIMDE_FLOAT32_C(   953.12), SIMDE_FLOAT32_C(   722.51) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   -44.94), SIMDE_FLOAT32_C(   456.27),
        SIMDE_FLOAT32_C(  -798.46), SIMDE_FLOAT32_C(  -786.63), SIMDE_FLOAT32_C(  -692.20), SIMDE_FLOAT32_C(   457.89) },
      { SIMDE_FLOAT32_C(   580.38),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   456.27),
        SIMDE_FLOAT32_C(  -798.46), SIMDE_FLOAT32_C(  -786.63), SIMDE_FLOAT32_C(  -692.20), SIMDE_FLOAT32_C(   457.89) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   137.68), SIMDE_FLOAT32_C(   722.30),
        SIMDE_FLOAT32_C(   692.72), SIMDE_FLOAT32_C(   865.79), SIMDE_FLOAT32_C(   713.14), SIMDE_FLOAT32_C(   273.10) },
      { SIMDE_FLOAT32_C(  -194.17),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   722.30),
        SIMDE_FLOAT32_C(   692.72), SIMDE_FLOAT32_C(   865.79), SIMDE_FLOAT32_C(   713.14), SIMDE_FLOAT32_C(   273.10) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   563.54), SIMDE_FLOAT32_C(   120.17),
        SIMDE_FLOAT32_C(   285.05), SIMDE_FLOAT32_C(  -659.84), SIMDE_FLOAT32_C(   911.23), SIMDE_FLOAT32_C(  -909.12) },
      { SIMDE_FLOAT32_C(  -696.13),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   120.17),
        SIMDE_FLOAT32_C(   285.05), SIMDE_FLOAT32_C(  -659.84), SIMDE_FLOAT32_C(   911.23), SIMDE_FLOAT32_C(  -909.12) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   574.42), SIMDE_FLOAT32_C(   966.29),
        SIMDE_FLOAT32_C(   -95.08), SIMDE_FLOAT32_C(    65.92), SIMDE_FLOAT32_C(   875.04), SIMDE_FLOAT32_C(   208.79) },
      { SIMDE_FLOAT32_C(   969.08),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   966.29),
        SIMDE_FLOAT32_C(   -95.08), SIMDE_FLOAT32_C(    65.92), SIMDE_FLOAT32_C(   875.04), SIMDE_FLOAT32_C(   208.79) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -958.46), SIMDE_FLOAT32_C(  -533.36),
        SIMDE_FLOAT32_C(   881.84), SIMDE_FLOAT32_C(   468.13), SIMDE_FLOAT32_C(  -142.59), SIMDE_FLOAT32_C(   850.92) },
      { SIMDE_FLOAT32_C(   813.92),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -533.36),
        SIMDE_FLOAT32_C(   881.84), SIMDE_FLOAT32_C(   468.13), SIMDE_FLOAT32_C(  -142.59), SIMDE_FLOAT32_C(   850.92) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   772.60), SIMDE_FLOAT32_C(  -659.06),
        SIMDE_FLOAT32_C(  -880.78), SIMDE_FLOAT32_C(  -296.98), SIMDE_FLOAT32_C(   306.40), SIMDE_FLOAT32_C(   933.13) },
      { SIMDE_FLOAT32_C(   569.99),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -659.06),
        SIMDE_FLOAT32_C(  -880.78), SIMDE_FLOAT32_C(  -296.98), SIMDE_FLOAT32_C(   306.40), SIMDE_FLOAT32_C(   933.13) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   656.08), SIMDE_FLOAT32_C(   -55.65),
        SIMDE_FLOAT32_C(   959.33), SIMDE_FLOAT32_C(  -735.25), SIMDE_FLOAT32_C(  -486.86), SIMDE_FLOAT32_C(   529.31) },
      { SIMDE_FLOAT32_C(  -335.33),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   -55.65),
        SIMDE_FLOAT32_C(   959.33), SIMDE_FLOAT32_C(  -735.25), SIMDE_FLOAT32_C(  -486.86), SIMDE_FLOAT32_C(   529.31) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -229.26), SIMDE_FLOAT32_C(   262.90),
        SIMDE_FLOAT32_C(   293.37), SIMDE_FLOAT32_C(   510.82), SIMDE_FLOAT32_C(  -990.58), SIMDE_FLOAT32_C(   958.04) },
      { SIMDE_FLOAT32_C(   647.21),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   262.90),
        SIMDE_FLOAT32_C(   293.37), SIMDE_FLOAT32_C(   510.82), SIMDE_FLOAT32_C(  -990.58), SIMDE_FLOAT32_C(   958.04) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   545.59), SIMDE_FLOAT32_C(   191.30),
        SIMDE_FLOAT32_C(   393.56), SIMDE_FLOAT32_C(   299.15), SIMDE_FLOAT32_C(  -224.76), SIMDE_FLOAT32_C(    40.78) },
      { SIMDE_FLOAT32_C(   296.38),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   191.30),
        SIMDE_FLOAT32_C(   393.56), SIMDE_FLOAT32_C(   299.15), SIMDE_FLOAT32_C(  -224.76), SIMDE_FLOAT32_C(    40.78) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   927.95), SIMDE_FLOAT32_C(  -876.81),
        SIMDE_FLOAT32_C(   104.06), SIMDE_FLOAT32_C(   980.20), SIMDE_FLOAT32_C(  -671.87), SIMDE_FLOAT32_C(  -599.56) },
      { SIMDE_FLOAT32_C(  -557.14),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -876.81),
        SIMDE_FLOAT32_C(   104.06), SIMDE_FLOAT32_C(   980.20), SIMDE_FLOAT32_C(  -671.87), SIMDE_FLOAT32_C(  -599.56) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -411.43), SIMDE_FLOAT32_C(   277.51),
        SIMDE_FLOAT32_C(  -583.73), SIMDE_FLOAT32_C(  -386.70), SIMDE_FLOAT32_C(  -484.55), SIMDE_FLOAT32_C(  -140.87) },
      { SIMDE_FLOAT32_C(   740.85),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   277.51),
        SIMDE_FLOAT32_C(  -583.73), SIMDE_FLOAT32_C(  -386.70), SIMDE_FLOAT32_C(  -484.55), SIMDE_FLOAT32_C(  -140.87) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   404.67), SIMDE_FLOAT32_C(  -709.45),
        SIMDE_FLOAT32_C(   517.91), SIMDE_FLOAT32_C(  -423.96), SIMDE_FLOAT32_C(   -29.75), SIMDE_FLOAT32_C(   258.76) },
      { SIMDE_FLOAT32_C(   589.45),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -709.45),
        SIMDE_FLOAT32_C(   517.91), SIMDE_FLOAT32_C(  -423.96), SIMDE_FLOAT32_C(   -29.75), SIMDE_FLOAT32_C(   258.76) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -837.48), SIMDE_FLOAT32_C(  -366.29),
        SIMDE_FLOAT32_C(  -689.14), SIMDE_FLOAT32_C(   659.78), SIMDE_FLOAT32_C(    68.61), SIMDE_FLOAT32_C(   900.31) },
      { SIMDE_FLOAT32_C(  -888.23),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -366.29),
        SIMDE_FLOAT32_C(  -689.14), SIMDE_FLOAT32_C(   659.78), SIMDE_FLOAT32_C(    68.61), SIMDE_FLOAT32_C(   900.31) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   336.50), SIMDE_FLOAT32_C(   832.58),
        SIMDE_FLOAT32_C(   473.28), SIMDE_FLOAT32_C(   328.79), SIMDE_FLOAT32_C(   638.06), SIMDE_FLOAT32_C(   585.05) },
      { SIMDE_FLOAT32_C(   211.27),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   832.58),
        SIMDE_FLOAT32_C(   473.28), SIMDE_FLOAT32_C(   328.79), SIMDE_FLOAT32_C(   638.06), SIMDE_FLOAT32_C(   585.05) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   613.30), SIMDE_FLOAT32_C(   222.53),
        SIMDE_FLOAT32_C(  -615.33), SIMDE_FLOAT32_C(   736.15), SIMDE_FLOAT32_C(  -359.69), SIMDE_FLOAT32_C(   595.94) },
      { SIMDE_FLOAT32_C(   299.86),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   222.53),
        SIMDE_FLOAT32_C(  -615.33), SIMDE_FLOAT32_C(   736.15), SIMDE_FLOAT32_C(  -359.69), SIMDE_FLOAT32_C(   595.94) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -612.34), SIMDE_FLOAT32_C(   983.84),
        SIMDE_FLOAT32_C(   869.05), SIMDE_FLOAT32_C(   300.06), SIMDE_FLOAT32_C(  -294.90), SIMDE_FLOAT32_C(   168.91) },
      { SIMDE_FLOAT32_C(   982.50),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   983.84),
        SIMDE_FLOAT32_C(   869.05), SIMDE_FLOAT32_C(   300.06), SIMDE_FLOAT32_C(  -294.90), SIMDE_FLOAT32_C(   168.91) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   454.72), SIMDE_FLOAT32_C(  -309.22),
        SIMDE_FLOAT32_C(   693.07), SIMDE_FLOAT32_C(  -199.41), SIMDE_FLOAT32_C(  -731.51), SIMDE_FLOAT32_C(   675.56) },
      { SIMDE_FLOAT32_C(  -126.29),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -309.22),
        SIMDE_FLOAT32_C(   693.07), SIMDE_FLOAT32_C(  -199.41), SIMDE_FLOAT32_C(  -731.51), SIMDE_FLOAT32_C(   675.56) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -975.47), SIMDE_FLOAT32_C(   128.01),
        SIMDE_FLOAT32_C(  -578.31), SIMDE_FLOAT32_C(  -917.53), SIMDE_FLOAT32_C(  -744.78), SIMDE_FLOAT32_C(   295.40) },
      { SIMDE_FLOAT32_C(  -132.05),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   128.01),
        SIMDE_FLOAT32_C(  -578.31), SIMDE_FLOAT32_C(  -917.53), SIMDE_FLOAT32_C(  -744.78), SIMDE_FLOAT32_C(   295.40) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   446.17), SIMDE_FLOAT32_C(  -488.36),
        SIMDE_FLOAT32_C(  -808.41), SIMDE_FLOAT32_C(   490.80), SIMDE_FLOAT32_C(  -777.96), SIMDE_FLOAT32_C(    59.54) },
      { SIMDE_FLOAT32_C(   809.79),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -488.36),
        SIMDE_FLOAT32_C(  -808.41), SIMDE_FLOAT32_C(   490.80), SIMDE_FLOAT32_C(  -777.96), SIMDE_FLOAT32_C(    59.54) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   304.29), SIMDE_FLOAT32_C(   -22.30),
        SIMDE_FLOAT32_C(  -584.82), SIMDE_FLOAT32_C(   911.85), SIMDE_FLOAT32_C(   686.42), SIMDE_FLOAT32_C(  -775.03) },
      { SIMDE_FLOAT32_C(   -48.11),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   -22.30),
        SIMDE_FLOAT32_C(  -584.82), SIMDE_FLOAT32_C(   911.85), SIMDE_FLOAT32_C(   686.42), SIMDE_FLOAT32_C(  -775.03) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   126.36), SIMDE_FLOAT32_C(  -734.16),
        SIMDE_FLOAT32_C(    75.86), SIMDE_FLOAT32_C(  -459.15), SIMDE_FLOAT32_C(  -296.91), SIMDE_FLOAT32_C(  -972.25) },
      { SIMDE_FLOAT32_C(  -260.60),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -734.16),
        SIMDE_FLOAT32_C(    75.86), SIMDE_FLOAT32_C(  -459.15), SIMDE_FLOAT32_C(  -296.91), SIMDE_FLOAT32_C(  -972.25) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   891.27), SIMDE_FLOAT32_C(  -173.12),
        SIMDE_FLOAT32_C(  -710.57), SIMDE_FLOAT32_C(  -612.12), SIMDE_FLOAT32_C(   290.00), SIMDE_FLOAT32_C(    28.83) },
      { SIMDE_FLOAT32_C(  -483.53),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -173.12),
        SIMDE_FLOAT32_C(  -710.57), SIMDE_FLOAT32_C(  -612.12), SIMDE_FLOAT32_C(   290.00), SIMDE_FLOAT32_C(    28.83) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   246.43), SIMDE_FLOAT32_C(   806.45),
        SIMDE_FLOAT32_C(   890.16), SIMDE_FLOAT32_C(   886.17), SIMDE_FLOAT32_C(  -836.55), SIMDE_FLOAT32_C(  -593.37) },
      { SIMDE_FLOAT32_C(  -345.31),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   806.45),
        SIMDE_FLOAT32_C(   890.16), SIMDE_FLOAT32_C(   886.17), SIMDE_FLOAT32_C(  -836.55), SIMDE_FLOAT32_C(  -593.37) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   248.21), SIMDE_FLOAT32_C(  -602.16),
        SIMDE_FLOAT32_C(   137.27), SIMDE_FLOAT32_C(   520.80), SIMDE_FLOAT32_C(   196.40), SIMDE_FLOAT32_C(   791.96) },
      { SIMDE_FLOAT32_C(   423.74),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -602.16),
        SIMDE_FLOAT32_C(   137.27), SIMDE_FLOAT32_C(   520.80), SIMDE_FLOAT32_C(   196.40), SIMDE_FLOAT32_C(   791.96) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   652.89), SIMDE_FLOAT32_C(  -299.80),
        SIMDE_FLOAT32_C(   881.22), SIMDE_FLOAT32_C(   323.51), SIMDE_FLOAT32_C(  -975.97), SIMDE_FLOAT32_C(   304.97) },
      { SIMDE_FLOAT32_C(   381.53),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -299.80),
        SIMDE_FLOAT32_C(   881.22), SIMDE_FLOAT32_C(   323.51), SIMDE_FLOAT32_C(  -975.97), SIMDE_FLOAT32_C(   304.97) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -635.51), SIMDE_FLOAT32_C(  -625.25),
        SIMDE_FLOAT32_C(  -196.99), SIMDE_FLOAT32_C(   239.42), SIMDE_FLOAT32_C(  -869.44), SIMDE_FLOAT32_C(  -815.46) },
      { SIMDE_FLOAT32_C(   558.80),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -625.25),
        SIMDE_FLOAT32_C(  -196.99), SIMDE_FLOAT32_C(   239.42), SIMDE_FLOAT32_C(  -869.44), SIMDE_FLOAT32_C(  -815.46) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -219.59), SIMDE_FLOAT32_C(   986.71),
        SIMDE_FLOAT32_C(  -996.95), SIMDE_FLOAT32_C(   488.42), SIMDE_FLOAT32_C(  -907.98), SIMDE_FLOAT32_C(   561.85) },
      { SIMDE_FLOAT32_C(   697.99),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   986.71),
        SIMDE_FLOAT32_C(  -996.95), SIMDE_FLOAT32_C(   488.42), SIMDE_FLOAT32_C(  -907.98), SIMDE_FLOAT32_C(   561.85) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -833.05), SIMDE_FLOAT32_C(   336.12),
        SIMDE_FLOAT32_C(   477.57), SIMDE_FLOAT32_C(  -303.83), SIMDE_FLOAT32_C(  -554.52), SIMDE_FLOAT32_C(   175.56) },
      { SIMDE_FLOAT32_C(   568.80),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   336.12),
        SIMDE_FLOAT32_C(   477.57), SIMDE_FLOAT32_C(  -303.83), SIMDE_FLOAT32_C(  -554.52), SIMDE_FLOAT32_C(   175.56) },
      {  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
    { {            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   809.70), SIMDE_FLOAT32_C(  -687.69),
        SIMDE_FLOAT32_C(   317.83), SIMDE_FLOAT32_C(   873.47), SIMDE_FLOAT32_C(   505.20), SIMDE_FLOAT32_C(  -113.37) },
      { SIMDE_FLOAT32_C(   300.43),            SIMDE_MATH_NANF,            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -687.69),
        SIMDE_FLOAT32_C(   317.83), SIMDE_FLOAT32_C(   873.47), SIMDE_FLOAT32_C(   505.20), SIMDE_FLOAT32_C(  -113.37) },
      { -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1), -INT32_C(           1) } }
  };

  simde__m256 a, b;
  simde__m256i r;

  a = simde_mm256_loadu_ps(test_vec[0].a);
  b = simde_mm256_loadu_ps(test_vec[0].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[0].r));

  a = simde_mm256_loadu_ps(test_vec[1].a);
  b = simde_mm256_loadu_ps(test_vec[1].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[1].r));

  a = simde_mm256_loadu_ps(test_vec[2].a);
  b = simde_mm256_loadu_ps(test_vec[2].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[2].r));

  a = simde_mm256_loadu_ps(test_vec[3].a);
  b = simde_mm256_loadu_ps(test_vec[3].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[3].r));

  a = simde_mm256_loadu_ps(test_vec[4].a);
  b = simde_mm256_loadu_ps(test_vec[4].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[4].r));

  a = simde_mm256_loadu_ps(test_vec[5].a);
  b = simde_mm256_loadu_ps(test_vec[5].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[5].r));

  a = simde_mm256_loadu_ps(test_vec[6].a);
  b = simde_mm256_loadu_ps(test_vec[6].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[6].r));

  a = simde_mm256_loadu_ps(test_vec[7].a);
  b = simde_mm256_loadu_ps(test_vec[7].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[7].r));

  a = simde_mm256_loadu_ps(test_vec[8].a);
  b = simde_mm256_loadu_ps(test_vec[8].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[8].r));

  a = simde_mm256_loadu_ps(test_vec[9].a);
  b = simde_mm256_loadu_ps(test_vec[9].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[9].r));

  a = simde_mm256_loadu_ps(test_vec[10].a);
  b = simde_mm256_loadu_ps(test_vec[10].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[10].r));

  a = simde_mm256_loadu_ps(test_vec[11].a);
  b = simde_mm256_loadu_ps(test_vec[11].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[11].r));

  a = simde_mm256_loadu_ps(test_vec[12].a);
  b = simde_mm256_loadu_ps(test_vec[12].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[12].r));

  a = simde_mm256_loadu_ps(test_vec[13].a);
  b = simde_mm256_loadu_ps(test_vec[13].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[13].r));

  a = simde_mm256_loadu_ps(test_vec[14].a);
  b = simde_mm256_loadu_ps(test_vec[14].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[14].r));

  a = simde_mm256_loadu_ps(test_vec[15].a);
  b = simde_mm256_loadu_ps(test_vec[15].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[15].r));

  a = simde_mm256_loadu_ps(test_vec[16].a);
  b = simde_mm256_loadu_ps(test_vec[16].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[16].r));

  a = simde_mm256_loadu_ps(test_vec[17].a);
  b = simde_mm256_loadu_ps(test_vec[17].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[17].r));

  a = simde_mm256_loadu_ps(test_vec[18].a);
  b = simde_mm256_loadu_ps(test_vec[18].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[18].r));

  a = simde_mm256_loadu_ps(test_vec[19].a);
  b = simde_mm256_loadu_ps(test_vec[19].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[19].r));

  a = simde_mm256_loadu_ps(test_vec[20].a);
  b = simde_mm256_loadu_ps(test_vec[20].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[20].r));

  a = simde_mm256_loadu_ps(test_vec[21].a);
  b = simde_mm256_loadu_ps(test_vec[21].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[21].r));

  a = simde_mm256_loadu_ps(test_vec[22].a);
  b = simde_mm256_loadu_ps(test_vec[22].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[22].r));

  a = simde_mm256_loadu_ps(test_vec[23].a);
  b = simde_mm256_loadu_ps(test_vec[23].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[23].r));

  a = simde_mm256_loadu_ps(test_vec[24].a);
  b = simde_mm256_loadu_ps(test_vec[24].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[24].r));

  a = simde_mm256_loadu_ps(test_vec[25].a);
  b = simde_mm256_loadu_ps(test_vec[25].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[25].r));

  a = simde_mm256_loadu_ps(test_vec[26].a);
  b = simde_mm256_loadu_ps(test_vec[26].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[26].r));

  a = simde_mm256_loadu_ps(test_vec[27].a);
  b = simde_mm256_loadu_ps(test_vec[27].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[27].r));

  a = simde_mm256_loadu_ps(test_vec[28].a);
  b = simde_mm256_loadu_ps(test_vec[28].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[28].r));

  a = simde_mm256_loadu_ps(test_vec[29].a);
  b = simde_mm256_loadu_ps(test_vec[29].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[29].r));

  a = simde_mm256_loadu_ps(test_vec[30].a);
  b = simde_mm256_loadu_ps(test_vec[30].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[30].r));

  a = simde_mm256_loadu_ps(test_vec[31].a);
  b = simde_mm256_loadu_ps(test_vec[31].b);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[31].r));

  return 0;
#else
  fputc('\n', stdout);

  const simde__m256 nanv = simde_mm256_set1_ps(SIMDE_MATH_NANF);

  simde__m256 a, b;
  simde__m256i r;

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_EQ_OQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_LT_OS));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_LE_OS));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_UNORD_Q));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NEQ_UQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NLT_US));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NLE_US));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_ORD_Q));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_EQ_UQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NGE_US));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NGT_US));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_FALSE_OQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NEQ_OQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_GE_OS));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_GT_OS));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_TRUE_UQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_EQ_OS));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_LT_OQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_LE_OQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_UNORD_S));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NEQ_US));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NLT_UQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NLE_UQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_ORD_S));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_EQ_US));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NGE_UQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NGT_UQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_FALSE_OS));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_NEQ_OS));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_GE_OQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_GT_OQ));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
  b = simde_mm256_blendv_ps(a, b, simde_mm256_cmp_ps(simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ));
  a = simde_mm256_blend_ps(a, nanv, 3);
  b = simde_mm256_blend_ps(b, nanv, 6);
  r = simde_mm256_castps_si256(simde_mm256_cmp_ps(a, b, SIMDE_CMP_TRUE_US));
  simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
  simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
  simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST);

  return 1;
#endif
}

#endif /* if !defined(SIMDE_FAST_MATH) */

/* Exercises simde_mm256_cvtepi32_pd with eight fixed inputs.
 *
 * Every expected value in `r` is the matching 32-bit integer from `a`
 * written out as a double literal. Results are compared through
 * simde_assert_m256d_close with a precision argument of 1.
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtepi32_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm_set_epi32(INT32_C( 1957018358), INT32_C( 1074174472), INT32_C(  124397699), INT32_C( 1881644266)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(1957018358.00), SIMDE_FLOAT64_C(1074174472.00), SIMDE_FLOAT64_C(124397699.00), SIMDE_FLOAT64_C(1881644266.00)) },
    { simde_mm_set_epi32(INT32_C( 2070107316), INT32_C( 1618653378), INT32_C( 1002292861), INT32_C( 2002977375)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(2070107316.00), SIMDE_FLOAT64_C(1618653378.00), SIMDE_FLOAT64_C(1002292861.00), SIMDE_FLOAT64_C(2002977375.00)) },
    { simde_mm_set_epi32(INT32_C( 2031655643), INT32_C( 1380169755), INT32_C( 1722613954), INT32_C(  223100421)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(2031655643.00), SIMDE_FLOAT64_C(1380169755.00), SIMDE_FLOAT64_C(1722613954.00), SIMDE_FLOAT64_C(223100421.00)) },
    { simde_mm_set_epi32(INT32_C(-1894427767), INT32_C(-1633274427), INT32_C(-2058387969), INT32_C(-1311515394)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-1894427767.00), SIMDE_FLOAT64_C(-1633274427.00), SIMDE_FLOAT64_C(-2058387969.00), SIMDE_FLOAT64_C(-1311515394.00)) },
    { simde_mm_set_epi32(INT32_C(-1443374135), INT32_C( 1382394218), INT32_C( 1459905767), INT32_C( -756048058)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-1443374135.00), SIMDE_FLOAT64_C(1382394218.00), SIMDE_FLOAT64_C(1459905767.00), SIMDE_FLOAT64_C(-756048058.00)) },
    { simde_mm_set_epi32(INT32_C( -781596301), INT32_C( 1840524706), INT32_C( 1502138952), INT32_C(-2118210723)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-781596301.00), SIMDE_FLOAT64_C(1840524706.00), SIMDE_FLOAT64_C(1502138952.00), SIMDE_FLOAT64_C(-2118210723.00)) },
    { simde_mm_set_epi32(INT32_C(   52250967), INT32_C( 1988701031), INT32_C( 1592626424), INT32_C(-1778387557)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(52250967.00), SIMDE_FLOAT64_C(1988701031.00), SIMDE_FLOAT64_C(1592626424.00), SIMDE_FLOAT64_C(-1778387557.00)) },
    { simde_mm_set_epi32(INT32_C(   30979646), INT32_C(  769206580), INT32_C(-2128276240), INT32_C( 1445709709)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(30979646.00), SIMDE_FLOAT64_C(769206580.00), SIMDE_FLOAT64_C(-2128276240.00), SIMDE_FLOAT64_C(1445709709.00)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m256d r = simde_mm256_cvtepi32_pd(test_vec[i].a);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_cvtepi32_ps with eight fixed inputs.
 *
 * Each expected value in `r` is the corresponding 32-bit integer from
 * `a` written as a single-precision literal. Results are compared
 * through simde_assert_m256_close with a precision argument of 1.
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtepi32_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C(      -6033), INT32_C(      15813), INT32_C(      12979), INT32_C(     -31712),
                            INT32_C(      18002), INT32_C(      -6019), INT32_C(     -26810), INT32_C(      14091)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-6033.00), SIMDE_FLOAT32_C(15813.00), SIMDE_FLOAT32_C(12979.00), SIMDE_FLOAT32_C(-31712.00),
                         SIMDE_FLOAT32_C(18002.00), SIMDE_FLOAT32_C(-6019.00), SIMDE_FLOAT32_C(-26810.00), SIMDE_FLOAT32_C(14091.00)) },
    { simde_mm256_set_epi32(INT32_C(       6359), INT32_C(       7786), INT32_C(      21856), INT32_C(     -20706),
                            INT32_C(     -20652), INT32_C(      21040), INT32_C(      -8561), INT32_C(     -12779)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( 6359.00), SIMDE_FLOAT32_C( 7786.00), SIMDE_FLOAT32_C(21856.00), SIMDE_FLOAT32_C(-20706.00),
                         SIMDE_FLOAT32_C(-20652.00), SIMDE_FLOAT32_C(21040.00), SIMDE_FLOAT32_C(-8561.00), SIMDE_FLOAT32_C(-12779.00)) },
    { simde_mm256_set_epi32(INT32_C(     -28477), INT32_C(     -21667), INT32_C(     -16892), INT32_C(     -16024),
                            INT32_C(     -11576), INT32_C(        602), INT32_C(      23902), INT32_C(      17547)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-28477.00), SIMDE_FLOAT32_C(-21667.00), SIMDE_FLOAT32_C(-16892.00), SIMDE_FLOAT32_C(-16024.00),
                         SIMDE_FLOAT32_C(-11576.00), SIMDE_FLOAT32_C(  602.00), SIMDE_FLOAT32_C(23902.00), SIMDE_FLOAT32_C(17547.00)) },
    { simde_mm256_set_epi32(INT32_C(       8732), INT32_C(      13948), INT32_C(       7489), INT32_C(      25724),
                            INT32_C(      24561), INT32_C(      11189), INT32_C(      24773), INT32_C(      -8467)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( 8732.00), SIMDE_FLOAT32_C(13948.00), SIMDE_FLOAT32_C( 7489.00), SIMDE_FLOAT32_C(25724.00),
                         SIMDE_FLOAT32_C(24561.00), SIMDE_FLOAT32_C(11189.00), SIMDE_FLOAT32_C(24773.00), SIMDE_FLOAT32_C(-8467.00)) },
    { simde_mm256_set_epi32(INT32_C(     -31943), INT32_C(      26870), INT32_C(     -22515), INT32_C(       3030),
                            INT32_C(      24358), INT32_C(      31924), INT32_C(      30771), INT32_C(       4777)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-31943.00), SIMDE_FLOAT32_C(26870.00), SIMDE_FLOAT32_C(-22515.00), SIMDE_FLOAT32_C( 3030.00),
                         SIMDE_FLOAT32_C(24358.00), SIMDE_FLOAT32_C(31924.00), SIMDE_FLOAT32_C(30771.00), SIMDE_FLOAT32_C( 4777.00)) },
    { simde_mm256_set_epi32(INT32_C(      -1809), INT32_C(     -14404), INT32_C(     -32286), INT32_C(      25399),
                            INT32_C(      29260), INT32_C(     -23412), INT32_C(      12480), INT32_C(     -23461)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-1809.00), SIMDE_FLOAT32_C(-14404.00), SIMDE_FLOAT32_C(-32286.00), SIMDE_FLOAT32_C(25399.00),
                         SIMDE_FLOAT32_C(29260.00), SIMDE_FLOAT32_C(-23412.00), SIMDE_FLOAT32_C(12480.00), SIMDE_FLOAT32_C(-23461.00)) },
    { simde_mm256_set_epi32(INT32_C(      -2274), INT32_C(      23698), INT32_C(       4976), INT32_C(      13398),
                            INT32_C(      -8046), INT32_C(     -16937), INT32_C(      19340), INT32_C(      -6656)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-2274.00), SIMDE_FLOAT32_C(23698.00), SIMDE_FLOAT32_C( 4976.00), SIMDE_FLOAT32_C(13398.00),
                         SIMDE_FLOAT32_C(-8046.00), SIMDE_FLOAT32_C(-16937.00), SIMDE_FLOAT32_C(19340.00), SIMDE_FLOAT32_C(-6656.00)) },
    { simde_mm256_set_epi32(INT32_C(     -29605), INT32_C(       8888), INT32_C(      -8347), INT32_C(      -8890),
                            INT32_C(      -8372), INT32_C(       7334), INT32_C(      14947), INT32_C(      -5546)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-29605.00), SIMDE_FLOAT32_C( 8888.00), SIMDE_FLOAT32_C(-8347.00), SIMDE_FLOAT32_C(-8890.00),
                         SIMDE_FLOAT32_C(-8372.00), SIMDE_FLOAT32_C( 7334.00), SIMDE_FLOAT32_C(14947.00), SIMDE_FLOAT32_C(-5546.00)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m256 r = simde_mm256_cvtepi32_ps(test_vec[i].a);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_cvtpd_epi32 with eight fixed inputs.
 *
 * The expected integers in `r` are the doubles in `a` rounded to the
 * nearest integer (e.g. 823.92 -> 824, -889.53 -> -890); none of the
 * inputs sits exactly on a .5 boundary. Results must match exactly
 * (simde_assert_m128i_i32 with `==`).
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtpd_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m128i r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  823.92), SIMDE_FLOAT64_C( -252.31), SIMDE_FLOAT64_C(  311.42), SIMDE_FLOAT64_C(  639.08)),
      simde_mm_set_epi32(INT32_C( 824), INT32_C(-252), INT32_C( 311), INT32_C( 639)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  311.89), SIMDE_FLOAT64_C( -507.25), SIMDE_FLOAT64_C(  748.64), SIMDE_FLOAT64_C( -488.86)),
      simde_mm_set_epi32(INT32_C( 312), INT32_C(-507), INT32_C( 749), INT32_C(-489)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  478.57), SIMDE_FLOAT64_C( -328.29), SIMDE_FLOAT64_C( -289.22), SIMDE_FLOAT64_C( -586.95)),
      simde_mm_set_epi32(INT32_C( 479), INT32_C(-328), INT32_C(-289), INT32_C(-587)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  341.08), SIMDE_FLOAT64_C(  432.49), SIMDE_FLOAT64_C(  835.07), SIMDE_FLOAT64_C( -889.53)),
      simde_mm_set_epi32(INT32_C( 341), INT32_C( 432), INT32_C( 835), INT32_C(-890)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -811.21), SIMDE_FLOAT64_C( -487.29), SIMDE_FLOAT64_C(  852.90), SIMDE_FLOAT64_C(  970.07)),
      simde_mm_set_epi32(INT32_C(-811), INT32_C(-487), INT32_C( 853), INT32_C( 970)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  456.79), SIMDE_FLOAT64_C( -836.64), SIMDE_FLOAT64_C( -396.53), SIMDE_FLOAT64_C(  788.69)),
      simde_mm_set_epi32(INT32_C( 457), INT32_C(-837), INT32_C(-397), INT32_C( 789)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  538.31), SIMDE_FLOAT64_C(  630.62), SIMDE_FLOAT64_C( -811.65), SIMDE_FLOAT64_C( -175.08)),
      simde_mm_set_epi32(INT32_C( 538), INT32_C( 631), INT32_C(-812), INT32_C(-175)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -53.87), SIMDE_FLOAT64_C(  -83.11), SIMDE_FLOAT64_C( -288.58), SIMDE_FLOAT64_C( -287.98)),
      simde_mm_set_epi32(INT32_C( -54), INT32_C( -83), INT32_C(-289), INT32_C(-288)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m128i r = simde_mm256_cvtpd_epi32(test_vec[i].a);
    simde_assert_m128i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Exercises simde_mm256_cvtpd_ps with eight fixed inputs.
 *
 * Each expected value in `r` uses the same decimal literal as the
 * corresponding double in `a`, spelled as a single-precision constant.
 * Results are compared through simde_assert_m128_close with a
 * precision argument of 1.
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtpd_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m128 r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  375.90), SIMDE_FLOAT64_C( -889.76), SIMDE_FLOAT64_C( -974.31), SIMDE_FLOAT64_C(  373.58)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  375.90), SIMDE_FLOAT32_C( -889.76), SIMDE_FLOAT32_C( -974.31), SIMDE_FLOAT32_C(  373.58)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   84.47), SIMDE_FLOAT64_C(  744.53), SIMDE_FLOAT64_C(  -27.43), SIMDE_FLOAT64_C(  -51.38)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   84.47), SIMDE_FLOAT32_C(  744.53), SIMDE_FLOAT32_C(  -27.43), SIMDE_FLOAT32_C(  -51.38)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   66.02), SIMDE_FLOAT64_C( -438.12), SIMDE_FLOAT64_C(  408.44), SIMDE_FLOAT64_C(  197.54)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   66.02), SIMDE_FLOAT32_C( -438.12), SIMDE_FLOAT32_C(  408.44), SIMDE_FLOAT32_C(  197.54)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   80.23), SIMDE_FLOAT64_C(  910.03), SIMDE_FLOAT64_C(  928.38), SIMDE_FLOAT64_C(  305.66)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   80.23), SIMDE_FLOAT32_C(  910.03), SIMDE_FLOAT32_C(  928.38), SIMDE_FLOAT32_C(  305.66)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -511.50), SIMDE_FLOAT64_C( -503.26), SIMDE_FLOAT64_C( -164.88), SIMDE_FLOAT64_C(  -10.16)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -511.50), SIMDE_FLOAT32_C( -503.26), SIMDE_FLOAT32_C( -164.88), SIMDE_FLOAT32_C(  -10.16)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   99.85), SIMDE_FLOAT64_C( -538.53), SIMDE_FLOAT64_C(   17.38), SIMDE_FLOAT64_C( -161.67)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   99.85), SIMDE_FLOAT32_C( -538.53), SIMDE_FLOAT32_C(   17.38), SIMDE_FLOAT32_C( -161.67)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  891.01), SIMDE_FLOAT64_C(  865.63), SIMDE_FLOAT64_C(  -39.82), SIMDE_FLOAT64_C( -446.03)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  891.01), SIMDE_FLOAT32_C(  865.63), SIMDE_FLOAT32_C(  -39.82), SIMDE_FLOAT32_C( -446.03)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  394.04), SIMDE_FLOAT64_C(  238.42), SIMDE_FLOAT64_C(  746.10), SIMDE_FLOAT64_C(   -8.70)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  394.04), SIMDE_FLOAT32_C(  238.42), SIMDE_FLOAT32_C(  746.10), SIMDE_FLOAT32_C(   -8.70)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m128 r = simde_mm256_cvtpd_ps(test_vec[i].a);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_cvtps_epi32 with eight fixed inputs.
 *
 * The expected integers in `r` are the floats in `a` rounded to the
 * nearest integer (e.g. 598.58 -> 599, -165.53 -> -166). The two
 * inputs written with a .50 fraction, -997.50 and -865.50, expect
 * -998 and -866 respectively. Results must match exactly
 * (simde_assert_m256i_i32 with `==`).
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  598.58), SIMDE_FLOAT32_C(  571.41), SIMDE_FLOAT32_C( -242.37), SIMDE_FLOAT32_C( -717.41),
                         SIMDE_FLOAT32_C(  374.26), SIMDE_FLOAT32_C( -165.53), SIMDE_FLOAT32_C( -357.04), SIMDE_FLOAT32_C( -622.88)),
      simde_mm256_set_epi32(INT32_C( 599), INT32_C( 571), INT32_C(-242), INT32_C(-717), INT32_C( 374), INT32_C(-166), INT32_C(-357), INT32_C(-623)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  257.81), SIMDE_FLOAT32_C( -837.70), SIMDE_FLOAT32_C(  261.48), SIMDE_FLOAT32_C(  542.96),
                         SIMDE_FLOAT32_C(  769.60), SIMDE_FLOAT32_C( -711.96), SIMDE_FLOAT32_C( -326.97), SIMDE_FLOAT32_C( -113.31)),
      simde_mm256_set_epi32(INT32_C( 258), INT32_C(-838), INT32_C( 261), INT32_C( 543), INT32_C( 770), INT32_C(-712), INT32_C(-327), INT32_C(-113)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -569.57), SIMDE_FLOAT32_C(  423.37), SIMDE_FLOAT32_C(  -24.31), SIMDE_FLOAT32_C(  934.89),
                         SIMDE_FLOAT32_C(  421.98), SIMDE_FLOAT32_C(  514.39), SIMDE_FLOAT32_C(  548.83), SIMDE_FLOAT32_C(  419.70)),
      simde_mm256_set_epi32(INT32_C(-570), INT32_C( 423), INT32_C( -24), INT32_C( 935), INT32_C( 422), INT32_C( 514), INT32_C( 549), INT32_C( 420)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -391.60), SIMDE_FLOAT32_C( -565.59), SIMDE_FLOAT32_C(  -30.37), SIMDE_FLOAT32_C( -335.58),
                         SIMDE_FLOAT32_C(  613.59), SIMDE_FLOAT32_C( -997.50), SIMDE_FLOAT32_C( -875.20), SIMDE_FLOAT32_C(   61.16)),
      simde_mm256_set_epi32(INT32_C(-392), INT32_C(-566), INT32_C( -30), INT32_C(-336), INT32_C( 614), INT32_C(-998), INT32_C(-875), INT32_C(  61)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -166.04), SIMDE_FLOAT32_C( -427.79), SIMDE_FLOAT32_C(  609.54), SIMDE_FLOAT32_C( -532.28),
                         SIMDE_FLOAT32_C( -681.53), SIMDE_FLOAT32_C(  504.43), SIMDE_FLOAT32_C( -888.34), SIMDE_FLOAT32_C(  403.18)),
      simde_mm256_set_epi32(INT32_C(-166), INT32_C(-428), INT32_C( 610), INT32_C(-532), INT32_C(-682), INT32_C( 504), INT32_C(-888), INT32_C( 403)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  216.04), SIMDE_FLOAT32_C( -927.91), SIMDE_FLOAT32_C(  780.38), SIMDE_FLOAT32_C(  765.71),
                         SIMDE_FLOAT32_C( -447.72), SIMDE_FLOAT32_C(  -65.81), SIMDE_FLOAT32_C(  716.35), SIMDE_FLOAT32_C(  608.15)),
      simde_mm256_set_epi32(INT32_C( 216), INT32_C(-928), INT32_C( 780), INT32_C( 766), INT32_C(-448), INT32_C( -66), INT32_C( 716), INT32_C( 608)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  971.27), SIMDE_FLOAT32_C( -611.58), SIMDE_FLOAT32_C( -361.36), SIMDE_FLOAT32_C( -851.00),
                         SIMDE_FLOAT32_C(  839.99), SIMDE_FLOAT32_C(  207.87), SIMDE_FLOAT32_C( -947.82), SIMDE_FLOAT32_C( -403.90)),
      simde_mm256_set_epi32(INT32_C( 971), INT32_C(-612), INT32_C(-361), INT32_C(-851), INT32_C( 840), INT32_C( 208), INT32_C(-948), INT32_C(-404)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -507.54), SIMDE_FLOAT32_C(  581.68), SIMDE_FLOAT32_C( -590.23), SIMDE_FLOAT32_C(  417.30),
                         SIMDE_FLOAT32_C(  -87.52), SIMDE_FLOAT32_C( -865.50), SIMDE_FLOAT32_C(  940.51), SIMDE_FLOAT32_C(  910.77)),
      simde_mm256_set_epi32(INT32_C(-508), INT32_C( 582), INT32_C(-590), INT32_C( 417), INT32_C( -88), INT32_C(-866), INT32_C( 941), INT32_C( 911)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m256i r = simde_mm256_cvtps_epi32(test_vec[i].a);
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Exercises simde_mm256_cvtps_pd with eight fixed inputs.
 *
 * Each expected value in `r` uses the same decimal literal as the
 * corresponding float in `a`, spelled as a double-precision constant.
 * Results are compared through simde_assert_m256d_close with a
 * precision argument of 1.
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtps_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 a;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  846.20), SIMDE_FLOAT32_C(  685.37), SIMDE_FLOAT32_C(  660.41), SIMDE_FLOAT32_C( -309.12)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  846.20), SIMDE_FLOAT64_C(  685.37), SIMDE_FLOAT64_C(  660.41), SIMDE_FLOAT64_C( -309.12)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  900.54), SIMDE_FLOAT32_C(  555.77), SIMDE_FLOAT32_C( -412.48), SIMDE_FLOAT32_C( -684.76)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  900.54), SIMDE_FLOAT64_C(  555.77), SIMDE_FLOAT64_C( -412.48), SIMDE_FLOAT64_C( -684.76)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -530.00), SIMDE_FLOAT32_C(  516.66), SIMDE_FLOAT32_C(  969.93), SIMDE_FLOAT32_C( -956.57)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -530.00), SIMDE_FLOAT64_C(  516.66), SIMDE_FLOAT64_C(  969.93), SIMDE_FLOAT64_C( -956.57)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  876.84), SIMDE_FLOAT32_C(  972.29), SIMDE_FLOAT32_C(  715.44), SIMDE_FLOAT32_C(  -66.38)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  876.84), SIMDE_FLOAT64_C(  972.29), SIMDE_FLOAT64_C(  715.44), SIMDE_FLOAT64_C(  -66.38)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -684.20), SIMDE_FLOAT32_C( -317.33), SIMDE_FLOAT32_C(   88.04), SIMDE_FLOAT32_C(  992.34)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -684.20), SIMDE_FLOAT64_C( -317.33), SIMDE_FLOAT64_C(   88.04), SIMDE_FLOAT64_C(  992.34)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  481.41), SIMDE_FLOAT32_C( -117.39), SIMDE_FLOAT32_C(  583.52), SIMDE_FLOAT32_C(  373.09)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  481.41), SIMDE_FLOAT64_C( -117.39), SIMDE_FLOAT64_C(  583.52), SIMDE_FLOAT64_C(  373.09)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  798.14), SIMDE_FLOAT32_C(  486.93), SIMDE_FLOAT32_C( -832.59), SIMDE_FLOAT32_C(  796.43)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  798.14), SIMDE_FLOAT64_C(  486.93), SIMDE_FLOAT64_C( -832.59), SIMDE_FLOAT64_C(  796.43)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   75.03), SIMDE_FLOAT32_C(  634.86), SIMDE_FLOAT32_C(  319.54), SIMDE_FLOAT32_C( -801.15)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   75.03), SIMDE_FLOAT64_C(  634.86), SIMDE_FLOAT64_C(  319.54), SIMDE_FLOAT64_C( -801.15)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m256d r = simde_mm256_cvtps_pd(test_vec[i].a);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_cvtsd_f64 with eight fixed inputs.
 *
 * Each row holds four doubles that are loaded with simde_mm256_loadu_pd
 * and a scalar `r`. In every row `r` agrees with `a[0]` to the two
 * decimals shown for `a[0]` (e.g. -819.83 vs. -819.834690); the
 * comparison uses simde_assert_equal_f64 with a precision argument of 1.
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtsd_f64 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 a[4];
    const simde_float64 r;
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(  -819.83), SIMDE_FLOAT64_C(    57.52), SIMDE_FLOAT64_C(   410.09), SIMDE_FLOAT64_C(  -207.39) }, SIMDE_FLOAT64_C(-819.834690) },
    { { SIMDE_FLOAT64_C(  -757.62), SIMDE_FLOAT64_C(   670.35), SIMDE_FLOAT64_C(   -81.02), SIMDE_FLOAT64_C(  -418.01) }, SIMDE_FLOAT64_C(-757.623120) },
    { { SIMDE_FLOAT64_C(   690.51), SIMDE_FLOAT64_C(  -758.25), SIMDE_FLOAT64_C(   941.18), SIMDE_FLOAT64_C(   596.59) }, SIMDE_FLOAT64_C(690.512804) },
    { { SIMDE_FLOAT64_C(   816.68), SIMDE_FLOAT64_C(   897.92), SIMDE_FLOAT64_C(   232.01), SIMDE_FLOAT64_C(   196.85) }, SIMDE_FLOAT64_C(816.682969) },
    { { SIMDE_FLOAT64_C(  -223.12), SIMDE_FLOAT64_C(  -393.67), SIMDE_FLOAT64_C(   665.54), SIMDE_FLOAT64_C(  -958.13) }, SIMDE_FLOAT64_C(-223.120246) },
    { { SIMDE_FLOAT64_C(  -129.08), SIMDE_FLOAT64_C(  -576.73), SIMDE_FLOAT64_C(  -588.43), SIMDE_FLOAT64_C(  -263.46) }, SIMDE_FLOAT64_C(-129.075903) },
    { { SIMDE_FLOAT64_C(   621.84), SIMDE_FLOAT64_C(   726.67), SIMDE_FLOAT64_C(   204.36), SIMDE_FLOAT64_C(  -348.61) }, SIMDE_FLOAT64_C(621.835955) },
    { { SIMDE_FLOAT64_C(   679.52), SIMDE_FLOAT64_C(  -219.86), SIMDE_FLOAT64_C(   812.70), SIMDE_FLOAT64_C(   859.69) }, SIMDE_FLOAT64_C(679.523220) },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a);
    simde_float64 r = simde_mm256_cvtsd_f64(a);
    simde_assert_equal_f64(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_cvtsi256_si32 with eight fixed inputs.
 *
 * Each row holds eight 32-bit integers that are loaded with
 * simde_x_mm256_loadu_epi32 and a scalar `r`; in every row `r` equals
 * `a[0]`. The result must match exactly (simde_assert_equal_i32).
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtsi256_si32 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const int32_t a[8];
    const int32_t r;
  } test_vec[] = {
    { {  INT32_C(  1220605077), -INT32_C(  1798958528),  INT32_C(  1922714309),  INT32_C(  1773856549),
        -INT32_C(  1781872958), -INT32_C(  1646416163),  INT32_C(  1580794507), -INT32_C(   381163180) },
      INT32_C(1220605077) },
    { {  INT32_C(   506595294), -INT32_C(   541919207),  INT32_C(  1733381442), -INT32_C(   137360588),
        -INT32_C(  1349739822), -INT32_C(   749967032), -INT32_C(  1070496148),  INT32_C(  1302952047) },
      INT32_C(506595294) },
    { { -INT32_C(  1720919169),  INT32_C(   360193747), -INT32_C(  1602434709), -INT32_C(  1500033580),
         INT32_C(   810951655), -INT32_C(   133979508), -INT32_C(  1732758232),  INT32_C(   770007725) },
      INT32_C(-1720919169) },
    { {  INT32_C(   298209597), -INT32_C(   618250640), -INT32_C(   612654329), -INT32_C(   679341328),
        -INT32_C(  1039673291), -INT32_C(  1548088454), -INT32_C(   314870976),  INT32_C(   303702229) },
      INT32_C(298209597) },
    { { -INT32_C(   500965262),  INT32_C(   633162270), -INT32_C(   587122195), -INT32_C(  2118876341),
        -INT32_C(   716981157),  INT32_C(   125369799),  INT32_C(  1173664624),  INT32_C(  1180176340) },
      INT32_C(-500965262) },
    { {  INT32_C(   237534191), -INT32_C(  1305221691),  INT32_C(  1787769886),  INT32_C(   317408439),
        -INT32_C(   941085184), -INT32_C(  1664196565), -INT32_C(   387857900), -INT32_C(  1053935151) },
      INT32_C(237534191) },
    { {  INT32_C(  2060408501),  INT32_C(  1512899131), -INT32_C(   289096649),  INT32_C(      110336),
         INT32_C(   147319261),  INT32_C(  1571067209),  INT32_C(   692422232),  INT32_C(  1961522111) },
      INT32_C(2060408501) },
    { {  INT32_C(    99596745), -INT32_C(   211870532), -INT32_C(   639556648), -INT32_C(  1344675118),
         INT32_C(   364356043), -INT32_C(  1905107914), -INT32_C(  1581729566), -INT32_C(   183066069) },
      INT32_C(99596745) },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a);
    int32_t r = simde_mm256_cvtsi256_si32(a);
    simde_assert_equal_i32(r, test_vec[i].r);
  }

  return 0;
}

/* Exercises simde_mm256_cvtss_f32 with eight fixed inputs.
 *
 * Each row holds eight floats that are loaded with simde_mm256_loadu_ps
 * and a scalar `r`. In every row `r` agrees with `a[0]` to the two
 * decimals shown for `a[0]` (e.g. -520.55 vs. -520.547485); the
 * comparison uses simde_assert_equal_f32 with a precision argument of 1.
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvtss_f32 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 a[8];
    const simde_float32 r;
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(  -520.55), SIMDE_FLOAT32_C(  -165.02), SIMDE_FLOAT32_C(   891.39), SIMDE_FLOAT32_C(   353.19),
        SIMDE_FLOAT32_C(   212.08), SIMDE_FLOAT32_C(  -507.29), SIMDE_FLOAT32_C(   216.85), SIMDE_FLOAT32_C(  -490.64) }, SIMDE_FLOAT32_C(-520.547485) },
    { { SIMDE_FLOAT32_C(  -205.41), SIMDE_FLOAT32_C(   942.67), SIMDE_FLOAT32_C(   205.37), SIMDE_FLOAT32_C(  -558.52),
        SIMDE_FLOAT32_C(   450.12), SIMDE_FLOAT32_C(  -888.95), SIMDE_FLOAT32_C(    95.41), SIMDE_FLOAT32_C(  -731.65) }, SIMDE_FLOAT32_C(-205.409851) },
    { { SIMDE_FLOAT32_C(   958.49), SIMDE_FLOAT32_C(   649.60), SIMDE_FLOAT32_C(   758.41), SIMDE_FLOAT32_C(  -641.04),
        SIMDE_FLOAT32_C(   632.75), SIMDE_FLOAT32_C(  -642.73), SIMDE_FLOAT32_C(  -156.06), SIMDE_FLOAT32_C(   828.00) }, SIMDE_FLOAT32_C(958.494385) },
    { { SIMDE_FLOAT32_C(  -172.13), SIMDE_FLOAT32_C(   270.97), SIMDE_FLOAT32_C(  -877.82), SIMDE_FLOAT32_C(  -648.51),
        SIMDE_FLOAT32_C(   726.04), SIMDE_FLOAT32_C(   208.53), SIMDE_FLOAT32_C(   427.83), SIMDE_FLOAT32_C(  -794.51) }, SIMDE_FLOAT32_C(-172.134399) },
    { { SIMDE_FLOAT32_C(  -956.50), SIMDE_FLOAT32_C(   319.22), SIMDE_FLOAT32_C(   558.68), SIMDE_FLOAT32_C(   255.58),
        SIMDE_FLOAT32_C(   811.93), SIMDE_FLOAT32_C(  -224.47), SIMDE_FLOAT32_C(   764.95), SIMDE_FLOAT32_C(  -393.48) }, SIMDE_FLOAT32_C(-956.495544) },
    { { SIMDE_FLOAT32_C(  -281.80), SIMDE_FLOAT32_C(   -29.68), SIMDE_FLOAT32_C(    48.00), SIMDE_FLOAT32_C(  -831.68),
        SIMDE_FLOAT32_C(    81.37), SIMDE_FLOAT32_C(  -856.59), SIMDE_FLOAT32_C(  -563.33), SIMDE_FLOAT32_C(    39.86) }, SIMDE_FLOAT32_C(-281.800598) },
    { { SIMDE_FLOAT32_C(   793.01), SIMDE_FLOAT32_C(  -804.92), SIMDE_FLOAT32_C(   398.82), SIMDE_FLOAT32_C(   425.76),
        SIMDE_FLOAT32_C(  -447.64), SIMDE_FLOAT32_C(  -757.23), SIMDE_FLOAT32_C(   253.75), SIMDE_FLOAT32_C(   380.22) }, SIMDE_FLOAT32_C(793.009399) },
    { { SIMDE_FLOAT32_C(   513.74), SIMDE_FLOAT32_C(   375.93), SIMDE_FLOAT32_C(   731.71), SIMDE_FLOAT32_C(   239.78),
        SIMDE_FLOAT32_C(  -415.54), SIMDE_FLOAT32_C(   159.54), SIMDE_FLOAT32_C(   445.27), SIMDE_FLOAT32_C(  -372.04) }, SIMDE_FLOAT32_C(513.740845) },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a);
    simde_float32 r = simde_mm256_cvtss_f32(a);
    simde_assert_equal_f32(r, test_vec[i].r, 1);
  }

  return 0;
}
/* Exercises simde_mm256_cvttpd_epi32 with eight fixed inputs.
 *
 * The expected integers in `r` are the doubles in `a` with the
 * fractional part dropped, i.e. truncated toward zero
 * (e.g. -175.82 -> -175, 586.65 -> 586). Results must match exactly
 * (simde_assert_m128i_i32 with `==`).
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvttpd_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m128i r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -175.82), SIMDE_FLOAT64_C(  -91.19), SIMDE_FLOAT64_C( -855.64), SIMDE_FLOAT64_C(-1000.00)),
      simde_mm_set_epi32(INT32_C(-175), INT32_C( -91), INT32_C(-855), INT32_C(-1000)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  296.54), SIMDE_FLOAT64_C(  312.22), SIMDE_FLOAT64_C( -648.31), SIMDE_FLOAT64_C(  586.65)),
      simde_mm_set_epi32(INT32_C( 296), INT32_C( 312), INT32_C(-648), INT32_C( 586)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  502.08), SIMDE_FLOAT64_C( -904.00), SIMDE_FLOAT64_C(  802.10), SIMDE_FLOAT64_C(  616.09)),
      simde_mm_set_epi32(INT32_C( 502), INT32_C(-904), INT32_C( 802), INT32_C( 616)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  808.28), SIMDE_FLOAT64_C( -212.29), SIMDE_FLOAT64_C(   75.93), SIMDE_FLOAT64_C( -979.81)),
      simde_mm_set_epi32(INT32_C( 808), INT32_C(-212), INT32_C(  75), INT32_C(-979)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -426.54), SIMDE_FLOAT64_C(  577.52), SIMDE_FLOAT64_C(  966.87), SIMDE_FLOAT64_C(  162.81)),
      simde_mm_set_epi32(INT32_C(-426), INT32_C( 577), INT32_C( 966), INT32_C( 162)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -50.83), SIMDE_FLOAT64_C(  928.40), SIMDE_FLOAT64_C(  392.36), SIMDE_FLOAT64_C(  469.60)),
      simde_mm_set_epi32(INT32_C( -50), INT32_C( 928), INT32_C( 392), INT32_C( 469)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  701.55), SIMDE_FLOAT64_C( -334.03), SIMDE_FLOAT64_C(  803.63), SIMDE_FLOAT64_C(  -68.22)),
      simde_mm_set_epi32(INT32_C( 701), INT32_C(-334), INT32_C( 803), INT32_C( -68)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -168.66), SIMDE_FLOAT64_C( -164.87), SIMDE_FLOAT64_C(  824.77), SIMDE_FLOAT64_C( -834.37)),
      simde_mm_set_epi32(INT32_C(-168), INT32_C(-164), INT32_C( 824), INT32_C(-834)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m128i r = simde_mm256_cvttpd_epi32(test_vec[i].a);
    simde_assert_m128i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Exercises simde_mm256_cvttps_epi32 with eight fixed inputs.
 *
 * The expected integers in `r` are the floats in `a` with the
 * fractional part dropped, i.e. truncated toward zero
 * (e.g. -135.75 -> -135, 326.76 -> 326). Results must match exactly
 * (simde_assert_m256i_i32 with `==`).
 *
 * Returns 0 after the last vector has been checked; what happens on a
 * mismatch is decided by the assert macro (not visible here). */
static int
test_simde_mm256_cvttps_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -135.75), SIMDE_FLOAT32_C(  534.39), SIMDE_FLOAT32_C(  -81.93), SIMDE_FLOAT32_C( -234.94),
                         SIMDE_FLOAT32_C( -390.94), SIMDE_FLOAT32_C( -625.05), SIMDE_FLOAT32_C(  991.22), SIMDE_FLOAT32_C(  326.76)),
      simde_mm256_set_epi32(INT32_C(-135), INT32_C( 534), INT32_C( -81), INT32_C(-234), INT32_C(-390), INT32_C(-625), INT32_C( 991), INT32_C( 326)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  864.35), SIMDE_FLOAT32_C(  855.52), SIMDE_FLOAT32_C( -619.15), SIMDE_FLOAT32_C( -985.70),
                         SIMDE_FLOAT32_C( -511.44), SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C(   88.57), SIMDE_FLOAT32_C(  775.15)),
      simde_mm256_set_epi32(INT32_C( 864), INT32_C( 855), INT32_C(-619), INT32_C(-985), INT32_C(-511), INT32_C( 327), INT32_C(  88), INT32_C( 775)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  464.19), SIMDE_FLOAT32_C( -184.66), SIMDE_FLOAT32_C(  626.41), SIMDE_FLOAT32_C(  -54.43),
                         SIMDE_FLOAT32_C(  810.20), SIMDE_FLOAT32_C(  906.68), SIMDE_FLOAT32_C(  -63.04), SIMDE_FLOAT32_C( -182.48)),
      simde_mm256_set_epi32(INT32_C( 464), INT32_C(-184), INT32_C( 626), INT32_C( -54), INT32_C( 810), INT32_C( 906), INT32_C( -63), INT32_C(-182)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  127.49), SIMDE_FLOAT32_C( -473.91), SIMDE_FLOAT32_C( -171.73), SIMDE_FLOAT32_C( -903.89),
                         SIMDE_FLOAT32_C(   73.85), SIMDE_FLOAT32_C( -545.98), SIMDE_FLOAT32_C( -240.40), SIMDE_FLOAT32_C(  286.08)),
      simde_mm256_set_epi32(INT32_C( 127), INT32_C(-473), INT32_C(-171), INT32_C(-903), INT32_C(  73), INT32_C(-545), INT32_C(-240), INT32_C( 286)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -184.43), SIMDE_FLOAT32_C(  171.84), SIMDE_FLOAT32_C( -693.45), SIMDE_FLOAT32_C( -961.18),
                         SIMDE_FLOAT32_C( -527.37), SIMDE_FLOAT32_C(  565.38), SIMDE_FLOAT32_C(  865.23), SIMDE_FLOAT32_C(  998.03)),
      simde_mm256_set_epi32(INT32_C(-184), INT32_C( 171), INT32_C(-693), INT32_C(-961), INT32_C(-527), INT32_C( 565), INT32_C( 865), INT32_C( 998)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  417.68), SIMDE_FLOAT32_C(  795.88), SIMDE_FLOAT32_C( -307.52), SIMDE_FLOAT32_C(   75.71),
                         SIMDE_FLOAT32_C( -179.42), SIMDE_FLOAT32_C( -352.61), SIMDE_FLOAT32_C( -314.52), SIMDE_FLOAT32_C(  250.68)),
      simde_mm256_set_epi32(INT32_C( 417), INT32_C( 795), INT32_C(-307), INT32_C(  75), INT32_C(-179), INT32_C(-352), INT32_C(-314), INT32_C( 250)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   89.37), SIMDE_FLOAT32_C( -634.21), SIMDE_FLOAT32_C(  115.63), SIMDE_FLOAT32_C(  878.23),
                         SIMDE_FLOAT32_C( -321.46), SIMDE_FLOAT32_C(  524.08), SIMDE_FLOAT32_C(  597.19), SIMDE_FLOAT32_C(  940.58)),
      simde_mm256_set_epi32(INT32_C(  89), INT32_C(-634), INT32_C( 115), INT32_C( 878), INT32_C(-321), INT32_C( 524), INT32_C( 597), INT32_C( 940)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -643.91), SIMDE_FLOAT32_C(  102.13), SIMDE_FLOAT32_C(  800.40), SIMDE_FLOAT32_C( -587.08),
                         SIMDE_FLOAT32_C( -734.61), SIMDE_FLOAT32_C(  772.26), SIMDE_FLOAT32_C( -256.23), SIMDE_FLOAT32_C( -452.64)),
      simde_mm256_set_epi32(INT32_C(-643), INT32_C( 102), INT32_C( 800), INT32_C(-587), INT32_C(-734), INT32_C( 772), INT32_C(-256), INT32_C(-452)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i;

  for (i = 0 ; i < vec_count ; i++) {
    simde__m256i r = simde_mm256_cvttps_epi32(test_vec[i].a);
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Exercises simde_mm256_div_ps on eight fixed (a, b) pairs of 8 x float32
 * vectors.  Each table entry stores the dividend, the divisor and the
 * expected quotient written to two decimal places; the result is checked
 * with the "close" assertion using a precision argument of 1.
 * Returns 0 once every entry has been checked. */
static int
test_simde_mm256_div_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  675.83), SIMDE_FLOAT32_C(  732.26),
                         SIMDE_FLOAT32_C(   -4.57), SIMDE_FLOAT32_C( -168.80),
                         SIMDE_FLOAT32_C( -520.00), SIMDE_FLOAT32_C( -692.17),
                         SIMDE_FLOAT32_C(  934.56), SIMDE_FLOAT32_C(  631.79)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -378.09), SIMDE_FLOAT32_C( -656.19),
                         SIMDE_FLOAT32_C( -265.99), SIMDE_FLOAT32_C( -457.08),
                         SIMDE_FLOAT32_C( -481.51), SIMDE_FLOAT32_C(  732.73),
                         SIMDE_FLOAT32_C(  321.36), SIMDE_FLOAT32_C( -269.65)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -1.79), SIMDE_FLOAT32_C(   -1.12),
                         SIMDE_FLOAT32_C(    0.02), SIMDE_FLOAT32_C(    0.37),
                         SIMDE_FLOAT32_C(    1.08), SIMDE_FLOAT32_C(   -0.94),
                         SIMDE_FLOAT32_C(    2.91), SIMDE_FLOAT32_C(   -2.34)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  118.79), SIMDE_FLOAT32_C(  887.62),
                         SIMDE_FLOAT32_C(  493.85), SIMDE_FLOAT32_C( -554.19),
                         SIMDE_FLOAT32_C(  954.00), SIMDE_FLOAT32_C( -438.20),
                         SIMDE_FLOAT32_C(  457.40), SIMDE_FLOAT32_C( -597.80)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  136.05), SIMDE_FLOAT32_C( -648.50),
                         SIMDE_FLOAT32_C(  975.99), SIMDE_FLOAT32_C(  125.14),
                         SIMDE_FLOAT32_C(  391.49), SIMDE_FLOAT32_C( -989.28),
                         SIMDE_FLOAT32_C( -980.53), SIMDE_FLOAT32_C(  107.28)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.87), SIMDE_FLOAT32_C(   -1.37),
                         SIMDE_FLOAT32_C(    0.51), SIMDE_FLOAT32_C(   -4.43),
                         SIMDE_FLOAT32_C(    2.44), SIMDE_FLOAT32_C(    0.44),
                         SIMDE_FLOAT32_C(   -0.47), SIMDE_FLOAT32_C(   -5.57)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  788.25), SIMDE_FLOAT32_C( -786.20),
                         SIMDE_FLOAT32_C( -386.26), SIMDE_FLOAT32_C( -761.33),
                         SIMDE_FLOAT32_C(  307.75), SIMDE_FLOAT32_C(  863.78),
                         SIMDE_FLOAT32_C(  634.25), SIMDE_FLOAT32_C(  687.96)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  593.79), SIMDE_FLOAT32_C( -856.02),
                         SIMDE_FLOAT32_C(  504.54), SIMDE_FLOAT32_C(  553.51),
                         SIMDE_FLOAT32_C(  287.73), SIMDE_FLOAT32_C( -351.53),
                         SIMDE_FLOAT32_C( -572.54), SIMDE_FLOAT32_C(  264.37)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    1.33), SIMDE_FLOAT32_C(    0.92),
                         SIMDE_FLOAT32_C(   -0.77), SIMDE_FLOAT32_C(   -1.38),
                         SIMDE_FLOAT32_C(    1.07), SIMDE_FLOAT32_C(   -2.46),
                         SIMDE_FLOAT32_C(   -1.11), SIMDE_FLOAT32_C(    2.60)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  139.28), SIMDE_FLOAT32_C(  906.17),
                         SIMDE_FLOAT32_C(  486.23), SIMDE_FLOAT32_C(  556.78),
                         SIMDE_FLOAT32_C( -178.50), SIMDE_FLOAT32_C( -222.99),
                         SIMDE_FLOAT32_C(  642.44), SIMDE_FLOAT32_C(  839.86)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -272.29), SIMDE_FLOAT32_C( -752.32),
                         SIMDE_FLOAT32_C( -335.75), SIMDE_FLOAT32_C( -704.13),
                         SIMDE_FLOAT32_C(  526.15), SIMDE_FLOAT32_C( -407.90),
                         SIMDE_FLOAT32_C(  -13.13), SIMDE_FLOAT32_C( -893.18)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.51), SIMDE_FLOAT32_C(   -1.20),
                         SIMDE_FLOAT32_C(   -1.45), SIMDE_FLOAT32_C(   -0.79),
                         SIMDE_FLOAT32_C(   -0.34), SIMDE_FLOAT32_C(    0.55),
                         SIMDE_FLOAT32_C(  -48.93), SIMDE_FLOAT32_C(   -0.94)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -332.01), SIMDE_FLOAT32_C( -279.46),
                         SIMDE_FLOAT32_C(  360.69), SIMDE_FLOAT32_C( -121.43),
                         SIMDE_FLOAT32_C(  819.79), SIMDE_FLOAT32_C(  512.44),
                         SIMDE_FLOAT32_C( -185.75), SIMDE_FLOAT32_C(  503.23)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  621.10), SIMDE_FLOAT32_C(  514.79),
                         SIMDE_FLOAT32_C( -672.95), SIMDE_FLOAT32_C(  -62.01),
                         SIMDE_FLOAT32_C( -155.11), SIMDE_FLOAT32_C(  518.20),
                         SIMDE_FLOAT32_C( -704.10), SIMDE_FLOAT32_C( -249.95)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.53), SIMDE_FLOAT32_C(   -0.54),
                         SIMDE_FLOAT32_C(   -0.54), SIMDE_FLOAT32_C(    1.96),
                         SIMDE_FLOAT32_C(   -5.29), SIMDE_FLOAT32_C(    0.99),
                         SIMDE_FLOAT32_C(    0.26), SIMDE_FLOAT32_C(   -2.01)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -784.88), SIMDE_FLOAT32_C( -855.22),
                         SIMDE_FLOAT32_C( -347.04), SIMDE_FLOAT32_C( -241.02),
                         SIMDE_FLOAT32_C(  748.57), SIMDE_FLOAT32_C( -179.02),
                         SIMDE_FLOAT32_C( -995.77), SIMDE_FLOAT32_C( -927.09)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  868.43), SIMDE_FLOAT32_C(    9.90),
                         SIMDE_FLOAT32_C( -308.85), SIMDE_FLOAT32_C( -944.06),
                         SIMDE_FLOAT32_C( -323.62), SIMDE_FLOAT32_C(  739.02),
                         SIMDE_FLOAT32_C(  -61.38), SIMDE_FLOAT32_C(  426.14)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.90), SIMDE_FLOAT32_C(  -86.39),
                         SIMDE_FLOAT32_C(    1.12), SIMDE_FLOAT32_C(    0.26),
                         SIMDE_FLOAT32_C(   -2.31), SIMDE_FLOAT32_C(   -0.24),
                         SIMDE_FLOAT32_C(   16.22), SIMDE_FLOAT32_C(   -2.18)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  307.07), SIMDE_FLOAT32_C(  591.05),
                         SIMDE_FLOAT32_C( -630.36), SIMDE_FLOAT32_C(  826.28),
                         SIMDE_FLOAT32_C( -436.93), SIMDE_FLOAT32_C(  982.53),
                         SIMDE_FLOAT32_C( -808.08), SIMDE_FLOAT32_C(  630.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -339.08), SIMDE_FLOAT32_C( -235.71),
                         SIMDE_FLOAT32_C( -196.92), SIMDE_FLOAT32_C(   27.62),
                         SIMDE_FLOAT32_C( -443.69), SIMDE_FLOAT32_C(  242.35),
                         SIMDE_FLOAT32_C(  774.01), SIMDE_FLOAT32_C(  833.91)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.91), SIMDE_FLOAT32_C(   -2.51),
                         SIMDE_FLOAT32_C(    3.20), SIMDE_FLOAT32_C(   29.92),
                         SIMDE_FLOAT32_C(    0.98), SIMDE_FLOAT32_C(    4.05),
                         SIMDE_FLOAT32_C(   -1.04), SIMDE_FLOAT32_C(    0.76)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -830.19), SIMDE_FLOAT32_C( -620.56),
                         SIMDE_FLOAT32_C( -306.38), SIMDE_FLOAT32_C( -602.04),
                         SIMDE_FLOAT32_C(  183.46), SIMDE_FLOAT32_C(  824.79),
                         SIMDE_FLOAT32_C( -492.06), SIMDE_FLOAT32_C( -609.65)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -335.15), SIMDE_FLOAT32_C( -390.73),
                         SIMDE_FLOAT32_C(  951.40), SIMDE_FLOAT32_C(  398.19),
                         SIMDE_FLOAT32_C(  181.71), SIMDE_FLOAT32_C( -932.03),
                         SIMDE_FLOAT32_C(  887.77), SIMDE_FLOAT32_C(  257.75)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    2.48), SIMDE_FLOAT32_C(    1.59),
                         SIMDE_FLOAT32_C(   -0.32), SIMDE_FLOAT32_C(   -1.51),
                         SIMDE_FLOAT32_C(    1.01), SIMDE_FLOAT32_C(   -0.88),
                         SIMDE_FLOAT32_C(   -0.55), SIMDE_FLOAT32_C(   -2.37)) }
  };
  /* Number of table entries, derived from the array so it tracks the table. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    const simde__m256 dividend = test_vec[i].a;
    const simde__m256 divisor = test_vec[i].b;
    simde__m256 r = simde_mm256_div_ps(dividend, divisor);

    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_div_pd on eight fixed (a, b) pairs of 4 x float64
 * vectors.  Each table entry stores the dividend, the divisor and the
 * expected quotient written to two decimal places; the result is checked
 * with the "close" assertion using a precision argument of 1.
 * Returns 0 once every entry has been checked. */
static int
test_simde_mm256_div_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d b;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   42.76), SIMDE_FLOAT64_C(  925.42),
                         SIMDE_FLOAT64_C(  624.80), SIMDE_FLOAT64_C(  413.87)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -621.50), SIMDE_FLOAT64_C( -651.30),
                         SIMDE_FLOAT64_C( -233.59), SIMDE_FLOAT64_C( -713.35)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -0.07), SIMDE_FLOAT64_C(   -1.42),
                         SIMDE_FLOAT64_C(   -2.67), SIMDE_FLOAT64_C(   -0.58)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  275.93), SIMDE_FLOAT64_C(  360.88),
                         SIMDE_FLOAT64_C(   -7.47), SIMDE_FLOAT64_C( -347.34)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  479.86), SIMDE_FLOAT64_C(  205.26),
                         SIMDE_FLOAT64_C(  174.68), SIMDE_FLOAT64_C(  363.12)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.58), SIMDE_FLOAT64_C(    1.76),
                         SIMDE_FLOAT64_C(   -0.04), SIMDE_FLOAT64_C(   -0.96)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  977.82), SIMDE_FLOAT64_C(  875.25),
                         SIMDE_FLOAT64_C(  775.86), SIMDE_FLOAT64_C(  314.76)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -748.72), SIMDE_FLOAT64_C(  258.24),
                         SIMDE_FLOAT64_C( -578.49), SIMDE_FLOAT64_C( -708.35)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -1.31), SIMDE_FLOAT64_C(    3.39),
                         SIMDE_FLOAT64_C(   -1.34), SIMDE_FLOAT64_C(   -0.44)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  617.56), SIMDE_FLOAT64_C( -254.21),
                         SIMDE_FLOAT64_C( -890.06), SIMDE_FLOAT64_C( -996.38)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -247.08), SIMDE_FLOAT64_C(  661.94),
                         SIMDE_FLOAT64_C( -120.93), SIMDE_FLOAT64_C( -574.61)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -2.50), SIMDE_FLOAT64_C(   -0.38),
                         SIMDE_FLOAT64_C(    7.36), SIMDE_FLOAT64_C(    1.73)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -932.29), SIMDE_FLOAT64_C( -263.62),
                         SIMDE_FLOAT64_C( -571.69), SIMDE_FLOAT64_C(  -83.26)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  395.02), SIMDE_FLOAT64_C(  290.85),
                         SIMDE_FLOAT64_C( -853.00), SIMDE_FLOAT64_C(  928.61)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -2.36), SIMDE_FLOAT64_C(   -0.91),
                         SIMDE_FLOAT64_C(    0.67), SIMDE_FLOAT64_C(   -0.09)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -109.60), SIMDE_FLOAT64_C( -812.14),
                         SIMDE_FLOAT64_C( -474.36), SIMDE_FLOAT64_C( -732.62)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -982.62), SIMDE_FLOAT64_C( -995.34),
                         SIMDE_FLOAT64_C(  -51.94), SIMDE_FLOAT64_C(  973.17)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    0.11), SIMDE_FLOAT64_C(    0.82),
                         SIMDE_FLOAT64_C(    9.13), SIMDE_FLOAT64_C(   -0.75)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -826.31), SIMDE_FLOAT64_C( -333.99),
                         SIMDE_FLOAT64_C( -238.49), SIMDE_FLOAT64_C( -706.13)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  929.44), SIMDE_FLOAT64_C(  493.44),
                         SIMDE_FLOAT64_C( -539.23), SIMDE_FLOAT64_C( -683.88)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -0.89), SIMDE_FLOAT64_C(   -0.68),
                         SIMDE_FLOAT64_C(    0.44), SIMDE_FLOAT64_C(    1.03)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  419.98), SIMDE_FLOAT64_C(  -51.88),
                         SIMDE_FLOAT64_C( -580.15), SIMDE_FLOAT64_C( -198.88)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  246.03), SIMDE_FLOAT64_C( -149.94),
                         SIMDE_FLOAT64_C( -107.67), SIMDE_FLOAT64_C(  875.62)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    1.71), SIMDE_FLOAT64_C(    0.35),
                         SIMDE_FLOAT64_C(    5.39), SIMDE_FLOAT64_C(   -0.23)) }
  };
  /* Number of table entries, derived from the array so it tracks the table. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    const simde__m256d dividend = test_vec[i].a;
    const simde__m256d divisor = test_vec[i].b;
    simde__m256d r = simde_mm256_div_pd(dividend, divisor);

    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_floor_ps on eight fixed 8 x float32 inputs.
 * Each table entry pairs an input vector with the expected result, where
 * every lane holds the largest whole number not greater than the input
 * lane (negative inputs therefore move away from zero).  Results are
 * checked with the "close" assertion using a precision argument of 1.
 * Returns 0 once every entry has been checked. */
static int
test_simde_mm256_floor_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  520.72), SIMDE_FLOAT32_C(  834.16),
                         SIMDE_FLOAT32_C( -945.36), SIMDE_FLOAT32_C( -135.41),
                         SIMDE_FLOAT32_C(  289.19), SIMDE_FLOAT32_C(  462.54),
                         SIMDE_FLOAT32_C( -937.67), SIMDE_FLOAT32_C(  706.09)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  520.00), SIMDE_FLOAT32_C(  834.00),
                         SIMDE_FLOAT32_C( -946.00), SIMDE_FLOAT32_C( -136.00),
                         SIMDE_FLOAT32_C(  289.00), SIMDE_FLOAT32_C(  462.00),
                         SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C(  706.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -9.00), SIMDE_FLOAT32_C(  401.24),
                         SIMDE_FLOAT32_C(  899.70), SIMDE_FLOAT32_C( -258.03),
                         SIMDE_FLOAT32_C( -634.92), SIMDE_FLOAT32_C( -438.26),
                         SIMDE_FLOAT32_C(  433.94), SIMDE_FLOAT32_C( -170.51)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -9.00), SIMDE_FLOAT32_C(  401.00),
                         SIMDE_FLOAT32_C(  899.00), SIMDE_FLOAT32_C( -259.00),
                         SIMDE_FLOAT32_C( -635.00), SIMDE_FLOAT32_C( -439.00),
                         SIMDE_FLOAT32_C(  433.00), SIMDE_FLOAT32_C( -171.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -908.95), SIMDE_FLOAT32_C( -376.33),
                         SIMDE_FLOAT32_C( -639.88), SIMDE_FLOAT32_C(   40.45),
                         SIMDE_FLOAT32_C( -431.46), SIMDE_FLOAT32_C( -404.49),
                         SIMDE_FLOAT32_C( -411.60), SIMDE_FLOAT32_C(  531.65)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -909.00), SIMDE_FLOAT32_C( -377.00),
                         SIMDE_FLOAT32_C( -640.00), SIMDE_FLOAT32_C(   40.00),
                         SIMDE_FLOAT32_C( -432.00), SIMDE_FLOAT32_C( -405.00),
                         SIMDE_FLOAT32_C( -412.00), SIMDE_FLOAT32_C(  531.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -394.79), SIMDE_FLOAT32_C( -654.48),
                         SIMDE_FLOAT32_C(  223.95), SIMDE_FLOAT32_C( -557.45),
                         SIMDE_FLOAT32_C(  908.61), SIMDE_FLOAT32_C( -493.34),
                         SIMDE_FLOAT32_C(  466.68), SIMDE_FLOAT32_C( -301.36)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -395.00), SIMDE_FLOAT32_C( -655.00),
                         SIMDE_FLOAT32_C(  223.00), SIMDE_FLOAT32_C( -558.00),
                         SIMDE_FLOAT32_C(  908.00), SIMDE_FLOAT32_C( -494.00),
                         SIMDE_FLOAT32_C(  466.00), SIMDE_FLOAT32_C( -302.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -5.40), SIMDE_FLOAT32_C(  828.84),
                         SIMDE_FLOAT32_C(  468.99), SIMDE_FLOAT32_C(  665.66),
                         SIMDE_FLOAT32_C( -648.14), SIMDE_FLOAT32_C( -841.90),
                         SIMDE_FLOAT32_C( -380.33), SIMDE_FLOAT32_C(  740.32)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -6.00), SIMDE_FLOAT32_C(  828.00),
                         SIMDE_FLOAT32_C(  468.00), SIMDE_FLOAT32_C(  665.00),
                         SIMDE_FLOAT32_C( -649.00), SIMDE_FLOAT32_C( -842.00),
                         SIMDE_FLOAT32_C( -381.00), SIMDE_FLOAT32_C(  740.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -752.16), SIMDE_FLOAT32_C( -655.98),
                         SIMDE_FLOAT32_C(  902.13), SIMDE_FLOAT32_C(  972.30),
                         SIMDE_FLOAT32_C( -497.57), SIMDE_FLOAT32_C( -530.16),
                         SIMDE_FLOAT32_C( -966.55), SIMDE_FLOAT32_C(  570.95)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -753.00), SIMDE_FLOAT32_C( -656.00),
                         SIMDE_FLOAT32_C(  902.00), SIMDE_FLOAT32_C(  972.00),
                         SIMDE_FLOAT32_C( -498.00), SIMDE_FLOAT32_C( -531.00),
                         SIMDE_FLOAT32_C( -967.00), SIMDE_FLOAT32_C(  570.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  723.18), SIMDE_FLOAT32_C(  202.51),
                         SIMDE_FLOAT32_C(  -41.39), SIMDE_FLOAT32_C( -372.98),
                         SIMDE_FLOAT32_C(  697.91), SIMDE_FLOAT32_C( -303.11),
                         SIMDE_FLOAT32_C( -180.07), SIMDE_FLOAT32_C(  941.44)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  723.00), SIMDE_FLOAT32_C(  202.00),
                         SIMDE_FLOAT32_C(  -42.00), SIMDE_FLOAT32_C( -373.00),
                         SIMDE_FLOAT32_C(  697.00), SIMDE_FLOAT32_C( -304.00),
                         SIMDE_FLOAT32_C( -181.00), SIMDE_FLOAT32_C(  941.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  995.24), SIMDE_FLOAT32_C(   68.12),
                         SIMDE_FLOAT32_C(  284.41), SIMDE_FLOAT32_C(  723.96),
                         SIMDE_FLOAT32_C( -373.95), SIMDE_FLOAT32_C(   15.43),
                         SIMDE_FLOAT32_C( -498.85), SIMDE_FLOAT32_C(  581.12)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  995.00), SIMDE_FLOAT32_C(   68.00),
                         SIMDE_FLOAT32_C(  284.00), SIMDE_FLOAT32_C(  723.00),
                         SIMDE_FLOAT32_C( -374.00), SIMDE_FLOAT32_C(   15.00),
                         SIMDE_FLOAT32_C( -499.00), SIMDE_FLOAT32_C(  581.00)) }
  };
  /* Number of table entries, derived from the array so it tracks the table. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    const simde__m256 input = test_vec[i].a;
    simde__m256 r = simde_mm256_floor_ps(input);

    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_extractf128_pd with both lane selectors.
 * For every table entry, lane 0 is extracted from `a` and compared with
 * `ra` (the last two values given to simde_mm256_set_pd for `a`), and
 * lane 1 is extracted from `b` and compared with `rb` (the first two
 * values given to simde_mm256_set_pd for `b`).  Comparisons use the
 * exact-equality assertion.  Returns 0 once every entry has been checked. */
static int
test_simde_mm256_extractf128_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d b;
    simde__m128d ra;
    simde__m128d rb;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  115.05), SIMDE_FLOAT64_C(  580.50),
                         SIMDE_FLOAT64_C(  784.61), SIMDE_FLOAT64_C(    6.02)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -748.60), SIMDE_FLOAT64_C(  328.25),
                         SIMDE_FLOAT64_C( -515.20), SIMDE_FLOAT64_C(  761.63)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  784.61), SIMDE_FLOAT64_C(    6.02)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -748.60), SIMDE_FLOAT64_C(  328.25)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -201.97), SIMDE_FLOAT64_C(  -32.82),
                         SIMDE_FLOAT64_C(  698.56), SIMDE_FLOAT64_C( -504.23)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -706.22), SIMDE_FLOAT64_C(  296.17),
                         SIMDE_FLOAT64_C(  289.51), SIMDE_FLOAT64_C( -515.71)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  698.56), SIMDE_FLOAT64_C( -504.23)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -706.22), SIMDE_FLOAT64_C(  296.17)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  708.15), SIMDE_FLOAT64_C( -171.50),
                         SIMDE_FLOAT64_C(  534.26), SIMDE_FLOAT64_C( -815.83)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -29.20), SIMDE_FLOAT64_C( -861.78),
                         SIMDE_FLOAT64_C(   -7.26), SIMDE_FLOAT64_C(  861.75)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  534.26), SIMDE_FLOAT64_C( -815.83)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.20), SIMDE_FLOAT64_C( -861.78)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  186.47), SIMDE_FLOAT64_C(  690.51),
                         SIMDE_FLOAT64_C( -956.51), SIMDE_FLOAT64_C(  679.80)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  790.70), SIMDE_FLOAT64_C(  327.68),
                         SIMDE_FLOAT64_C(  -42.45), SIMDE_FLOAT64_C(  443.64)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -956.51), SIMDE_FLOAT64_C(  679.80)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  790.70), SIMDE_FLOAT64_C(  327.68)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -320.95), SIMDE_FLOAT64_C(  190.95),
                         SIMDE_FLOAT64_C( -667.22), SIMDE_FLOAT64_C( -985.92)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -263.54), SIMDE_FLOAT64_C(  946.34),
                         SIMDE_FLOAT64_C(  395.23), SIMDE_FLOAT64_C(  318.77)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -667.22), SIMDE_FLOAT64_C( -985.92)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -263.54), SIMDE_FLOAT64_C(  946.34)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  258.92), SIMDE_FLOAT64_C( -434.64),
                         SIMDE_FLOAT64_C(  431.03), SIMDE_FLOAT64_C( -543.52)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   13.27), SIMDE_FLOAT64_C( -706.44),
                         SIMDE_FLOAT64_C(   14.64), SIMDE_FLOAT64_C( -663.76)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  431.03), SIMDE_FLOAT64_C( -543.52)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(   13.27), SIMDE_FLOAT64_C( -706.44)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -867.30), SIMDE_FLOAT64_C(  693.24),
                         SIMDE_FLOAT64_C( -963.86), SIMDE_FLOAT64_C(   73.79)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -726.99), SIMDE_FLOAT64_C(  691.78),
                         SIMDE_FLOAT64_C(  411.83), SIMDE_FLOAT64_C(  204.72)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -963.86), SIMDE_FLOAT64_C(   73.79)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -726.99), SIMDE_FLOAT64_C(  691.78)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -771.52), SIMDE_FLOAT64_C( -673.29),
                         SIMDE_FLOAT64_C( -291.52), SIMDE_FLOAT64_C( -321.79)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  979.05), SIMDE_FLOAT64_C(  161.61),
                         SIMDE_FLOAT64_C(  682.54), SIMDE_FLOAT64_C(   63.94)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -291.52), SIMDE_FLOAT64_C( -321.79)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  979.05), SIMDE_FLOAT64_C(  161.61)) }
  };
  /* Number of table entries, derived from the array so it tracks the table. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    /* The lane selector stays a literal at each call site. */
    simde__m128d ra = simde_mm256_extractf128_pd(test_vec[i].a, 0);
    simde_assert_m128d_equal(ra, test_vec[i].ra);

    simde__m128d rb = simde_mm256_extractf128_pd(test_vec[i].b, 1);
    simde_assert_m128d_equal(rb, test_vec[i].rb);
  }

  return 0;
}

/* Exercises simde_mm256_extractf128_ps with both lane selectors.
 *
 * For every table entry, lane 0 is extracted from `a` and compared with
 * `ra` (the last four values given to simde_mm256_set_ps for `a`), and
 * lane 1 is extracted from `b` and compared with `rb` (the first four
 * values given to simde_mm256_set_ps for `b`).
 *
 * Previously lane 1 was also taken from `a`, which left every `b` vector
 * in the table initialised but never read.  Lane 1 now comes from `b`,
 * matching test_simde_mm256_extractf128_pd, and the `rb` expectations
 * hold the upper half of `b` accordingly.
 *
 * Comparisons use the "close" assertion with a precision argument of 1.
 * Returns 0 once every entry has been checked. */
static int
test_simde_mm256_extractf128_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m128 ra;
    simde__m128 rb;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -982.78), SIMDE_FLOAT32_C(  936.88),
                         SIMDE_FLOAT32_C(  412.85), SIMDE_FLOAT32_C( -941.25),
                         SIMDE_FLOAT32_C(  131.34), SIMDE_FLOAT32_C(  565.12),
                         SIMDE_FLOAT32_C( -716.42), SIMDE_FLOAT32_C( -825.93)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  850.34), SIMDE_FLOAT32_C( -992.50),
                         SIMDE_FLOAT32_C(  927.06), SIMDE_FLOAT32_C( -523.74),
                         SIMDE_FLOAT32_C( -670.83), SIMDE_FLOAT32_C(  805.14),
                         SIMDE_FLOAT32_C( -177.24), SIMDE_FLOAT32_C( -739.27)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  131.34), SIMDE_FLOAT32_C(  565.12), SIMDE_FLOAT32_C( -716.42), SIMDE_FLOAT32_C( -825.93)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  850.34), SIMDE_FLOAT32_C( -992.50), SIMDE_FLOAT32_C(  927.06), SIMDE_FLOAT32_C( -523.74)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -70.97), SIMDE_FLOAT32_C( -281.18),
                         SIMDE_FLOAT32_C(  775.52), SIMDE_FLOAT32_C( -398.03),
                         SIMDE_FLOAT32_C(  484.85), SIMDE_FLOAT32_C( -518.53),
                         SIMDE_FLOAT32_C( -204.80), SIMDE_FLOAT32_C( -550.46)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -638.16), SIMDE_FLOAT32_C(   -1.38),
                         SIMDE_FLOAT32_C( -998.42), SIMDE_FLOAT32_C(   63.00),
                         SIMDE_FLOAT32_C(  880.69), SIMDE_FLOAT32_C(  119.17),
                         SIMDE_FLOAT32_C(   35.15), SIMDE_FLOAT32_C( -586.49)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  484.85), SIMDE_FLOAT32_C( -518.53), SIMDE_FLOAT32_C( -204.80), SIMDE_FLOAT32_C( -550.46)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -638.16), SIMDE_FLOAT32_C(   -1.38), SIMDE_FLOAT32_C( -998.42), SIMDE_FLOAT32_C(   63.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  899.55), SIMDE_FLOAT32_C(  996.09),
                         SIMDE_FLOAT32_C( -135.99), SIMDE_FLOAT32_C(  141.51),
                         SIMDE_FLOAT32_C(  -55.54), SIMDE_FLOAT32_C(  357.72),
                         SIMDE_FLOAT32_C( -706.21), SIMDE_FLOAT32_C(  310.52)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  434.09), SIMDE_FLOAT32_C(  834.83),
                         SIMDE_FLOAT32_C( -713.54), SIMDE_FLOAT32_C( -262.36),
                         SIMDE_FLOAT32_C(  332.23), SIMDE_FLOAT32_C(  176.07),
                         SIMDE_FLOAT32_C( -753.38), SIMDE_FLOAT32_C( -978.05)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -55.54), SIMDE_FLOAT32_C(  357.72), SIMDE_FLOAT32_C( -706.21), SIMDE_FLOAT32_C(  310.52)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  434.09), SIMDE_FLOAT32_C(  834.83), SIMDE_FLOAT32_C( -713.54), SIMDE_FLOAT32_C( -262.36)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  683.50), SIMDE_FLOAT32_C( -416.59),
                         SIMDE_FLOAT32_C(  629.11), SIMDE_FLOAT32_C(  891.79),
                         SIMDE_FLOAT32_C( -173.40), SIMDE_FLOAT32_C( -666.21),
                         SIMDE_FLOAT32_C( -628.67), SIMDE_FLOAT32_C(  605.77)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -779.97), SIMDE_FLOAT32_C(  302.06),
                         SIMDE_FLOAT32_C(  734.68), SIMDE_FLOAT32_C( -886.16),
                         SIMDE_FLOAT32_C(  707.40), SIMDE_FLOAT32_C(  833.35),
                         SIMDE_FLOAT32_C(  963.40), SIMDE_FLOAT32_C(  412.83)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -173.40), SIMDE_FLOAT32_C( -666.21), SIMDE_FLOAT32_C( -628.67), SIMDE_FLOAT32_C(  605.77)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -779.97), SIMDE_FLOAT32_C(  302.06), SIMDE_FLOAT32_C(  734.68), SIMDE_FLOAT32_C( -886.16)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -802.04), SIMDE_FLOAT32_C(  699.58),
                         SIMDE_FLOAT32_C(  450.94), SIMDE_FLOAT32_C( -386.39),
                         SIMDE_FLOAT32_C(  494.04), SIMDE_FLOAT32_C(  940.54),
                         SIMDE_FLOAT32_C( -934.60), SIMDE_FLOAT32_C( -970.12)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  648.13), SIMDE_FLOAT32_C(  946.05),
                         SIMDE_FLOAT32_C(  808.22), SIMDE_FLOAT32_C(  517.90),
                         SIMDE_FLOAT32_C( -691.53), SIMDE_FLOAT32_C(  525.44),
                         SIMDE_FLOAT32_C( -474.29), SIMDE_FLOAT32_C( -454.31)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  494.04), SIMDE_FLOAT32_C(  940.54), SIMDE_FLOAT32_C( -934.60), SIMDE_FLOAT32_C( -970.12)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  648.13), SIMDE_FLOAT32_C(  946.05), SIMDE_FLOAT32_C(  808.22), SIMDE_FLOAT32_C(  517.90)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -543.05), SIMDE_FLOAT32_C(  -31.72),
                         SIMDE_FLOAT32_C( -407.93), SIMDE_FLOAT32_C(  926.97),
                         SIMDE_FLOAT32_C(  179.76), SIMDE_FLOAT32_C(  712.03),
                         SIMDE_FLOAT32_C(  463.85), SIMDE_FLOAT32_C( -838.23)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  673.45), SIMDE_FLOAT32_C(  368.84),
                         SIMDE_FLOAT32_C(  678.61), SIMDE_FLOAT32_C(  857.34),
                         SIMDE_FLOAT32_C( -482.39), SIMDE_FLOAT32_C(  -94.31),
                         SIMDE_FLOAT32_C(  471.32), SIMDE_FLOAT32_C(  173.80)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  179.76), SIMDE_FLOAT32_C(  712.03), SIMDE_FLOAT32_C(  463.85), SIMDE_FLOAT32_C( -838.23)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  673.45), SIMDE_FLOAT32_C(  368.84), SIMDE_FLOAT32_C(  678.61), SIMDE_FLOAT32_C(  857.34)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -929.66), SIMDE_FLOAT32_C( -223.82),
                         SIMDE_FLOAT32_C(  340.48), SIMDE_FLOAT32_C(  717.03),
                         SIMDE_FLOAT32_C(  895.13), SIMDE_FLOAT32_C(  964.64),
                         SIMDE_FLOAT32_C( -654.82), SIMDE_FLOAT32_C(   74.87)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  665.16), SIMDE_FLOAT32_C(  209.26),
                         SIMDE_FLOAT32_C(  133.64), SIMDE_FLOAT32_C(  -42.02),
                         SIMDE_FLOAT32_C( -424.16), SIMDE_FLOAT32_C( -122.50),
                         SIMDE_FLOAT32_C( -788.87), SIMDE_FLOAT32_C( -239.50)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  895.13), SIMDE_FLOAT32_C(  964.64), SIMDE_FLOAT32_C( -654.82), SIMDE_FLOAT32_C(   74.87)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  665.16), SIMDE_FLOAT32_C(  209.26), SIMDE_FLOAT32_C(  133.64), SIMDE_FLOAT32_C(  -42.02)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  406.90), SIMDE_FLOAT32_C( -672.60),
                         SIMDE_FLOAT32_C(  803.83), SIMDE_FLOAT32_C( -409.89),
                         SIMDE_FLOAT32_C(  549.46), SIMDE_FLOAT32_C(  773.35),
                         SIMDE_FLOAT32_C( -173.87), SIMDE_FLOAT32_C(  365.19)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  381.05), SIMDE_FLOAT32_C( -236.95),
                         SIMDE_FLOAT32_C( -568.89), SIMDE_FLOAT32_C(  375.92),
                         SIMDE_FLOAT32_C(  259.53), SIMDE_FLOAT32_C( -247.84),
                         SIMDE_FLOAT32_C(  166.06), SIMDE_FLOAT32_C( -963.74)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  549.46), SIMDE_FLOAT32_C(  773.35), SIMDE_FLOAT32_C( -173.87), SIMDE_FLOAT32_C(  365.19)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  381.05), SIMDE_FLOAT32_C( -236.95), SIMDE_FLOAT32_C( -568.89), SIMDE_FLOAT32_C(  375.92)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Lane 0 (low 128 bits) comes from `a`, lane 1 (high 128 bits) from `b`,
     * so both input vectors of each entry are exercised. */
    simde__m128 ra = simde_mm256_extractf128_ps(test_vec[i].a, 0);
    simde__m128 rb = simde_mm256_extractf128_ps(test_vec[i].b, 1);
    simde_assert_m128_close(ra, test_vec[i].ra, 1);
    simde_assert_m128_close(rb, test_vec[i].rb, 1);
  }

  return 0;
}

/* Fixed-vector test for simde_mm256_extractf128_si256.
 *
 * For each table row the 256-bit `src` value is passed to the intrinsic
 * twice, once with selector 0 and once with selector 1, and the two
 * 128-bit results are compared as 32-bit integer lanes against
 * `expect_sel0` and `expect_sel1`.  In every row `expect_sel0` repeats
 * the last four arguments of the simde_mm256_set_epi32() call that
 * builds `src`, and `expect_sel1` repeats the first four.
 *
 * Returns 0 once every row has been checked; what happens on a mismatch
 * is decided by the simde_assert_m128i_i32 macro.
 */
static int
test_simde_mm256_extractf128_si256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i src;          /* value handed to the intrinsic */
    simde__m256i b;            /* present in the table but never read by the loop below */
    simde__m128i expect_sel0;  /* expected result for selector 0 */
    simde__m128i expect_sel1;  /* expected result for selector 1 */
  } cases[8] = {
    { simde_mm256_set_epi32(INT32_C( 1229394801), INT32_C(  992221618), INT32_C(-1388107406), INT32_C(  780445625),
                            INT32_C( 1795700153), INT32_C( -297324271), INT32_C( 1549329146), INT32_C( -534963225)),
      simde_mm256_set_epi32(INT32_C( -867719772), INT32_C(-1804212438), INT32_C( 1849818353), INT32_C(  405560893),
                            INT32_C( 1351338386), INT32_C( -886724662), INT32_C( 1197680760), INT32_C( 1003042592)),
      simde_mm_set_epi32(INT32_C( 1795700153), INT32_C( -297324271), INT32_C( 1549329146), INT32_C( -534963225)),
      simde_mm_set_epi32(INT32_C( 1229394801), INT32_C(  992221618), INT32_C(-1388107406), INT32_C(  780445625)) },
    { simde_mm256_set_epi32(INT32_C( 1839397279), INT32_C( -585358334), INT32_C(  779178160), INT32_C( -362976984),
                            INT32_C(-1015866216), INT32_C( -556342867), INT32_C( -766379029), INT32_C( -130781639)),
      simde_mm256_set_epi32(INT32_C( 2021215895), INT32_C( -422044488), INT32_C(-1385706777), INT32_C(   22702781),
                            INT32_C( 1076807274), INT32_C(-1923875401), INT32_C( -554846936), INT32_C(-1008226174)),
      simde_mm_set_epi32(INT32_C(-1015866216), INT32_C( -556342867), INT32_C( -766379029), INT32_C( -130781639)),
      simde_mm_set_epi32(INT32_C( 1839397279), INT32_C( -585358334), INT32_C(  779178160), INT32_C( -362976984)) },
    { simde_mm256_set_epi32(INT32_C(   23865749), INT32_C( -908972624), INT32_C(-1642418179), INT32_C(  111689864),
                            INT32_C( -835154412), INT32_C( -431540196), INT32_C( -161564683), INT32_C(  204589457)),
      simde_mm256_set_epi32(INT32_C( 2088662618), INT32_C(-1671363325), INT32_C( -997695043), INT32_C( -809764814),
                            INT32_C( 2092581708), INT32_C(-1073689737), INT32_C(-1556963227), INT32_C( -641330488)),
      simde_mm_set_epi32(INT32_C( -835154412), INT32_C( -431540196), INT32_C( -161564683), INT32_C(  204589457)),
      simde_mm_set_epi32(INT32_C(   23865749), INT32_C( -908972624), INT32_C(-1642418179), INT32_C(  111689864)) },
    { simde_mm256_set_epi32(INT32_C( 1727711569), INT32_C(-1915329589), INT32_C( -979233658), INT32_C( -409203179),
                            INT32_C( 1343207861), INT32_C(-1541174422), INT32_C(-2097250480), INT32_C(-1382492089)),
      simde_mm256_set_epi32(INT32_C(  -42175512), INT32_C(-2146588690), INT32_C(-1902868938), INT32_C( 1919945739),
                            INT32_C(  410749235), INT32_C(-1828962645), INT32_C(  525862553), INT32_C( -282512400)),
      simde_mm_set_epi32(INT32_C( 1343207861), INT32_C(-1541174422), INT32_C(-2097250480), INT32_C(-1382492089)),
      simde_mm_set_epi32(INT32_C( 1727711569), INT32_C(-1915329589), INT32_C( -979233658), INT32_C( -409203179)) },
    { simde_mm256_set_epi32(INT32_C(  377369527), INT32_C( 1159197718), INT32_C(  288677560), INT32_C(  828517622),
                            INT32_C( 1815109517), INT32_C( 1103735854), INT32_C( 1342116414), INT32_C( 1750949195)),
      simde_mm256_set_epi32(INT32_C( 1481955155), INT32_C( -119794855), INT32_C(-2109995042), INT32_C(  582656481),
                            INT32_C( 1178951500), INT32_C(  762286037), INT32_C(  628377158), INT32_C( -188026020)),
      simde_mm_set_epi32(INT32_C( 1815109517), INT32_C( 1103735854), INT32_C( 1342116414), INT32_C( 1750949195)),
      simde_mm_set_epi32(INT32_C(  377369527), INT32_C( 1159197718), INT32_C(  288677560), INT32_C(  828517622)) },
    { simde_mm256_set_epi32(INT32_C(-1996051424), INT32_C( -314294760), INT32_C( -770521150), INT32_C(  508113145),
                            INT32_C( -677093043), INT32_C( -527636644), INT32_C( 1238565466), INT32_C(-1592387355)),
      simde_mm256_set_epi32(INT32_C(-1510707643), INT32_C( 1988531398), INT32_C(-1182276921), INT32_C(  363503044),
                            INT32_C( 2086268932), INT32_C( -428647595), INT32_C( 1685321543), INT32_C( 1979089365)),
      simde_mm_set_epi32(INT32_C( -677093043), INT32_C( -527636644), INT32_C( 1238565466), INT32_C(-1592387355)),
      simde_mm_set_epi32(INT32_C(-1996051424), INT32_C( -314294760), INT32_C( -770521150), INT32_C(  508113145)) },
    { simde_mm256_set_epi32(INT32_C(    4593159), INT32_C( 1779671737), INT32_C( -569674634), INT32_C( -184254965),
                            INT32_C( -665786654), INT32_C(  663766301), INT32_C(-1237697897), INT32_C( -260948936)),
      simde_mm256_set_epi32(INT32_C( -575114102), INT32_C( -399786699), INT32_C(-1468780124), INT32_C( 2032090700),
                            INT32_C(  723386747), INT32_C(-1766232746), INT32_C(   73837413), INT32_C(  496540408)),
      simde_mm_set_epi32(INT32_C( -665786654), INT32_C(  663766301), INT32_C(-1237697897), INT32_C( -260948936)),
      simde_mm_set_epi32(INT32_C(    4593159), INT32_C( 1779671737), INT32_C( -569674634), INT32_C( -184254965)) },
    { simde_mm256_set_epi32(INT32_C( -328197013), INT32_C( 1036318270), INT32_C(-1930293157), INT32_C( 1948339432),
                            INT32_C( 1903716614), INT32_C(-1951673698), INT32_C(-1858071379), INT32_C( 2070124471)),
      simde_mm256_set_epi32(INT32_C(-1815372819), INT32_C( -102535612), INT32_C(  115383384), INT32_C( 1004544095),
                            INT32_C( 1506420054), INT32_C(-1014523798), INT32_C(-1776388104), INT32_C( 1550371104)),
      simde_mm_set_epi32(INT32_C( 1903716614), INT32_C(-1951673698), INT32_C(-1858071379), INT32_C( 2070124471)),
      simde_mm_set_epi32(INT32_C( -328197013), INT32_C( 1036318270), INT32_C(-1930293157), INT32_C( 1948339432)) }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* The selector is written as a literal in each call. */
    simde__m128i got_sel0 = simde_mm256_extractf128_si256(cases[idx].src, 0);
    simde__m128i got_sel1 = simde_mm256_extractf128_si256(cases[idx].src, 1);
    simde_assert_m128i_i32(got_sel0, ==, cases[idx].expect_sel0);
    simde_assert_m128i_i32(got_sel1, ==, cases[idx].expect_sel1);
  }

  return 0;
}

/* Fixed-vector test for simde_mm256_floor_pd.
 *
 * Each row pairs an input vector of four doubles with the expected
 * result; every expected element in the table is the input element
 * rounded down to a whole number.  The comparison uses
 * simde_assert_m256d_close with a precision argument of 1.
 *
 * Returns 0 once every row has been checked; what happens on a mismatch
 * is decided by the assertion macro.
 */
static int
test_simde_mm256_floor_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d input;     /* operand passed to the intrinsic */
    simde__m256d expected;  /* reference result */
  } cases[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -86.60), SIMDE_FLOAT64_C(  -29.62),
                         SIMDE_FLOAT64_C(  880.65), SIMDE_FLOAT64_C(  474.01)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -87.00), SIMDE_FLOAT64_C(  -30.00),
                         SIMDE_FLOAT64_C(  880.00), SIMDE_FLOAT64_C(  474.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  335.44), SIMDE_FLOAT64_C(   87.17),
                         SIMDE_FLOAT64_C(  264.70), SIMDE_FLOAT64_C(  435.92)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  335.00), SIMDE_FLOAT64_C(   87.00),
                         SIMDE_FLOAT64_C(  264.00), SIMDE_FLOAT64_C(  435.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  657.73), SIMDE_FLOAT64_C( -255.01),
                         SIMDE_FLOAT64_C( -236.61), SIMDE_FLOAT64_C(  198.74)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  657.00), SIMDE_FLOAT64_C( -256.00),
                         SIMDE_FLOAT64_C( -237.00), SIMDE_FLOAT64_C(  198.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -459.75), SIMDE_FLOAT64_C(  234.86),
                         SIMDE_FLOAT64_C( -517.66), SIMDE_FLOAT64_C( -561.05)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -460.00), SIMDE_FLOAT64_C(  234.00),
                         SIMDE_FLOAT64_C( -518.00), SIMDE_FLOAT64_C( -562.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -815.90), SIMDE_FLOAT64_C( -973.26),
                         SIMDE_FLOAT64_C( -704.97), SIMDE_FLOAT64_C(  629.57)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -816.00), SIMDE_FLOAT64_C( -974.00),
                         SIMDE_FLOAT64_C( -705.00), SIMDE_FLOAT64_C(  629.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -147.09), SIMDE_FLOAT64_C( -283.85),
                         SIMDE_FLOAT64_C(   91.60), SIMDE_FLOAT64_C( -808.32)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -148.00), SIMDE_FLOAT64_C( -284.00),
                         SIMDE_FLOAT64_C(   91.00), SIMDE_FLOAT64_C( -809.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  346.15), SIMDE_FLOAT64_C( -862.92),
                         SIMDE_FLOAT64_C( -616.19), SIMDE_FLOAT64_C( -434.01)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  346.00), SIMDE_FLOAT64_C( -863.00),
                         SIMDE_FLOAT64_C( -617.00), SIMDE_FLOAT64_C( -435.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -593.06), SIMDE_FLOAT64_C( -286.08),
                         SIMDE_FLOAT64_C(  351.32), SIMDE_FLOAT64_C(  -29.06)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -594.00), SIMDE_FLOAT64_C( -287.00),
                         SIMDE_FLOAT64_C(  351.00), SIMDE_FLOAT64_C(  -30.00)) }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    simde__m256d actual = simde_mm256_floor_pd(cases[idx].input);
    simde_assert_m256d_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Fixed-vector test for simde_mm256_hadd_ps.
 *
 * Each row holds two 8 x float32 operands and the expected result.  The
 * expected elements in the table are sums of adjacent element pairs:
 * within each group of four result elements, two sums come from `lhs`
 * and two from `rhs`.  The comparison uses simde_assert_m256_close with
 * a precision argument of 1.
 *
 * Returns 0 once every row has been checked; what happens on a mismatch
 * is decided by the assertion macro.
 */
static int
test_simde_mm256_hadd_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 lhs;       /* first operand */
    simde__m256 rhs;       /* second operand */
    simde__m256 expected;  /* reference result */
  } cases[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -626.68), SIMDE_FLOAT32_C( -596.09),
                         SIMDE_FLOAT32_C( -988.19), SIMDE_FLOAT32_C(  961.65),
                         SIMDE_FLOAT32_C(  518.43), SIMDE_FLOAT32_C(  334.09),
                         SIMDE_FLOAT32_C(  212.95), SIMDE_FLOAT32_C( -488.35)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -361.77), SIMDE_FLOAT32_C(  121.01),
                         SIMDE_FLOAT32_C( -252.45), SIMDE_FLOAT32_C(  920.40),
                         SIMDE_FLOAT32_C( -660.15), SIMDE_FLOAT32_C( -869.23),
                         SIMDE_FLOAT32_C(  372.46), SIMDE_FLOAT32_C(  408.66)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -240.76), SIMDE_FLOAT32_C(  667.95),
                         SIMDE_FLOAT32_C(-1222.77), SIMDE_FLOAT32_C(  -26.54),
                         SIMDE_FLOAT32_C(-1529.38), SIMDE_FLOAT32_C(  781.12),
                         SIMDE_FLOAT32_C(  852.52), SIMDE_FLOAT32_C( -275.40)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  781.58), SIMDE_FLOAT32_C(  111.71),
                         SIMDE_FLOAT32_C( -214.03), SIMDE_FLOAT32_C( -280.14),
                         SIMDE_FLOAT32_C(  285.11), SIMDE_FLOAT32_C( -159.71),
                         SIMDE_FLOAT32_C(  737.74), SIMDE_FLOAT32_C(  159.06)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -542.95), SIMDE_FLOAT32_C(  311.99),
                         SIMDE_FLOAT32_C( -695.66), SIMDE_FLOAT32_C( -563.82),
                         SIMDE_FLOAT32_C( -378.07), SIMDE_FLOAT32_C(  160.57),
                         SIMDE_FLOAT32_C(  591.32), SIMDE_FLOAT32_C(  -15.88)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -230.96), SIMDE_FLOAT32_C(-1259.48),
                         SIMDE_FLOAT32_C(  893.29), SIMDE_FLOAT32_C( -494.17),
                         SIMDE_FLOAT32_C( -217.50), SIMDE_FLOAT32_C(  575.44),
                         SIMDE_FLOAT32_C(  125.40), SIMDE_FLOAT32_C(  896.80)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -397.87), SIMDE_FLOAT32_C(  582.70),
                         SIMDE_FLOAT32_C(  728.77), SIMDE_FLOAT32_C(  563.76),
                         SIMDE_FLOAT32_C( -874.44), SIMDE_FLOAT32_C( -323.73),
                         SIMDE_FLOAT32_C(  191.14), SIMDE_FLOAT32_C( -425.30)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  398.85), SIMDE_FLOAT32_C(  435.48),
                         SIMDE_FLOAT32_C( -736.85), SIMDE_FLOAT32_C( -251.61),
                         SIMDE_FLOAT32_C(  363.70), SIMDE_FLOAT32_C( -850.74),
                         SIMDE_FLOAT32_C(  513.62), SIMDE_FLOAT32_C(  893.23)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  834.33), SIMDE_FLOAT32_C( -988.46),
                         SIMDE_FLOAT32_C(  184.83), SIMDE_FLOAT32_C( 1292.53),
                         SIMDE_FLOAT32_C( -487.04), SIMDE_FLOAT32_C( 1406.85),
                         SIMDE_FLOAT32_C(-1198.17), SIMDE_FLOAT32_C( -234.16)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -659.61), SIMDE_FLOAT32_C( -996.22),
                         SIMDE_FLOAT32_C(  426.97), SIMDE_FLOAT32_C(   60.91),
                         SIMDE_FLOAT32_C(  175.17), SIMDE_FLOAT32_C(  226.61),
                         SIMDE_FLOAT32_C(  234.99), SIMDE_FLOAT32_C(  755.38)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -152.71), SIMDE_FLOAT32_C(  857.34),
                         SIMDE_FLOAT32_C(  403.84), SIMDE_FLOAT32_C( -862.22),
                         SIMDE_FLOAT32_C(  782.97), SIMDE_FLOAT32_C(  437.87),
                         SIMDE_FLOAT32_C(  825.47), SIMDE_FLOAT32_C(  915.28)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  704.63), SIMDE_FLOAT32_C( -458.38),
                         SIMDE_FLOAT32_C(-1655.83), SIMDE_FLOAT32_C(  487.88),
                         SIMDE_FLOAT32_C( 1220.84), SIMDE_FLOAT32_C( 1740.75),
                         SIMDE_FLOAT32_C(  401.78), SIMDE_FLOAT32_C(  990.37)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -564.13), SIMDE_FLOAT32_C( -973.32),
                         SIMDE_FLOAT32_C(  233.13), SIMDE_FLOAT32_C(  504.79),
                         SIMDE_FLOAT32_C(  857.55), SIMDE_FLOAT32_C(  275.11),
                         SIMDE_FLOAT32_C(  643.61), SIMDE_FLOAT32_C(  -70.12)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -709.38), SIMDE_FLOAT32_C(  310.06),
                         SIMDE_FLOAT32_C( -793.34), SIMDE_FLOAT32_C( -947.00),
                         SIMDE_FLOAT32_C( -974.52), SIMDE_FLOAT32_C(  878.25),
                         SIMDE_FLOAT32_C( -856.10), SIMDE_FLOAT32_C(  529.04)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -399.32), SIMDE_FLOAT32_C(-1740.34),
                         SIMDE_FLOAT32_C(-1537.45), SIMDE_FLOAT32_C(  737.92),
                         SIMDE_FLOAT32_C(  -96.27), SIMDE_FLOAT32_C( -327.06),
                         SIMDE_FLOAT32_C( 1132.66), SIMDE_FLOAT32_C(  573.49)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  733.99), SIMDE_FLOAT32_C( -737.47),
                         SIMDE_FLOAT32_C( -603.71), SIMDE_FLOAT32_C(  863.52),
                         SIMDE_FLOAT32_C( -639.50), SIMDE_FLOAT32_C(  474.16),
                         SIMDE_FLOAT32_C(  816.39), SIMDE_FLOAT32_C(   75.56)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  412.95), SIMDE_FLOAT32_C(  396.26),
                         SIMDE_FLOAT32_C(  519.43), SIMDE_FLOAT32_C( -413.53),
                         SIMDE_FLOAT32_C( -676.69), SIMDE_FLOAT32_C( -335.15),
                         SIMDE_FLOAT32_C(  961.37), SIMDE_FLOAT32_C(  820.10)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  809.21), SIMDE_FLOAT32_C(  105.90),
                         SIMDE_FLOAT32_C(   -3.48), SIMDE_FLOAT32_C(  259.81),
                         SIMDE_FLOAT32_C(-1011.84), SIMDE_FLOAT32_C( 1781.47),
                         SIMDE_FLOAT32_C( -165.34), SIMDE_FLOAT32_C(  891.95)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -149.16), SIMDE_FLOAT32_C( -430.29),
                         SIMDE_FLOAT32_C( -817.18), SIMDE_FLOAT32_C(  272.68),
                         SIMDE_FLOAT32_C( -899.55), SIMDE_FLOAT32_C( -654.95),
                         SIMDE_FLOAT32_C(  148.93), SIMDE_FLOAT32_C(  957.05)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -940.60), SIMDE_FLOAT32_C(  528.45),
                         SIMDE_FLOAT32_C(  574.67), SIMDE_FLOAT32_C(  993.90),
                         SIMDE_FLOAT32_C( -532.80), SIMDE_FLOAT32_C( -214.29),
                         SIMDE_FLOAT32_C( -506.86), SIMDE_FLOAT32_C(  389.73)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -412.15), SIMDE_FLOAT32_C( 1568.57),
                         SIMDE_FLOAT32_C( -579.45), SIMDE_FLOAT32_C( -544.50),
                         SIMDE_FLOAT32_C( -747.09), SIMDE_FLOAT32_C( -117.13),
                         SIMDE_FLOAT32_C(-1554.50), SIMDE_FLOAT32_C( 1105.98)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -426.98), SIMDE_FLOAT32_C(  653.68),
                         SIMDE_FLOAT32_C(  854.01), SIMDE_FLOAT32_C( -871.75),
                         SIMDE_FLOAT32_C(   60.41), SIMDE_FLOAT32_C(  197.76),
                         SIMDE_FLOAT32_C( -611.16), SIMDE_FLOAT32_C(  848.76)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  243.99), SIMDE_FLOAT32_C( -967.54),
                         SIMDE_FLOAT32_C(  893.59), SIMDE_FLOAT32_C( -630.65),
                         SIMDE_FLOAT32_C( -132.65), SIMDE_FLOAT32_C( -434.90),
                         SIMDE_FLOAT32_C( -516.97), SIMDE_FLOAT32_C(  151.24)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -723.55), SIMDE_FLOAT32_C(  262.94),
                         SIMDE_FLOAT32_C(  226.70), SIMDE_FLOAT32_C(  -17.74),
                         SIMDE_FLOAT32_C( -567.55), SIMDE_FLOAT32_C( -365.73),
                         SIMDE_FLOAT32_C(  258.17), SIMDE_FLOAT32_C(  237.60)) }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    simde__m256 actual = simde_mm256_hadd_ps(cases[idx].lhs, cases[idx].rhs);
    simde_assert_m256_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Fixed-vector test for simde_mm256_hadd_pd.
 *
 * Each row holds two 4 x float64 operands and the expected result.  The
 * expected elements in the table are sums of adjacent element pairs,
 * alternating between a pair taken from `rhs` and a pair taken from
 * `lhs`.  The comparison uses simde_assert_m256d_close with a precision
 * argument of 1.
 *
 * Returns 0 once every row has been checked; what happens on a mismatch
 * is decided by the assertion macro.
 */
static int
test_simde_mm256_hadd_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d lhs;       /* first operand */
    simde__m256d rhs;       /* second operand */
    simde__m256d expected;  /* reference result */
  } cases[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -436.68), SIMDE_FLOAT64_C(  480.99),
                         SIMDE_FLOAT64_C( -278.34), SIMDE_FLOAT64_C(  588.89)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  130.14), SIMDE_FLOAT64_C( -927.67),
                         SIMDE_FLOAT64_C( -646.84), SIMDE_FLOAT64_C(  150.94)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -797.53), SIMDE_FLOAT64_C(   44.31),
                         SIMDE_FLOAT64_C( -495.90), SIMDE_FLOAT64_C(  310.55)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -433.18), SIMDE_FLOAT64_C(  708.48),
                         SIMDE_FLOAT64_C(  534.86), SIMDE_FLOAT64_C( -929.94)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  826.52), SIMDE_FLOAT64_C(   36.50),
                         SIMDE_FLOAT64_C(  561.99), SIMDE_FLOAT64_C( -293.03)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  863.02), SIMDE_FLOAT64_C(  275.30),
                         SIMDE_FLOAT64_C(  268.96), SIMDE_FLOAT64_C( -395.08)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  255.04), SIMDE_FLOAT64_C( -637.84),
                         SIMDE_FLOAT64_C( -513.11), SIMDE_FLOAT64_C( -599.83)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -910.36), SIMDE_FLOAT64_C( -799.15),
                         SIMDE_FLOAT64_C( -982.23), SIMDE_FLOAT64_C( -206.95)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-1709.51), SIMDE_FLOAT64_C( -382.80),
                         SIMDE_FLOAT64_C(-1189.18), SIMDE_FLOAT64_C(-1112.94)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  149.87), SIMDE_FLOAT64_C(  274.56),
                         SIMDE_FLOAT64_C(  400.10), SIMDE_FLOAT64_C(  410.40)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  883.42), SIMDE_FLOAT64_C(   46.03),
                         SIMDE_FLOAT64_C( -600.38), SIMDE_FLOAT64_C(  131.34)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  929.45), SIMDE_FLOAT64_C(  424.43),
                         SIMDE_FLOAT64_C( -469.04), SIMDE_FLOAT64_C(  810.50)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -461.17), SIMDE_FLOAT64_C( -757.61),
                         SIMDE_FLOAT64_C( -114.45), SIMDE_FLOAT64_C(  853.48)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  397.09), SIMDE_FLOAT64_C( -444.66),
                         SIMDE_FLOAT64_C( -909.26), SIMDE_FLOAT64_C(  102.16)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -47.57), SIMDE_FLOAT64_C(-1218.78),
                         SIMDE_FLOAT64_C( -807.10), SIMDE_FLOAT64_C(  739.03)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -703.10), SIMDE_FLOAT64_C( -547.05),
                         SIMDE_FLOAT64_C( -158.62), SIMDE_FLOAT64_C( -256.70)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  137.64), SIMDE_FLOAT64_C( -342.68),
                         SIMDE_FLOAT64_C(  619.75), SIMDE_FLOAT64_C(  498.04)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -205.04), SIMDE_FLOAT64_C(-1250.15),
                         SIMDE_FLOAT64_C( 1117.79), SIMDE_FLOAT64_C( -415.32)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -818.29), SIMDE_FLOAT64_C(  196.32),
                         SIMDE_FLOAT64_C( -434.03), SIMDE_FLOAT64_C(   36.18)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -181.91), SIMDE_FLOAT64_C(  587.12),
                         SIMDE_FLOAT64_C( -318.37), SIMDE_FLOAT64_C(  -24.13)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  405.21), SIMDE_FLOAT64_C( -621.97),
                         SIMDE_FLOAT64_C( -342.50), SIMDE_FLOAT64_C( -397.85)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  129.06), SIMDE_FLOAT64_C( -240.83),
                         SIMDE_FLOAT64_C( -486.28), SIMDE_FLOAT64_C(  630.75)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  679.75), SIMDE_FLOAT64_C(  -37.94),
                         SIMDE_FLOAT64_C(  761.33), SIMDE_FLOAT64_C( -837.74)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  641.81), SIMDE_FLOAT64_C( -111.77),
                         SIMDE_FLOAT64_C(  -76.41), SIMDE_FLOAT64_C(  144.47)) }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    simde__m256d actual = simde_mm256_hadd_pd(cases[idx].lhs, cases[idx].rhs);
    simde_assert_m256d_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Fixed-vector test for simde_mm256_hsub_ps.
 *
 * Each row holds two 8 x float32 operands and the expected result.  The
 * expected elements in the table are differences of adjacent element
 * pairs: within each group of four result elements, two differences
 * come from `lhs` and two from `rhs`.  The comparison uses
 * simde_assert_m256_close with a precision argument of 1.
 *
 * Returns 0 once every row has been checked; what happens on a mismatch
 * is decided by the assertion macro.
 */
static int
test_simde_mm256_hsub_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 lhs;       /* first operand */
    simde__m256 rhs;       /* second operand */
    simde__m256 expected;  /* reference result */
  } cases[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -183.85), SIMDE_FLOAT32_C(  905.07),
                         SIMDE_FLOAT32_C( -962.47), SIMDE_FLOAT32_C(  739.25),
                         SIMDE_FLOAT32_C(   13.54), SIMDE_FLOAT32_C( -172.40),
                         SIMDE_FLOAT32_C(  456.21), SIMDE_FLOAT32_C(  164.33)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  959.87), SIMDE_FLOAT32_C(  500.02),
                         SIMDE_FLOAT32_C( -991.36), SIMDE_FLOAT32_C(  373.08),
                         SIMDE_FLOAT32_C( -962.56), SIMDE_FLOAT32_C( -502.91),
                         SIMDE_FLOAT32_C( -108.93), SIMDE_FLOAT32_C(  403.37)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -459.85), SIMDE_FLOAT32_C( 1364.44),
                         SIMDE_FLOAT32_C( 1088.92), SIMDE_FLOAT32_C( 1701.72),
                         SIMDE_FLOAT32_C(  459.65), SIMDE_FLOAT32_C(  512.30),
                         SIMDE_FLOAT32_C( -185.94), SIMDE_FLOAT32_C( -291.88)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  716.55), SIMDE_FLOAT32_C( -798.82),
                         SIMDE_FLOAT32_C(  -17.12), SIMDE_FLOAT32_C(  981.07),
                         SIMDE_FLOAT32_C( -241.05), SIMDE_FLOAT32_C(  266.35),
                         SIMDE_FLOAT32_C(  140.17), SIMDE_FLOAT32_C(  285.86)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  835.80), SIMDE_FLOAT32_C(  802.76),
                         SIMDE_FLOAT32_C( -745.28), SIMDE_FLOAT32_C( -228.38),
                         SIMDE_FLOAT32_C(  -44.09), SIMDE_FLOAT32_C(  991.15),
                         SIMDE_FLOAT32_C(  461.60), SIMDE_FLOAT32_C(   89.29)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -33.04), SIMDE_FLOAT32_C(  516.90),
                         SIMDE_FLOAT32_C(-1515.37), SIMDE_FLOAT32_C(  998.19),
                         SIMDE_FLOAT32_C( 1035.24), SIMDE_FLOAT32_C( -372.31),
                         SIMDE_FLOAT32_C(  507.40), SIMDE_FLOAT32_C(  145.69)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -323.55), SIMDE_FLOAT32_C( -835.05),
                         SIMDE_FLOAT32_C( -869.80), SIMDE_FLOAT32_C( -771.34),
                         SIMDE_FLOAT32_C( -342.71), SIMDE_FLOAT32_C(  374.92),
                         SIMDE_FLOAT32_C( -998.95), SIMDE_FLOAT32_C(   85.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -235.37), SIMDE_FLOAT32_C( -740.75),
                         SIMDE_FLOAT32_C(  568.96), SIMDE_FLOAT32_C(  984.74),
                         SIMDE_FLOAT32_C(  344.48), SIMDE_FLOAT32_C( -384.09),
                         SIMDE_FLOAT32_C( -746.69), SIMDE_FLOAT32_C(  666.35)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -505.38), SIMDE_FLOAT32_C(  415.78),
                         SIMDE_FLOAT32_C( -511.50), SIMDE_FLOAT32_C(   98.46),
                         SIMDE_FLOAT32_C( -728.57), SIMDE_FLOAT32_C( 1413.04),
                         SIMDE_FLOAT32_C(  717.63), SIMDE_FLOAT32_C( 1084.26)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  685.93), SIMDE_FLOAT32_C(  492.23),
                         SIMDE_FLOAT32_C(  668.17), SIMDE_FLOAT32_C( -421.85),
                         SIMDE_FLOAT32_C(  -93.75), SIMDE_FLOAT32_C( -819.96),
                         SIMDE_FLOAT32_C( -246.22), SIMDE_FLOAT32_C( -823.51)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  304.04), SIMDE_FLOAT32_C( -843.07),
                         SIMDE_FLOAT32_C( -204.07), SIMDE_FLOAT32_C( -879.53),
                         SIMDE_FLOAT32_C(  -83.04), SIMDE_FLOAT32_C( -516.58),
                         SIMDE_FLOAT32_C(  600.96), SIMDE_FLOAT32_C(   84.13)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-1147.11), SIMDE_FLOAT32_C( -675.46),
                         SIMDE_FLOAT32_C( -193.70), SIMDE_FLOAT32_C(-1090.02),
                         SIMDE_FLOAT32_C( -433.54), SIMDE_FLOAT32_C( -516.83),
                         SIMDE_FLOAT32_C( -726.21), SIMDE_FLOAT32_C( -577.29)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  492.05), SIMDE_FLOAT32_C( -121.48),
                         SIMDE_FLOAT32_C(  197.17), SIMDE_FLOAT32_C(  108.83),
                         SIMDE_FLOAT32_C( -910.74), SIMDE_FLOAT32_C( -610.63),
                         SIMDE_FLOAT32_C( -510.98), SIMDE_FLOAT32_C(  996.81)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -210.55), SIMDE_FLOAT32_C(  321.75),
                         SIMDE_FLOAT32_C( -949.93), SIMDE_FLOAT32_C(  547.84),
                         SIMDE_FLOAT32_C(  133.08), SIMDE_FLOAT32_C( -303.40),
                         SIMDE_FLOAT32_C(   47.44), SIMDE_FLOAT32_C( -236.50)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  532.30), SIMDE_FLOAT32_C( 1497.77),
                         SIMDE_FLOAT32_C( -613.53), SIMDE_FLOAT32_C(  -88.34),
                         SIMDE_FLOAT32_C( -436.48), SIMDE_FLOAT32_C( -283.94),
                         SIMDE_FLOAT32_C(  300.11), SIMDE_FLOAT32_C( 1507.79)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.77), SIMDE_FLOAT32_C( -920.82),
                         SIMDE_FLOAT32_C( -807.55), SIMDE_FLOAT32_C(  138.87),
                         SIMDE_FLOAT32_C(   34.09), SIMDE_FLOAT32_C( -826.98),
                         SIMDE_FLOAT32_C( -567.48), SIMDE_FLOAT32_C(  943.56)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -46.97), SIMDE_FLOAT32_C(  474.05),
                         SIMDE_FLOAT32_C(  -64.19), SIMDE_FLOAT32_C( -945.80),
                         SIMDE_FLOAT32_C( -873.08), SIMDE_FLOAT32_C( -569.02),
                         SIMDE_FLOAT32_C( -630.19), SIMDE_FLOAT32_C( -681.68)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  521.02), SIMDE_FLOAT32_C( -881.61),
                         SIMDE_FLOAT32_C(-1125.59), SIMDE_FLOAT32_C(  946.42),
                         SIMDE_FLOAT32_C(  304.06), SIMDE_FLOAT32_C(  -51.49),
                         SIMDE_FLOAT32_C( -861.07), SIMDE_FLOAT32_C( 1511.04)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  470.38), SIMDE_FLOAT32_C( -693.79),
                         SIMDE_FLOAT32_C( -843.14), SIMDE_FLOAT32_C( -640.08),
                         SIMDE_FLOAT32_C(  950.30), SIMDE_FLOAT32_C(  582.04),
                         SIMDE_FLOAT32_C( -585.94), SIMDE_FLOAT32_C(  175.69)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   45.69), SIMDE_FLOAT32_C(   38.77),
                         SIMDE_FLOAT32_C(  194.04), SIMDE_FLOAT32_C(  410.12),
                         SIMDE_FLOAT32_C(  -28.08), SIMDE_FLOAT32_C( -596.23),
                         SIMDE_FLOAT32_C(  -38.68), SIMDE_FLOAT32_C( -731.17)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -6.92), SIMDE_FLOAT32_C(  216.08),
                         SIMDE_FLOAT32_C(-1164.17), SIMDE_FLOAT32_C(  203.06),
                         SIMDE_FLOAT32_C( -568.15), SIMDE_FLOAT32_C( -692.49),
                         SIMDE_FLOAT32_C( -368.26), SIMDE_FLOAT32_C(  761.63)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -171.77), SIMDE_FLOAT32_C( -878.69),
                         SIMDE_FLOAT32_C( -337.47), SIMDE_FLOAT32_C( -864.26),
                         SIMDE_FLOAT32_C(  976.73), SIMDE_FLOAT32_C(  253.08),
                         SIMDE_FLOAT32_C(  134.24), SIMDE_FLOAT32_C( -737.89)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  167.53), SIMDE_FLOAT32_C( -453.33),
                         SIMDE_FLOAT32_C(   11.70), SIMDE_FLOAT32_C(  471.07),
                         SIMDE_FLOAT32_C( -269.64), SIMDE_FLOAT32_C(  547.27),
                         SIMDE_FLOAT32_C( -313.69), SIMDE_FLOAT32_C( -333.24)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -620.86), SIMDE_FLOAT32_C(  459.37),
                         SIMDE_FLOAT32_C( -706.92), SIMDE_FLOAT32_C( -526.79),
                         SIMDE_FLOAT32_C(  816.91), SIMDE_FLOAT32_C(  -19.55),
                         SIMDE_FLOAT32_C( -723.65), SIMDE_FLOAT32_C( -872.13)) }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    simde__m256 actual = simde_mm256_hsub_ps(cases[idx].lhs, cases[idx].rhs);
    simde_assert_m256_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Fixed-vector test for simde_mm256_hsub_pd.
 *
 * Each row holds two 4 x float64 operands and the expected result.  The
 * expected elements in the table are differences of adjacent element
 * pairs, alternating between a pair taken from `rhs` and a pair taken
 * from `lhs`.  The comparison uses simde_assert_m256d_close with a
 * precision argument of 1.
 *
 * Returns 0 once every row has been checked; what happens on a mismatch
 * is decided by the assertion macro.
 */
static int
test_simde_mm256_hsub_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d lhs;       /* first operand */
    simde__m256d rhs;       /* second operand */
    simde__m256d expected;  /* reference result */
  } cases[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -762.69), SIMDE_FLOAT64_C(  237.58),
                         SIMDE_FLOAT64_C(  832.53), SIMDE_FLOAT64_C(  -18.37)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  974.95), SIMDE_FLOAT64_C(  -69.86),
                         SIMDE_FLOAT64_C(   78.29), SIMDE_FLOAT64_C( -156.35)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-1044.81), SIMDE_FLOAT64_C( 1000.27),
                         SIMDE_FLOAT64_C( -234.64), SIMDE_FLOAT64_C( -850.90)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  220.65), SIMDE_FLOAT64_C( -139.75),
                         SIMDE_FLOAT64_C( -707.34), SIMDE_FLOAT64_C( -798.76)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  397.80), SIMDE_FLOAT64_C( -497.83),
                         SIMDE_FLOAT64_C(  717.31), SIMDE_FLOAT64_C( -807.72)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -895.63), SIMDE_FLOAT64_C( -360.40),
                         SIMDE_FLOAT64_C(-1525.03), SIMDE_FLOAT64_C(  -91.42)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  105.63), SIMDE_FLOAT64_C(  306.00),
                         SIMDE_FLOAT64_C(  281.00), SIMDE_FLOAT64_C(  310.89)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -325.57), SIMDE_FLOAT64_C(  534.52),
                         SIMDE_FLOAT64_C(  987.26), SIMDE_FLOAT64_C(  787.06)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  860.09), SIMDE_FLOAT64_C(  200.37),
                         SIMDE_FLOAT64_C( -200.20), SIMDE_FLOAT64_C(   29.89)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  967.71), SIMDE_FLOAT64_C( -641.51),
                         SIMDE_FLOAT64_C( -759.32), SIMDE_FLOAT64_C(  -97.44)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  348.76), SIMDE_FLOAT64_C( -255.68),
                         SIMDE_FLOAT64_C(  982.70), SIMDE_FLOAT64_C(  155.49)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -604.44), SIMDE_FLOAT64_C(-1609.22),
                         SIMDE_FLOAT64_C( -827.21), SIMDE_FLOAT64_C(  661.88)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  679.95), SIMDE_FLOAT64_C( -269.56),
                         SIMDE_FLOAT64_C( -481.42), SIMDE_FLOAT64_C(  919.16)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -2.64), SIMDE_FLOAT64_C(  468.75),
                         SIMDE_FLOAT64_C(  -36.44), SIMDE_FLOAT64_C(  441.73)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  471.39), SIMDE_FLOAT64_C( -949.51),
                         SIMDE_FLOAT64_C(  478.17), SIMDE_FLOAT64_C( 1400.58)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -496.34), SIMDE_FLOAT64_C(  144.53),
                         SIMDE_FLOAT64_C(   -0.78), SIMDE_FLOAT64_C(  -49.70)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  551.16), SIMDE_FLOAT64_C(    1.32),
                         SIMDE_FLOAT64_C( -388.16), SIMDE_FLOAT64_C(  219.25)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -549.84), SIMDE_FLOAT64_C(  640.87),
                         SIMDE_FLOAT64_C(  607.41), SIMDE_FLOAT64_C(  -48.92)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -33.11), SIMDE_FLOAT64_C( -186.08),
                         SIMDE_FLOAT64_C(  701.92), SIMDE_FLOAT64_C(   14.26)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   92.65), SIMDE_FLOAT64_C( -100.54),
                         SIMDE_FLOAT64_C( -271.34), SIMDE_FLOAT64_C(  -61.14)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -193.19), SIMDE_FLOAT64_C( -152.97),
                         SIMDE_FLOAT64_C(  210.20), SIMDE_FLOAT64_C( -687.66)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  116.05), SIMDE_FLOAT64_C( -582.94),
                         SIMDE_FLOAT64_C(   -9.93), SIMDE_FLOAT64_C( -395.51)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  109.95), SIMDE_FLOAT64_C( -493.57),
                         SIMDE_FLOAT64_C(  927.71), SIMDE_FLOAT64_C(   40.21)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -603.52), SIMDE_FLOAT64_C( -698.99),
                         SIMDE_FLOAT64_C( -887.50), SIMDE_FLOAT64_C( -385.58)) }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    simde__m256d actual = simde_mm256_hsub_pd(cases[idx].lhs, cases[idx].rhs);
    simde_assert_m256d_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Checks simde_mm256_dp_ps against eight precomputed cases, always with
 * the immediate mask 0x2F (decimal 47).
 *
 * In the data below every expected vector holds one value repeated across
 * each group of four floats.  That value is the product of the
 * second-from-last listed argument of `lhs` and of `rhs` within the same
 * group of four in their simde_mm256_set_ps calls.
 *
 * Returns 0 once every case has passed the closeness check. */
static int
test_simde_mm256_dp_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 lhs;
    simde__m256 rhs;
    simde__m256 expected;
  } cases[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -505.73), SIMDE_FLOAT32_C(   -137.42),
                         SIMDE_FLOAT32_C(     17.33), SIMDE_FLOAT32_C(    756.92),
                         SIMDE_FLOAT32_C(   -935.43), SIMDE_FLOAT32_C(    966.58),
                         SIMDE_FLOAT32_C(   -542.20), SIMDE_FLOAT32_C(   -986.95)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -727.11), SIMDE_FLOAT32_C(     41.36),
                         SIMDE_FLOAT32_C(   -966.84), SIMDE_FLOAT32_C(    -80.50),
                         SIMDE_FLOAT32_C(    623.90), SIMDE_FLOAT32_C(   -996.55),
                         SIMDE_FLOAT32_C(   -173.15), SIMDE_FLOAT32_C(   -230.46)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -16755.34), SIMDE_FLOAT32_C( -16755.34),
                         SIMDE_FLOAT32_C( -16755.34), SIMDE_FLOAT32_C( -16755.34),
                         SIMDE_FLOAT32_C(  93881.93), SIMDE_FLOAT32_C(  93881.93),
                         SIMDE_FLOAT32_C(  93881.93), SIMDE_FLOAT32_C(  93881.93)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    203.19), SIMDE_FLOAT32_C(   -301.86),
                         SIMDE_FLOAT32_C(   -510.29), SIMDE_FLOAT32_C(   -548.13),
                         SIMDE_FLOAT32_C(    769.15), SIMDE_FLOAT32_C(    758.71),
                         SIMDE_FLOAT32_C(    788.95), SIMDE_FLOAT32_C(   -308.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    -93.30), SIMDE_FLOAT32_C(   -209.30),
                         SIMDE_FLOAT32_C(    125.32), SIMDE_FLOAT32_C(   -995.11),
                         SIMDE_FLOAT32_C(    443.92), SIMDE_FLOAT32_C(     15.16),
                         SIMDE_FLOAT32_C(    480.88), SIMDE_FLOAT32_C(   -179.52)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -63949.54), SIMDE_FLOAT32_C( -63949.54),
                         SIMDE_FLOAT32_C( -63949.54), SIMDE_FLOAT32_C( -63949.54),
                         SIMDE_FLOAT32_C( 379390.28), SIMDE_FLOAT32_C( 379390.28),
                         SIMDE_FLOAT32_C( 379390.28), SIMDE_FLOAT32_C( 379390.28)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -642.96), SIMDE_FLOAT32_C(    460.10),
                         SIMDE_FLOAT32_C(    365.68), SIMDE_FLOAT32_C(    149.19),
                         SIMDE_FLOAT32_C(   -863.16), SIMDE_FLOAT32_C(    539.13),
                         SIMDE_FLOAT32_C(    -10.06), SIMDE_FLOAT32_C(   -915.55)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -102.23), SIMDE_FLOAT32_C(     63.93),
                         SIMDE_FLOAT32_C(    220.16), SIMDE_FLOAT32_C(    -95.11),
                         SIMDE_FLOAT32_C(    920.74), SIMDE_FLOAT32_C(   -798.64),
                         SIMDE_FLOAT32_C(    549.26), SIMDE_FLOAT32_C(    150.46)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  80508.11), SIMDE_FLOAT32_C(  80508.11),
                         SIMDE_FLOAT32_C(  80508.11), SIMDE_FLOAT32_C(  80508.11),
                         SIMDE_FLOAT32_C(  -5525.56), SIMDE_FLOAT32_C(  -5525.56),
                         SIMDE_FLOAT32_C(  -5525.56), SIMDE_FLOAT32_C(  -5525.56)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -135.78), SIMDE_FLOAT32_C(   -796.33),
                         SIMDE_FLOAT32_C(   -449.54), SIMDE_FLOAT32_C(   -938.34),
                         SIMDE_FLOAT32_C(    393.31), SIMDE_FLOAT32_C(   -848.57),
                         SIMDE_FLOAT32_C(   -577.93), SIMDE_FLOAT32_C(   -905.86)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     36.84), SIMDE_FLOAT32_C(    187.09),
                         SIMDE_FLOAT32_C(   -980.80), SIMDE_FLOAT32_C(   -813.89),
                         SIMDE_FLOAT32_C(   -648.02), SIMDE_FLOAT32_C(     86.79),
                         SIMDE_FLOAT32_C(    527.03), SIMDE_FLOAT32_C(   -592.02)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( 440908.84), SIMDE_FLOAT32_C( 440908.84),
                         SIMDE_FLOAT32_C( 440908.84), SIMDE_FLOAT32_C( 440908.84),
                         SIMDE_FLOAT32_C(-304586.47), SIMDE_FLOAT32_C(-304586.47),
                         SIMDE_FLOAT32_C(-304586.47), SIMDE_FLOAT32_C(-304586.47)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(     74.94), SIMDE_FLOAT32_C(    503.91),
                         SIMDE_FLOAT32_C(   -375.34), SIMDE_FLOAT32_C(   -743.13),
                         SIMDE_FLOAT32_C(    569.57), SIMDE_FLOAT32_C(    343.31),
                         SIMDE_FLOAT32_C(    -63.75), SIMDE_FLOAT32_C(   -543.95)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -655.40), SIMDE_FLOAT32_C(   -172.92),
                         SIMDE_FLOAT32_C(    342.45), SIMDE_FLOAT32_C(    357.80),
                         SIMDE_FLOAT32_C(    265.80), SIMDE_FLOAT32_C(   -306.65),
                         SIMDE_FLOAT32_C(      8.98), SIMDE_FLOAT32_C(    608.49)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-128535.19), SIMDE_FLOAT32_C(-128535.19),
                         SIMDE_FLOAT32_C(-128535.19), SIMDE_FLOAT32_C(-128535.19),
                         SIMDE_FLOAT32_C(   -572.47), SIMDE_FLOAT32_C(   -572.47),
                         SIMDE_FLOAT32_C(   -572.47), SIMDE_FLOAT32_C(   -572.47)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    691.92), SIMDE_FLOAT32_C(   -222.27),
                         SIMDE_FLOAT32_C(   -447.07), SIMDE_FLOAT32_C(    147.51),
                         SIMDE_FLOAT32_C(    537.10), SIMDE_FLOAT32_C(    171.81),
                         SIMDE_FLOAT32_C(    347.32), SIMDE_FLOAT32_C(   -960.39)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    144.97), SIMDE_FLOAT32_C(   -282.53),
                         SIMDE_FLOAT32_C(    612.87), SIMDE_FLOAT32_C(   -406.95),
                         SIMDE_FLOAT32_C(   -724.51), SIMDE_FLOAT32_C(   -908.13),
                         SIMDE_FLOAT32_C(    448.37), SIMDE_FLOAT32_C(   -713.68)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-273995.78), SIMDE_FLOAT32_C(-273995.78),
                         SIMDE_FLOAT32_C(-273995.78), SIMDE_FLOAT32_C(-273995.78),
                         SIMDE_FLOAT32_C( 155727.88), SIMDE_FLOAT32_C( 155727.88),
                         SIMDE_FLOAT32_C( 155727.88), SIMDE_FLOAT32_C( 155727.88)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -208.78), SIMDE_FLOAT32_C(    929.10),
                         SIMDE_FLOAT32_C(   -272.86), SIMDE_FLOAT32_C(     86.66),
                         SIMDE_FLOAT32_C(   -744.06), SIMDE_FLOAT32_C(   -881.86),
                         SIMDE_FLOAT32_C(   -663.16), SIMDE_FLOAT32_C(    193.59)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    -31.12), SIMDE_FLOAT32_C(   -303.48),
                         SIMDE_FLOAT32_C(   -758.68), SIMDE_FLOAT32_C(    -57.97),
                         SIMDE_FLOAT32_C(    -16.19), SIMDE_FLOAT32_C(    187.47),
                         SIMDE_FLOAT32_C(     13.98), SIMDE_FLOAT32_C(    577.97)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( 207013.41), SIMDE_FLOAT32_C( 207013.41),
                         SIMDE_FLOAT32_C( 207013.41), SIMDE_FLOAT32_C( 207013.41),
                         SIMDE_FLOAT32_C(  -9270.98), SIMDE_FLOAT32_C(  -9270.98),
                         SIMDE_FLOAT32_C(  -9270.98), SIMDE_FLOAT32_C(  -9270.98)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    696.37), SIMDE_FLOAT32_C(    300.52),
                         SIMDE_FLOAT32_C(   -476.62), SIMDE_FLOAT32_C(    523.01),
                         SIMDE_FLOAT32_C(   -147.94), SIMDE_FLOAT32_C(   -993.31),
                         SIMDE_FLOAT32_C(    910.70), SIMDE_FLOAT32_C(   -650.05)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -298.13), SIMDE_FLOAT32_C(   -528.10),
                         SIMDE_FLOAT32_C(    371.86), SIMDE_FLOAT32_C(    -93.70),
                         SIMDE_FLOAT32_C(   -396.01), SIMDE_FLOAT32_C(   -319.70),
                         SIMDE_FLOAT32_C(   -350.31), SIMDE_FLOAT32_C(   -580.49)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-177235.91), SIMDE_FLOAT32_C(-177235.91),
                         SIMDE_FLOAT32_C(-177235.91), SIMDE_FLOAT32_C(-177235.91),
                         SIMDE_FLOAT32_C(-319027.31), SIMDE_FLOAT32_C(-319027.31),
                         SIMDE_FLOAT32_C(-319027.31), SIMDE_FLOAT32_C(-319027.31)) }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);
  size_t idx;

  for (idx = 0 ; idx < n_cases ; idx++) {
    /* The mask is written as a literal (0x2F == 47) so it stays a
     * compile-time constant at the call site. */
    simde__m256 actual = simde_mm256_dp_ps(cases[idx].lhs, cases[idx].rhs, 0x2F);
    simde_assert_m256_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Exercises simde_mm256_extract_epi32 once for each index 0 through 7.
 *
 * Each case builds its own vector with simde_mm256_set_epi32 and expects
 * the extracted value to be the argument found `index` places back from
 * the last argument given to set_epi32 (index 0 is the last argument,
 * index 7 the first).  The index is written as a literal in every call.
 *
 * Returns 0 when all eight checks pass. */
static int
test_simde_mm256_extract_epi32(SIMDE_MUNIT_TEST_ARGS) {
  { /* index 0 */
    simde__m256i vec = simde_mm256_set_epi32(
      INT32_C( 1385655883), INT32_C(-1710235670), INT32_C( 1840319559), INT32_C(   46971468),
      INT32_C(  543693501), INT32_C( 1301344915), INT32_C( 1566637023), INT32_C( 2143093505));
    simde_assert_equal_i32(simde_mm256_extract_epi32(vec, 0), INT32_C(2143093505));
  }

  { /* index 1 */
    simde__m256i vec = simde_mm256_set_epi32(
      INT32_C( -318872645), INT32_C(  154450085), INT32_C(  940983410), INT32_C(-2131037397),
      INT32_C( 1379124831), INT32_C(  546028595), INT32_C(  -61508185), INT32_C( 1543937407));
    simde_assert_equal_i32(simde_mm256_extract_epi32(vec, 1), INT32_C(-61508185));
  }

  { /* index 2 */
    simde__m256i vec = simde_mm256_set_epi32(
      INT32_C(-1821994577), INT32_C( 1502812176), INT32_C( -436334344), INT32_C( 2003643735),
      INT32_C(-1707163842), INT32_C(-1393484615), INT32_C(-1517380673), INT32_C( -785551131));
    simde_assert_equal_i32(simde_mm256_extract_epi32(vec, 2), INT32_C(-1393484615));
  }

  { /* index 3 */
    simde__m256i vec = simde_mm256_set_epi32(
      INT32_C(-1342559050), INT32_C( 1280753378), INT32_C(  519553994), INT32_C(  134336686),
      INT32_C(-1696266450), INT32_C( 1521778870), INT32_C(-1336686564), INT32_C( -138601679));
    simde_assert_equal_i32(simde_mm256_extract_epi32(vec, 3), INT32_C(-1696266450));
  }

  { /* index 4 */
    simde__m256i vec = simde_mm256_set_epi32(
      INT32_C( -346178226), INT32_C(-1487718780), INT32_C( 1522880003), INT32_C(  588601981),
      INT32_C( 1931111095), INT32_C(-1466649812), INT32_C(-1669730912), INT32_C(  257973424));
    simde_assert_equal_i32(simde_mm256_extract_epi32(vec, 4), INT32_C(588601981));
  }

  { /* index 5 */
    simde__m256i vec = simde_mm256_set_epi32(
      INT32_C(  564328837), INT32_C(  600105992), INT32_C(-1245299261), INT32_C(  186243465),
      INT32_C( 1677067524), INT32_C( -306576008), INT32_C(-1101353897), INT32_C(  733243889));
    simde_assert_equal_i32(simde_mm256_extract_epi32(vec, 5), INT32_C(-1245299261));
  }

  { /* index 6 */
    simde__m256i vec = simde_mm256_set_epi32(
      INT32_C( -460169306), INT32_C( -732910055), INT32_C(  973235428), INT32_C(  784780564),
      INT32_C( -944171968), INT32_C(-1846057571), INT32_C( 1068840758), INT32_C(  786857160));
    simde_assert_equal_i32(simde_mm256_extract_epi32(vec, 6), INT32_C(-732910055));
  }

  { /* index 7 */
    simde__m256i vec = simde_mm256_set_epi32(
      INT32_C( -541300758), INT32_C(-2119881925), INT32_C(-1597449913), INT32_C(  489702841),
      INT32_C(-1364763213), INT32_C(  217047214), INT32_C( 1964050600), INT32_C(-1022338452));
    simde_assert_equal_i32(simde_mm256_extract_epi32(vec, 7), INT32_C(-541300758));
  }

  return 0;
}

/* Exercises simde_mm256_extract_epi64 with indices 0, 1, 2, 3 and then the
 * same four indices again on different data.
 *
 * Each case builds its own vector with simde_mm256_set_epi64x and expects
 * the extracted value to be the argument found `index` places back from
 * the last argument given to set_epi64x.  The index is written as a
 * literal in every call.
 *
 * Returns 0 when all eight checks pass. */
static int
test_simde_mm256_extract_epi64(SIMDE_MUNIT_TEST_ARGS) {
  { /* index 0 */
    simde__m256i vec = simde_mm256_set_epi64x(
      INT64_C(-4660489839809071463), INT64_C(-3275104097342830593),
      INT64_C(-7646437287554588382), INT64_C(-1616723671742758177));
    simde_assert_equal_i64(simde_mm256_extract_epi64(vec, 0), INT64_C(-1616723671742758177));
  }

  { /* index 1 */
    simde__m256i vec = simde_mm256_set_epi64x(
      INT64_C(-4654112182052362730), INT64_C( 4380015427737745916),
      INT64_C(-2237548103057162518), INT64_C( -717271117564457282));
    simde_assert_equal_i64(simde_mm256_extract_epi64(vec, 1), INT64_C(-2237548103057162518));
  }

  { /* index 2 */
    simde__m256i vec = simde_mm256_set_epi64x(
      INT64_C(-9053768942501696029), INT64_C(  -11082930357956064),
      INT64_C( 4721540626264374149), INT64_C(-2904931686008463967));
    simde_assert_equal_i64(simde_mm256_extract_epi64(vec, 2), INT64_C(  -11082930357956064));
  }

  { /* index 3 */
    simde__m256i vec = simde_mm256_set_epi64x(
      INT64_C(  952352888602265092), INT64_C(-6100868528376933823),
      INT64_C(-6435628201550193122), INT64_C(-1473382845492258957));
    simde_assert_equal_i64(simde_mm256_extract_epi64(vec, 3), INT64_C(  952352888602265092));
  }

  { /* index 0, second data set */
    simde__m256i vec = simde_mm256_set_epi64x(
      INT64_C( 3571656271242113686), INT64_C(-4496934129483038288),
      INT64_C(-1076704560171997551), INT64_C( 3873627956598886953));
    simde_assert_equal_i64(simde_mm256_extract_epi64(vec, 0), INT64_C( 3873627956598886953));
  }

  { /* index 1, second data set */
    simde__m256i vec = simde_mm256_set_epi64x(
      INT64_C( 4706661489617163532), INT64_C( 5127697303613520544),
      INT64_C(-7455940824973292748), INT64_C(-7733027913451150656));
    simde_assert_equal_i64(simde_mm256_extract_epi64(vec, 1), INT64_C(-7455940824973292748));
  }

  { /* index 2, second data set */
    simde__m256i vec = simde_mm256_set_epi64x(
      INT64_C( 5538080490384191196), INT64_C( -631116641013180065),
      INT64_C(-8091873078648501156), INT64_C(-6139846631858294273));
    simde_assert_equal_i64(simde_mm256_extract_epi64(vec, 2), INT64_C( -631116641013180065));
  }

  { /* index 3, second data set */
    simde__m256i vec = simde_mm256_set_epi64x(
      INT64_C(-5169157996131687226), INT64_C(-8345143279822783157),
      INT64_C(   28110904629261106), INT64_C(-2611039926221160636));
    simde_assert_equal_i64(simde_mm256_extract_epi64(vec, 3), INT64_C(-5169157996131687226));
  }

  return 0;
}

/* Exercises simde_mm256_insert_epi8 with eight hand-written cases.
 *
 * Each case builds a source vector with simde_mm256_set_epi8, inserts one
 * byte at a literal index, and compares the result lane-by-lane (as int8)
 * with an expected vector.  The expected vector equals the source except
 * for the argument found `index` places back from the last argument given
 * to set_epi8, which holds the inserted value.
 *
 * NOTE(review): the indices used are 5, 9, 6, 10, 10, 2, 8 and 7, so no
 * case here targets an index of 16 or above.
 *
 * Returns 0 when all eight comparisons pass. */
static int
test_simde_mm256_insert_epi8(SIMDE_MUNIT_TEST_ARGS) {
  { /* index 5 */
    simde__m256i src = simde_mm256_set_epi8(
      INT8_C(   2), INT8_C(  84), INT8_C(   6), INT8_C( -83),
      INT8_C( 118), INT8_C(  45), INT8_C(  46), INT8_C( -44),
      INT8_C(-115), INT8_C( -99), INT8_C(  35), INT8_C(  32),
      INT8_C( 111), INT8_C(  68), INT8_C( 103), INT8_C(   7),
      INT8_C( -73), INT8_C(-111), INT8_C(-113), INT8_C(  29),
      INT8_C(  25), INT8_C( 114), INT8_C(-122), INT8_C(  38),
      INT8_C( -96), INT8_C( 114), INT8_C(  39), INT8_C(  74),
      INT8_C(  99), INT8_C(  11), INT8_C(  76), INT8_C(  56));
    simde__m256i got = simde_mm256_insert_epi8(src, INT8_C(-3), 5);
    simde__m256i want = simde_mm256_set_epi8(
      INT8_C(   2), INT8_C(  84), INT8_C(   6), INT8_C( -83),
      INT8_C( 118), INT8_C(  45), INT8_C(  46), INT8_C( -44),
      INT8_C(-115), INT8_C( -99), INT8_C(  35), INT8_C(  32),
      INT8_C( 111), INT8_C(  68), INT8_C( 103), INT8_C(   7),
      INT8_C( -73), INT8_C(-111), INT8_C(-113), INT8_C(  29),
      INT8_C(  25), INT8_C( 114), INT8_C(-122), INT8_C(  38),
      INT8_C( -96), INT8_C( 114), INT8_C(  -3), INT8_C(  74),
      INT8_C(  99), INT8_C(  11), INT8_C(  76), INT8_C(  56));
    simde_assert_m256i_i8(got, ==, want);
  }

  { /* index 9 */
    simde__m256i src = simde_mm256_set_epi8(
      INT8_C(-119), INT8_C(-118), INT8_C(  60), INT8_C(  31),
      INT8_C(-126), INT8_C( -54), INT8_C(  51), INT8_C(-114),
      INT8_C(-128), INT8_C( -32), INT8_C(  25), INT8_C( 116),
      INT8_C( 103), INT8_C( -83), INT8_C(  64), INT8_C(  29),
      INT8_C( -50), INT8_C( -55), INT8_C( 114), INT8_C(  10),
      INT8_C(  44), INT8_C(-111), INT8_C(-114), INT8_C( -60),
      INT8_C(-128), INT8_C( -48), INT8_C(  15), INT8_C(  67),
      INT8_C( -42), INT8_C( -49), INT8_C(  99), INT8_C( 114));
    simde__m256i got = simde_mm256_insert_epi8(src, INT8_C(59), 9);
    simde__m256i want = simde_mm256_set_epi8(
      INT8_C(-119), INT8_C(-118), INT8_C(  60), INT8_C(  31),
      INT8_C(-126), INT8_C( -54), INT8_C(  51), INT8_C(-114),
      INT8_C(-128), INT8_C( -32), INT8_C(  25), INT8_C( 116),
      INT8_C( 103), INT8_C( -83), INT8_C(  64), INT8_C(  29),
      INT8_C( -50), INT8_C( -55), INT8_C( 114), INT8_C(  10),
      INT8_C(  44), INT8_C(-111), INT8_C(  59), INT8_C( -60),
      INT8_C(-128), INT8_C( -48), INT8_C(  15), INT8_C(  67),
      INT8_C( -42), INT8_C( -49), INT8_C(  99), INT8_C( 114));
    simde_assert_m256i_i8(got, ==, want);
  }

  { /* index 6 */
    simde__m256i src = simde_mm256_set_epi8(
      INT8_C( -89), INT8_C(  90), INT8_C(-119), INT8_C( -38),
      INT8_C( -50), INT8_C(  -6), INT8_C(-121), INT8_C( -93),
      INT8_C(  88), INT8_C(  83), INT8_C( -93), INT8_C( -96),
      INT8_C( -77), INT8_C( -84), INT8_C(-110), INT8_C( -16),
      INT8_C( -67), INT8_C(  18), INT8_C(  25), INT8_C( -38),
      INT8_C( -60), INT8_C(  59), INT8_C( 108), INT8_C( -22),
      INT8_C(  39), INT8_C( 101), INT8_C(   5), INT8_C( 105),
      INT8_C(  45), INT8_C( -43), INT8_C(  87), INT8_C(  73));
    simde__m256i got = simde_mm256_insert_epi8(src, INT8_C(-38), 6);
    simde__m256i want = simde_mm256_set_epi8(
      INT8_C( -89), INT8_C(  90), INT8_C(-119), INT8_C( -38),
      INT8_C( -50), INT8_C(  -6), INT8_C(-121), INT8_C( -93),
      INT8_C(  88), INT8_C(  83), INT8_C( -93), INT8_C( -96),
      INT8_C( -77), INT8_C( -84), INT8_C(-110), INT8_C( -16),
      INT8_C( -67), INT8_C(  18), INT8_C(  25), INT8_C( -38),
      INT8_C( -60), INT8_C(  59), INT8_C( 108), INT8_C( -22),
      INT8_C(  39), INT8_C( -38), INT8_C(   5), INT8_C( 105),
      INT8_C(  45), INT8_C( -43), INT8_C(  87), INT8_C(  73));
    simde_assert_m256i_i8(got, ==, want);
  }

  { /* index 10 */
    simde__m256i src = simde_mm256_set_epi8(
      INT8_C( 109), INT8_C(  -4), INT8_C(  42), INT8_C(  13),
      INT8_C(  50), INT8_C(  75), INT8_C( 107), INT8_C(  56),
      INT8_C(  48), INT8_C( -32), INT8_C(  -5), INT8_C( -89),
      INT8_C(-120), INT8_C(  97), INT8_C(  24), INT8_C(  11),
      INT8_C( -12), INT8_C( -17), INT8_C(  64), INT8_C(  44),
      INT8_C(  42), INT8_C(  13), INT8_C(   7), INT8_C(  86),
      INT8_C( -64), INT8_C(   9), INT8_C( -71), INT8_C( -23),
      INT8_C(  74), INT8_C(  49), INT8_C( 126), INT8_C(  43));
    simde__m256i got = simde_mm256_insert_epi8(src, INT8_C(39), 10);
    simde__m256i want = simde_mm256_set_epi8(
      INT8_C( 109), INT8_C(  -4), INT8_C(  42), INT8_C(  13),
      INT8_C(  50), INT8_C(  75), INT8_C( 107), INT8_C(  56),
      INT8_C(  48), INT8_C( -32), INT8_C(  -5), INT8_C( -89),
      INT8_C(-120), INT8_C(  97), INT8_C(  24), INT8_C(  11),
      INT8_C( -12), INT8_C( -17), INT8_C(  64), INT8_C(  44),
      INT8_C(  42), INT8_C(  39), INT8_C(   7), INT8_C(  86),
      INT8_C( -64), INT8_C(   9), INT8_C( -71), INT8_C( -23),
      INT8_C(  74), INT8_C(  49), INT8_C( 126), INT8_C(  43));
    simde_assert_m256i_i8(got, ==, want);
  }

  { /* index 10, second data set */
    simde__m256i src = simde_mm256_set_epi8(
      INT8_C(  19), INT8_C( -38), INT8_C(  78), INT8_C( -23),
      INT8_C(  62), INT8_C( -68), INT8_C(-102), INT8_C(  63),
      INT8_C( 124), INT8_C(-113), INT8_C( -21), INT8_C( 108),
      INT8_C( -68), INT8_C( -75), INT8_C(  51), INT8_C( 124),
      INT8_C(  76), INT8_C( -65), INT8_C( -80), INT8_C(  56),
      INT8_C(  41), INT8_C(  59), INT8_C( 106), INT8_C(  -7),
      INT8_C( -87), INT8_C(  20), INT8_C(   3), INT8_C( 106),
      INT8_C(  72), INT8_C(-115), INT8_C(-103), INT8_C(  26));
    simde__m256i got = simde_mm256_insert_epi8(src, INT8_C(-114), 10);
    simde__m256i want = simde_mm256_set_epi8(
      INT8_C(  19), INT8_C( -38), INT8_C(  78), INT8_C( -23),
      INT8_C(  62), INT8_C( -68), INT8_C(-102), INT8_C(  63),
      INT8_C( 124), INT8_C(-113), INT8_C( -21), INT8_C( 108),
      INT8_C( -68), INT8_C( -75), INT8_C(  51), INT8_C( 124),
      INT8_C(  76), INT8_C( -65), INT8_C( -80), INT8_C(  56),
      INT8_C(  41), INT8_C(-114), INT8_C( 106), INT8_C(  -7),
      INT8_C( -87), INT8_C(  20), INT8_C(   3), INT8_C( 106),
      INT8_C(  72), INT8_C(-115), INT8_C(-103), INT8_C(  26));
    simde_assert_m256i_i8(got, ==, want);
  }

  { /* index 2 */
    simde__m256i src = simde_mm256_set_epi8(
      INT8_C( 124), INT8_C(  -4), INT8_C( -73), INT8_C( 108),
      INT8_C(  66), INT8_C( -17), INT8_C(-121), INT8_C( 100),
      INT8_C( 124), INT8_C( -94), INT8_C(  17), INT8_C( -78),
      INT8_C( -99), INT8_C(  31), INT8_C(  28), INT8_C(   7),
      INT8_C(-108), INT8_C( -43), INT8_C( -23), INT8_C( 104),
      INT8_C( 122), INT8_C(  61), INT8_C( -93), INT8_C(-102),
      INT8_C( 125), INT8_C( -79), INT8_C(  24), INT8_C(  49),
      INT8_C(   9), INT8_C( -93), INT8_C(  36), INT8_C( -74));
    simde__m256i got = simde_mm256_insert_epi8(src, INT8_C(-65), 2);
    simde__m256i want = simde_mm256_set_epi8(
      INT8_C( 124), INT8_C(  -4), INT8_C( -73), INT8_C( 108),
      INT8_C(  66), INT8_C( -17), INT8_C(-121), INT8_C( 100),
      INT8_C( 124), INT8_C( -94), INT8_C(  17), INT8_C( -78),
      INT8_C( -99), INT8_C(  31), INT8_C(  28), INT8_C(   7),
      INT8_C(-108), INT8_C( -43), INT8_C( -23), INT8_C( 104),
      INT8_C( 122), INT8_C(  61), INT8_C( -93), INT8_C(-102),
      INT8_C( 125), INT8_C( -79), INT8_C(  24), INT8_C(  49),
      INT8_C(   9), INT8_C( -65), INT8_C(  36), INT8_C( -74));
    simde_assert_m256i_i8(got, ==, want);
  }

  { /* index 8 */
    simde__m256i src = simde_mm256_set_epi8(
      INT8_C(-100), INT8_C(  10), INT8_C(  87), INT8_C( -57),
      INT8_C( -89), INT8_C( -66), INT8_C( -32), INT8_C( 104),
      INT8_C(  66), INT8_C( -81), INT8_C( -94), INT8_C(  71),
      INT8_C(  67), INT8_C( 108), INT8_C( -88), INT8_C(  53),
      INT8_C( 108), INT8_C( -39), INT8_C( -93), INT8_C(  -2),
      INT8_C(  93), INT8_C(  67), INT8_C( -85), INT8_C( -42),
      INT8_C(  28), INT8_C( -64), INT8_C(  83), INT8_C(  -9),
      INT8_C( -95), INT8_C(  36), INT8_C(  44), INT8_C( 112));
    simde__m256i got = simde_mm256_insert_epi8(src, INT8_C(58), 8);
    simde__m256i want = simde_mm256_set_epi8(
      INT8_C(-100), INT8_C(  10), INT8_C(  87), INT8_C( -57),
      INT8_C( -89), INT8_C( -66), INT8_C( -32), INT8_C( 104),
      INT8_C(  66), INT8_C( -81), INT8_C( -94), INT8_C(  71),
      INT8_C(  67), INT8_C( 108), INT8_C( -88), INT8_C(  53),
      INT8_C( 108), INT8_C( -39), INT8_C( -93), INT8_C(  -2),
      INT8_C(  93), INT8_C(  67), INT8_C( -85), INT8_C(  58),
      INT8_C(  28), INT8_C( -64), INT8_C(  83), INT8_C(  -9),
      INT8_C( -95), INT8_C(  36), INT8_C(  44), INT8_C( 112));
    simde_assert_m256i_i8(got, ==, want);
  }

  { /* index 7 */
    simde__m256i src = simde_mm256_set_epi8(
      INT8_C( -16), INT8_C( -47), INT8_C( -77), INT8_C(  42),
      INT8_C(  89), INT8_C(  -7), INT8_C( -62), INT8_C(  45),
      INT8_C( -28), INT8_C(  34), INT8_C( -59), INT8_C(  -9),
      INT8_C( -38), INT8_C(-118), INT8_C(  83), INT8_C(  59),
      INT8_C( -16), INT8_C(  34), INT8_C( -64), INT8_C(  16),
      INT8_C( -15), INT8_C( -28), INT8_C(  47), INT8_C(  10),
      INT8_C(-112), INT8_C( -37), INT8_C(  71), INT8_C( -84),
      INT8_C( -78), INT8_C( -81), INT8_C(  40), INT8_C( -98));
    simde__m256i got = simde_mm256_insert_epi8(src, INT8_C(121), 7);
    simde__m256i want = simde_mm256_set_epi8(
      INT8_C( -16), INT8_C( -47), INT8_C( -77), INT8_C(  42),
      INT8_C(  89), INT8_C(  -7), INT8_C( -62), INT8_C(  45),
      INT8_C( -28), INT8_C(  34), INT8_C( -59), INT8_C(  -9),
      INT8_C( -38), INT8_C(-118), INT8_C(  83), INT8_C(  59),
      INT8_C( -16), INT8_C(  34), INT8_C( -64), INT8_C(  16),
      INT8_C( -15), INT8_C( -28), INT8_C(  47), INT8_C(  10),
      INT8_C( 121), INT8_C( -37), INT8_C(  71), INT8_C( -84),
      INT8_C( -78), INT8_C( -81), INT8_C(  40), INT8_C( -98));
    simde_assert_m256i_i8(got, ==, want);
  }

  return 0;
}

/* Exercises simde_mm256_insert_epi16 with eight hand-written cases.
 *
 * Each case builds a source vector with simde_mm256_set_epi16, inserts one
 * 16-bit value at a literal index, and compares the result lane-by-lane
 * (as int16) with an expected vector.  The expected vector equals the
 * source except for the argument found `index` places back from the last
 * argument given to set_epi16, which holds the inserted value.
 *
 * The indices used are 5, 3, 1, 12, 15, 9, 11 and 6.
 *
 * Returns 0 when all eight comparisons pass. */
static int
test_simde_mm256_insert_epi16(SIMDE_MUNIT_TEST_ARGS) {
  { /* index 5 */
    simde__m256i src = simde_mm256_set_epi16(
      INT16_C(-21602), INT16_C( 27671), INT16_C(-26514), INT16_C( 32139),
      INT16_C( 27553), INT16_C(  3389), INT16_C( 26164), INT16_C(  3268),
      INT16_C( -3948), INT16_C( 26700), INT16_C( 31313), INT16_C( 27327),
      INT16_C(-25076), INT16_C( -6473), INT16_C(-27908), INT16_C(-18876));
    simde__m256i got = simde_mm256_insert_epi16(src, INT16_C(13157), 5);
    simde__m256i want = simde_mm256_set_epi16(
      INT16_C(-21602), INT16_C( 27671), INT16_C(-26514), INT16_C( 32139),
      INT16_C( 27553), INT16_C(  3389), INT16_C( 26164), INT16_C(  3268),
      INT16_C( -3948), INT16_C( 26700), INT16_C( 13157), INT16_C( 27327),
      INT16_C(-25076), INT16_C( -6473), INT16_C(-27908), INT16_C(-18876));
    simde_assert_m256i_i16(got, ==, want);
  }

  { /* index 3 */
    simde__m256i src = simde_mm256_set_epi16(
      INT16_C(-15739), INT16_C(-22477), INT16_C( 24105), INT16_C(  1501),
      INT16_C(-14518), INT16_C( 18176), INT16_C( 14482), INT16_C( 20288),
      INT16_C(-15586), INT16_C( 12200), INT16_C( -9527), INT16_C( -9462),
      INT16_C(-20273), INT16_C(-22514), INT16_C(  1070), INT16_C(-15309));
    simde__m256i got = simde_mm256_insert_epi16(src, INT16_C(369), 3);
    simde__m256i want = simde_mm256_set_epi16(
      INT16_C(-15739), INT16_C(-22477), INT16_C( 24105), INT16_C(  1501),
      INT16_C(-14518), INT16_C( 18176), INT16_C( 14482), INT16_C( 20288),
      INT16_C(-15586), INT16_C( 12200), INT16_C( -9527), INT16_C( -9462),
      INT16_C(   369), INT16_C(-22514), INT16_C(  1070), INT16_C(-15309));
    simde_assert_m256i_i16(got, ==, want);
  }

  { /* index 1 */
    simde__m256i src = simde_mm256_set_epi16(
      INT16_C(-20578), INT16_C( 31339), INT16_C(-21867), INT16_C(  6148),
      INT16_C(-32342), INT16_C(-12751), INT16_C(-22422), INT16_C( 12556),
      INT16_C(-31526), INT16_C(-24860), INT16_C( 25156), INT16_C( -4916),
      INT16_C(-20990), INT16_C(-13542), INT16_C(-25587), INT16_C(-20477));
    simde__m256i got = simde_mm256_insert_epi16(src, INT16_C(-32428), 1);
    simde__m256i want = simde_mm256_set_epi16(
      INT16_C(-20578), INT16_C( 31339), INT16_C(-21867), INT16_C(  6148),
      INT16_C(-32342), INT16_C(-12751), INT16_C(-22422), INT16_C( 12556),
      INT16_C(-31526), INT16_C(-24860), INT16_C( 25156), INT16_C( -4916),
      INT16_C(-20990), INT16_C(-13542), INT16_C(-32428), INT16_C(-20477));
    simde_assert_m256i_i16(got, ==, want);
  }

  { /* index 12 */
    simde__m256i src = simde_mm256_set_epi16(
      INT16_C( 16682), INT16_C(  9974), INT16_C( -6779), INT16_C(  1747),
      INT16_C(-26827), INT16_C(-32182), INT16_C( 17867), INT16_C(-23355),
      INT16_C( 15404), INT16_C( -2091), INT16_C(  -560), INT16_C(-24442),
      INT16_C( 12274), INT16_C(-19942), INT16_C(  8401), INT16_C(-15722));
    simde__m256i got = simde_mm256_insert_epi16(src, INT16_C(16950), 12);
    simde__m256i want = simde_mm256_set_epi16(
      INT16_C( 16682), INT16_C(  9974), INT16_C( -6779), INT16_C( 16950),
      INT16_C(-26827), INT16_C(-32182), INT16_C( 17867), INT16_C(-23355),
      INT16_C( 15404), INT16_C( -2091), INT16_C(  -560), INT16_C(-24442),
      INT16_C( 12274), INT16_C(-19942), INT16_C(  8401), INT16_C(-15722));
    simde_assert_m256i_i16(got, ==, want);
  }

  { /* index 15 */
    simde__m256i src = simde_mm256_set_epi16(
      INT16_C( 31205), INT16_C( 25676), INT16_C(  7342), INT16_C(  4880),
      INT16_C( -8533), INT16_C(-32080), INT16_C( -7595), INT16_C(-22500),
      INT16_C(-10840), INT16_C( 19996), INT16_C( -4449), INT16_C(-31416),
      INT16_C(-26476), INT16_C( -3822), INT16_C( 13156), INT16_C(-26200));
    simde__m256i got = simde_mm256_insert_epi16(src, INT16_C(27229), 15);
    simde__m256i want = simde_mm256_set_epi16(
      INT16_C( 27229), INT16_C( 25676), INT16_C(  7342), INT16_C(  4880),
      INT16_C( -8533), INT16_C(-32080), INT16_C( -7595), INT16_C(-22500),
      INT16_C(-10840), INT16_C( 19996), INT16_C( -4449), INT16_C(-31416),
      INT16_C(-26476), INT16_C( -3822), INT16_C( 13156), INT16_C(-26200));
    simde_assert_m256i_i16(got, ==, want);
  }

  { /* index 9 */
    simde__m256i src = simde_mm256_set_epi16(
      INT16_C( -8749), INT16_C(-27202), INT16_C(-11704), INT16_C(    52),
      INT16_C(-10454), INT16_C( -3314), INT16_C( -8238), INT16_C(-18856),
      INT16_C(  6163), INT16_C(-27363), INT16_C(  1816), INT16_C(-31045),
      INT16_C( 28943), INT16_C(-22635), INT16_C(  1291), INT16_C(-31630));
    simde__m256i got = simde_mm256_insert_epi16(src, INT16_C(25149), 9);
    simde__m256i want = simde_mm256_set_epi16(
      INT16_C( -8749), INT16_C(-27202), INT16_C(-11704), INT16_C(    52),
      INT16_C(-10454), INT16_C( -3314), INT16_C( 25149), INT16_C(-18856),
      INT16_C(  6163), INT16_C(-27363), INT16_C(  1816), INT16_C(-31045),
      INT16_C( 28943), INT16_C(-22635), INT16_C(  1291), INT16_C(-31630));
    simde_assert_m256i_i16(got, ==, want);
  }

  { /* index 11 */
    simde__m256i src = simde_mm256_set_epi16(
      INT16_C( -5789), INT16_C( 32645), INT16_C(-25474), INT16_C( -6052),
      INT16_C( 30501), INT16_C( 13572), INT16_C( 32362), INT16_C( 31220),
      INT16_C( 21812), INT16_C( 21730), INT16_C(-10684), INT16_C(-29591),
      INT16_C( 23321), INT16_C(  4014), INT16_C( 18929), INT16_C(  -646));
    simde__m256i got = simde_mm256_insert_epi16(src, INT16_C(10486), 11);
    simde__m256i want = simde_mm256_set_epi16(
      INT16_C( -5789), INT16_C( 32645), INT16_C(-25474), INT16_C( -6052),
      INT16_C( 10486), INT16_C( 13572), INT16_C( 32362), INT16_C( 31220),
      INT16_C( 21812), INT16_C( 21730), INT16_C(-10684), INT16_C(-29591),
      INT16_C( 23321), INT16_C(  4014), INT16_C( 18929), INT16_C(  -646));
    simde_assert_m256i_i16(got, ==, want);
  }

  { /* index 6 */
    simde__m256i src = simde_mm256_set_epi16(
      INT16_C( -2271), INT16_C(  8016), INT16_C( 26327), INT16_C( 27397),
      INT16_C( 19036), INT16_C( 25193), INT16_C(-11253), INT16_C(-15734),
      INT16_C(  -521), INT16_C( 20581), INT16_C(-18434), INT16_C(  4365),
      INT16_C(-18143), INT16_C( 23566), INT16_C(-32412), INT16_C(-20606));
    simde__m256i got = simde_mm256_insert_epi16(src, INT16_C(4940), 6);
    simde__m256i want = simde_mm256_set_epi16(
      INT16_C( -2271), INT16_C(  8016), INT16_C( 26327), INT16_C( 27397),
      INT16_C( 19036), INT16_C( 25193), INT16_C(-11253), INT16_C(-15734),
      INT16_C(  -521), INT16_C(  4940), INT16_C(-18434), INT16_C(  4365),
      INT16_C(-18143), INT16_C( 23566), INT16_C(-32412), INT16_C(-20606));
    simde_assert_m256i_i16(got, ==, want);
  }

  return 0;
}

/* Exercises simde_mm256_insert_epi32 with eight fixed cases.
 *
 * Every case builds an input vector, overwrites a single 32-bit element at a
 * compile-time index, and compares the outcome element-wise (as int32) with a
 * precomputed vector.  In the expected data, element index i corresponds to
 * the (7 - i)-th argument of simde_mm256_set_epi32.
 *
 * Returns 0 once every comparison has been executed.
 */
static int
test_simde_mm256_insert_epi32(SIMDE_MUNIT_TEST_ARGS) {
  { /* element 0 */
    simde__m256i src = simde_mm256_set_epi32(
        INT32_C( 1527893980), INT32_C(  272236058), INT32_C( 1771532776), INT32_C( -527129145),
        INT32_C(-1867900811), INT32_C( 1959964247), INT32_C( 1343894165), INT32_C( 1334695580));
    simde__m256i want = simde_mm256_set_epi32(
        INT32_C( 1527893980), INT32_C(  272236058), INT32_C( 1771532776), INT32_C( -527129145),
        INT32_C(-1867900811), INT32_C( 1959964247), INT32_C( 1343894165), INT32_C( -707274869));
    simde__m256i got = simde_mm256_insert_epi32(src, INT32_C( -707274869), 0);
    simde_assert_m256i_i32(got, ==, want);
  }

  { /* element 6 */
    simde__m256i src = simde_mm256_set_epi32(
        INT32_C( -661063243), INT32_C(-1710175048), INT32_C(  930876847), INT32_C(-1520459634),
        INT32_C( -463408284), INT32_C( -440016671), INT32_C( 1321845686), INT32_C( 1111303375));
    simde__m256i want = simde_mm256_set_epi32(
        INT32_C( -661063243), INT32_C( 1750585714), INT32_C(  930876847), INT32_C(-1520459634),
        INT32_C( -463408284), INT32_C( -440016671), INT32_C( 1321845686), INT32_C( 1111303375));
    simde__m256i got = simde_mm256_insert_epi32(src, INT32_C( 1750585714), 6);
    simde_assert_m256i_i32(got, ==, want);
  }

  { /* element 5 */
    simde__m256i src = simde_mm256_set_epi32(
        INT32_C(  922514807), INT32_C(-1345830052), INT32_C(  578930278), INT32_C( -474805558),
        INT32_C( 1483567706), INT32_C( 1465521628), INT32_C( 1619162073), INT32_C( -603337611));
    simde__m256i want = simde_mm256_set_epi32(
        INT32_C(  922514807), INT32_C(-1345830052), INT32_C( 1709940880), INT32_C( -474805558),
        INT32_C( 1483567706), INT32_C( 1465521628), INT32_C( 1619162073), INT32_C( -603337611));
    simde__m256i got = simde_mm256_insert_epi32(src, INT32_C( 1709940880), 5);
    simde_assert_m256i_i32(got, ==, want);
  }

  { /* element 5 again, different data */
    simde__m256i src = simde_mm256_set_epi32(
        INT32_C( 1682496014), INT32_C( -265998243), INT32_C( -696943616), INT32_C( -723203182),
        INT32_C( 1593791374), INT32_C( -358344217), INT32_C(  813656782), INT32_C(   58704738));
    simde__m256i want = simde_mm256_set_epi32(
        INT32_C( 1682496014), INT32_C( -265998243), INT32_C( 1488485361), INT32_C( -723203182),
        INT32_C( 1593791374), INT32_C( -358344217), INT32_C(  813656782), INT32_C(   58704738));
    simde__m256i got = simde_mm256_insert_epi32(src, INT32_C( 1488485361), 5);
    simde_assert_m256i_i32(got, ==, want);
  }

  { /* element 1 */
    simde__m256i src = simde_mm256_set_epi32(
        INT32_C( 1847223436), INT32_C( -901858482), INT32_C( 1721924326), INT32_C(  291173023),
        INT32_C(-1897007668), INT32_C(-1769936815), INT32_C(-1568319650), INT32_C(   -5176498));
    simde__m256i want = simde_mm256_set_epi32(
        INT32_C( 1847223436), INT32_C( -901858482), INT32_C( 1721924326), INT32_C(  291173023),
        INT32_C(-1897007668), INT32_C(-1769936815), INT32_C(-1531789383), INT32_C(   -5176498));
    simde__m256i got = simde_mm256_insert_epi32(src, INT32_C(-1531789383), 1);
    simde_assert_m256i_i32(got, ==, want);
  }

  { /* element 3 */
    simde__m256i src = simde_mm256_set_epi32(
        INT32_C(  350203051), INT32_C( -910749534), INT32_C(  265750572), INT32_C( 1641173073),
        INT32_C( -538285717), INT32_C( -840003501), INT32_C( 1271510949), INT32_C(-1020673062));
    simde__m256i want = simde_mm256_set_epi32(
        INT32_C(  350203051), INT32_C( -910749534), INT32_C(  265750572), INT32_C( 1641173073),
        INT32_C( 1836561709), INT32_C( -840003501), INT32_C( 1271510949), INT32_C(-1020673062));
    simde__m256i got = simde_mm256_insert_epi32(src, INT32_C( 1836561709), 3);
    simde_assert_m256i_i32(got, ==, want);
  }

  { /* element 7 */
    simde__m256i src = simde_mm256_set_epi32(
        INT32_C(  547373175), INT32_C(-1592451622), INT32_C( 2039829911), INT32_C( -457221951),
        INT32_C(-1618527211), INT32_C(-1978334272), INT32_C( 1765993380), INT32_C( 1580829317));
    simde__m256i want = simde_mm256_set_epi32(
        INT32_C( 1732675568), INT32_C(-1592451622), INT32_C( 2039829911), INT32_C( -457221951),
        INT32_C(-1618527211), INT32_C(-1978334272), INT32_C( 1765993380), INT32_C( 1580829317));
    simde__m256i got = simde_mm256_insert_epi32(src, INT32_C( 1732675568), 7);
    simde_assert_m256i_i32(got, ==, want);
  }

  { /* element 1 again, different data */
    simde__m256i src = simde_mm256_set_epi32(
        INT32_C( 1958144037), INT32_C(  549134406), INT32_C( -361892467), INT32_C( -550141532),
        INT32_C(-2067091063), INT32_C( 1069015288), INT32_C(-1107603429), INT32_C( 1078737418));
    simde__m256i want = simde_mm256_set_epi32(
        INT32_C( 1958144037), INT32_C(  549134406), INT32_C( -361892467), INT32_C( -550141532),
        INT32_C(-2067091063), INT32_C( 1069015288), INT32_C(  896343144), INT32_C( 1078737418));
    simde__m256i got = simde_mm256_insert_epi32(src, INT32_C(  896343144), 1);
    simde_assert_m256i_i32(got, ==, want);
  }

  return 0;
}

/* Exercises simde_mm256_insert_epi64 with eight fixed cases.
 *
 * For each case the expected vector is written out first, then the input is
 * built, one 64-bit element is replaced at a compile-time index, and the
 * outcome is compared element-wise (as int64) with the expectation.  In the
 * expected data, element index i corresponds to the (3 - i)-th argument of
 * simde_mm256_set_epi64x.
 *
 * Returns 0 once every comparison has been executed.
 */
static int
test_simde_mm256_insert_epi64(SIMDE_MUNIT_TEST_ARGS) {
  simde__m256i input, expected, actual;

  /* element 1 */
  expected = simde_mm256_set_epi64x(
      INT64_C(-4505752726775834758), INT64_C( 5929367822849325121),
      INT64_C( 3154696592882520417), INT64_C( 8378794972787494426));
  input = simde_mm256_set_epi64x(
      INT64_C(-4505752726775834758), INT64_C( 5929367822849325121),
      INT64_C( 7859909444158050752), INT64_C( 8378794972787494426));
  actual = simde_mm256_insert_epi64(input, INT64_C( 3154696592882520417), 1);
  simde_assert_m256i_i64(actual, ==, expected);

  /* element 3 */
  expected = simde_mm256_set_epi64x(
      INT64_C(-3297610962929123976), INT64_C( 4240491953132221671),
      INT64_C( -278319971650278791), INT64_C( 6491233263195366023));
  input = simde_mm256_set_epi64x(
      INT64_C(  396574839952628801), INT64_C( 4240491953132221671),
      INT64_C( -278319971650278791), INT64_C( 6491233263195366023));
  actual = simde_mm256_insert_epi64(input, INT64_C(-3297610962929123976), 3);
  simde_assert_m256i_i64(actual, ==, expected);

  /* element 0 */
  expected = simde_mm256_set_epi64x(
      INT64_C(-1728252678477676185), INT64_C( 4096939565061250649),
      INT64_C( -899841113114403992), INT64_C(-3905247737278663189));
  input = simde_mm256_set_epi64x(
      INT64_C(-1728252678477676185), INT64_C( 4096939565061250649),
      INT64_C( -899841113114403992), INT64_C( 5786373883955623560));
  actual = simde_mm256_insert_epi64(input, INT64_C(-3905247737278663189), 0);
  simde_assert_m256i_i64(actual, ==, expected);

  /* element 1 */
  expected = simde_mm256_set_epi64x(
      INT64_C(-7506903298948604025), INT64_C(-5148494998623595939),
      INT64_C(-7835784393738508471), INT64_C( 4658534095800830357));
  input = simde_mm256_set_epi64x(
      INT64_C(-7506903298948604025), INT64_C(-5148494998623595939),
      INT64_C( 1486007124617083344), INT64_C( 4658534095800830357));
  actual = simde_mm256_insert_epi64(input, INT64_C(-7835784393738508471), 1);
  simde_assert_m256i_i64(actual, ==, expected);

  /* element 3 */
  expected = simde_mm256_set_epi64x(
      INT64_C( 4403056273253937364), INT64_C( 3908463436719096448),
      INT64_C( 8526354170218817669), INT64_C(-3858780869273911597));
  input = simde_mm256_set_epi64x(
      INT64_C(-2191440769984549047), INT64_C( 3908463436719096448),
      INT64_C( 8526354170218817669), INT64_C(-3858780869273911597));
  actual = simde_mm256_insert_epi64(input, INT64_C( 4403056273253937364), 3);
  simde_assert_m256i_i64(actual, ==, expected);

  /* element 1 */
  expected = simde_mm256_set_epi64x(
      INT64_C( 6661310305483280859), INT64_C(-6601747037924714764),
      INT64_C(-7130294008098064663), INT64_C( 1161250947816487188));
  input = simde_mm256_set_epi64x(
      INT64_C( 6661310305483280859), INT64_C(-6601747037924714764),
      INT64_C(-1630628469313698153), INT64_C( 1161250947816487188));
  actual = simde_mm256_insert_epi64(input, INT64_C(-7130294008098064663), 1);
  simde_assert_m256i_i64(actual, ==, expected);

  /* element 2 */
  expected = simde_mm256_set_epi64x(
      INT64_C(  782834070832404014), INT64_C( 3833121528156448342),
      INT64_C(-8527778016730746462), INT64_C( 3421940001003476372));
  input = simde_mm256_set_epi64x(
      INT64_C(  782834070832404014), INT64_C(-2125384153009736171),
      INT64_C(-8527778016730746462), INT64_C( 3421940001003476372));
  actual = simde_mm256_insert_epi64(input, INT64_C( 3833121528156448342), 2);
  simde_assert_m256i_i64(actual, ==, expected);

  /* element 0 */
  expected = simde_mm256_set_epi64x(
      INT64_C( -656366593936952908), INT64_C(-1584520372107281742),
      INT64_C( 8511510589800984870), INT64_C( 7232381223726455225));
  input = simde_mm256_set_epi64x(
      INT64_C( -656366593936952908), INT64_C(-1584520372107281742),
      INT64_C( 8511510589800984870), INT64_C( 3076176925060453392));
  actual = simde_mm256_insert_epi64(input, INT64_C( 7232381223726455225), 0);
  simde_assert_m256i_i64(actual, ==, expected);

  return 0;
}

/* Table-driven check of simde_mm256_insertf128_ps.
 *
 * Each table row holds a 256-bit input (`base`), a 128-bit vector to insert
 * (`half`), and the expected outcome for immediate 0 (`want_imm0`) and for
 * immediate 1 (`want_imm1`).  In the expected data, immediate 0 puts `half`
 * in the last four simde_mm256_set_ps arguments and immediate 1 puts it in
 * the first four.
 *
 * Returns 0 once every row has been checked.
 */
static int
test_simde_mm256_insertf128_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 base;
    simde__m128 half;
    simde__m256 want_imm0;
    simde__m256 want_imm1;
  } cases[8] = {
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   57.86), SIMDE_FLOAT32_C(  900.49),
                         SIMDE_FLOAT32_C(  678.15), SIMDE_FLOAT32_C( -551.43),
                         SIMDE_FLOAT32_C(  431.88), SIMDE_FLOAT32_C( -426.33),
                         SIMDE_FLOAT32_C( -705.72), SIMDE_FLOAT32_C(  809.23)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -625.43), SIMDE_FLOAT32_C( -829.28),
                      SIMDE_FLOAT32_C(  -42.04), SIMDE_FLOAT32_C( -643.64)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   57.86), SIMDE_FLOAT32_C(  900.49),
                         SIMDE_FLOAT32_C(  678.15), SIMDE_FLOAT32_C( -551.43),
                         SIMDE_FLOAT32_C( -625.43), SIMDE_FLOAT32_C( -829.28),
                         SIMDE_FLOAT32_C(  -42.04), SIMDE_FLOAT32_C( -643.64)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -625.43), SIMDE_FLOAT32_C( -829.28),
                         SIMDE_FLOAT32_C(  -42.04), SIMDE_FLOAT32_C( -643.64),
                         SIMDE_FLOAT32_C(  431.88), SIMDE_FLOAT32_C( -426.33),
                         SIMDE_FLOAT32_C( -705.72), SIMDE_FLOAT32_C(  809.23))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -837.79), SIMDE_FLOAT32_C(  338.83),
                         SIMDE_FLOAT32_C(  296.45), SIMDE_FLOAT32_C(  172.80),
                         SIMDE_FLOAT32_C(  220.09), SIMDE_FLOAT32_C(  171.14),
                         SIMDE_FLOAT32_C(  492.30), SIMDE_FLOAT32_C( -224.75)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -479.01), SIMDE_FLOAT32_C(  686.13),
                      SIMDE_FLOAT32_C( -518.69), SIMDE_FLOAT32_C( -606.38)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -837.79), SIMDE_FLOAT32_C(  338.83),
                         SIMDE_FLOAT32_C(  296.45), SIMDE_FLOAT32_C(  172.80),
                         SIMDE_FLOAT32_C( -479.01), SIMDE_FLOAT32_C(  686.13),
                         SIMDE_FLOAT32_C( -518.69), SIMDE_FLOAT32_C( -606.38)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -479.01), SIMDE_FLOAT32_C(  686.13),
                         SIMDE_FLOAT32_C( -518.69), SIMDE_FLOAT32_C( -606.38),
                         SIMDE_FLOAT32_C(  220.09), SIMDE_FLOAT32_C(  171.14),
                         SIMDE_FLOAT32_C(  492.30), SIMDE_FLOAT32_C( -224.75))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -665.08), SIMDE_FLOAT32_C( -599.25),
                         SIMDE_FLOAT32_C( -107.42), SIMDE_FLOAT32_C( -565.87),
                         SIMDE_FLOAT32_C( -588.15), SIMDE_FLOAT32_C(  906.13),
                         SIMDE_FLOAT32_C(  481.87), SIMDE_FLOAT32_C(  540.93)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  308.44), SIMDE_FLOAT32_C( -387.39),
                      SIMDE_FLOAT32_C(  312.59), SIMDE_FLOAT32_C( -811.76)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -665.08), SIMDE_FLOAT32_C( -599.25),
                         SIMDE_FLOAT32_C( -107.42), SIMDE_FLOAT32_C( -565.87),
                         SIMDE_FLOAT32_C(  308.44), SIMDE_FLOAT32_C( -387.39),
                         SIMDE_FLOAT32_C(  312.59), SIMDE_FLOAT32_C( -811.76)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  308.44), SIMDE_FLOAT32_C( -387.39),
                         SIMDE_FLOAT32_C(  312.59), SIMDE_FLOAT32_C( -811.76),
                         SIMDE_FLOAT32_C( -588.15), SIMDE_FLOAT32_C(  906.13),
                         SIMDE_FLOAT32_C(  481.87), SIMDE_FLOAT32_C(  540.93))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -499.97), SIMDE_FLOAT32_C( -474.63),
                         SIMDE_FLOAT32_C( -449.49), SIMDE_FLOAT32_C(  941.31),
                         SIMDE_FLOAT32_C( -102.84), SIMDE_FLOAT32_C( -165.66),
                         SIMDE_FLOAT32_C( -680.74), SIMDE_FLOAT32_C(   98.73)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -600.34), SIMDE_FLOAT32_C(  321.05),
                      SIMDE_FLOAT32_C(  438.78), SIMDE_FLOAT32_C(  -70.17)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -499.97), SIMDE_FLOAT32_C( -474.63),
                         SIMDE_FLOAT32_C( -449.49), SIMDE_FLOAT32_C(  941.31),
                         SIMDE_FLOAT32_C( -600.34), SIMDE_FLOAT32_C(  321.05),
                         SIMDE_FLOAT32_C(  438.78), SIMDE_FLOAT32_C(  -70.17)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -600.34), SIMDE_FLOAT32_C(  321.05),
                         SIMDE_FLOAT32_C(  438.78), SIMDE_FLOAT32_C(  -70.17),
                         SIMDE_FLOAT32_C( -102.84), SIMDE_FLOAT32_C( -165.66),
                         SIMDE_FLOAT32_C( -680.74), SIMDE_FLOAT32_C(   98.73))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -402.57), SIMDE_FLOAT32_C( -372.27),
                         SIMDE_FLOAT32_C( -839.54), SIMDE_FLOAT32_C(  507.35),
                         SIMDE_FLOAT32_C( -596.72), SIMDE_FLOAT32_C(  333.88),
                         SIMDE_FLOAT32_C( -839.21), SIMDE_FLOAT32_C( -624.72)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -109.04), SIMDE_FLOAT32_C( -997.63),
                      SIMDE_FLOAT32_C(  959.39), SIMDE_FLOAT32_C( -856.10)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -402.57), SIMDE_FLOAT32_C( -372.27),
                         SIMDE_FLOAT32_C( -839.54), SIMDE_FLOAT32_C(  507.35),
                         SIMDE_FLOAT32_C( -109.04), SIMDE_FLOAT32_C( -997.63),
                         SIMDE_FLOAT32_C(  959.39), SIMDE_FLOAT32_C( -856.10)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -109.04), SIMDE_FLOAT32_C( -997.63),
                         SIMDE_FLOAT32_C(  959.39), SIMDE_FLOAT32_C( -856.10),
                         SIMDE_FLOAT32_C( -596.72), SIMDE_FLOAT32_C(  333.88),
                         SIMDE_FLOAT32_C( -839.21), SIMDE_FLOAT32_C( -624.72))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -148.40), SIMDE_FLOAT32_C( -137.24),
                         SIMDE_FLOAT32_C(  665.88), SIMDE_FLOAT32_C( -239.38),
                         SIMDE_FLOAT32_C(  864.82), SIMDE_FLOAT32_C(  415.07),
                         SIMDE_FLOAT32_C(  223.96), SIMDE_FLOAT32_C(  144.96)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -29.35), SIMDE_FLOAT32_C( -415.61),
                      SIMDE_FLOAT32_C(  231.08), SIMDE_FLOAT32_C( -375.28)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -148.40), SIMDE_FLOAT32_C( -137.24),
                         SIMDE_FLOAT32_C(  665.88), SIMDE_FLOAT32_C( -239.38),
                         SIMDE_FLOAT32_C(  -29.35), SIMDE_FLOAT32_C( -415.61),
                         SIMDE_FLOAT32_C(  231.08), SIMDE_FLOAT32_C( -375.28)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -29.35), SIMDE_FLOAT32_C( -415.61),
                         SIMDE_FLOAT32_C(  231.08), SIMDE_FLOAT32_C( -375.28),
                         SIMDE_FLOAT32_C(  864.82), SIMDE_FLOAT32_C(  415.07),
                         SIMDE_FLOAT32_C(  223.96), SIMDE_FLOAT32_C(  144.96))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  384.21), SIMDE_FLOAT32_C( -172.99),
                         SIMDE_FLOAT32_C( -651.42), SIMDE_FLOAT32_C(  104.60),
                         SIMDE_FLOAT32_C( -412.61), SIMDE_FLOAT32_C( -685.74),
                         SIMDE_FLOAT32_C(  349.45), SIMDE_FLOAT32_C(  431.71)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  810.19), SIMDE_FLOAT32_C(   94.73),
                      SIMDE_FLOAT32_C(  542.66), SIMDE_FLOAT32_C(  824.78)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  384.21), SIMDE_FLOAT32_C( -172.99),
                         SIMDE_FLOAT32_C( -651.42), SIMDE_FLOAT32_C(  104.60),
                         SIMDE_FLOAT32_C(  810.19), SIMDE_FLOAT32_C(   94.73),
                         SIMDE_FLOAT32_C(  542.66), SIMDE_FLOAT32_C(  824.78)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  810.19), SIMDE_FLOAT32_C(   94.73),
                         SIMDE_FLOAT32_C(  542.66), SIMDE_FLOAT32_C(  824.78),
                         SIMDE_FLOAT32_C( -412.61), SIMDE_FLOAT32_C( -685.74),
                         SIMDE_FLOAT32_C(  349.45), SIMDE_FLOAT32_C(  431.71))
    },
    {
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  117.02), SIMDE_FLOAT32_C(  874.06),
                         SIMDE_FLOAT32_C( -896.71), SIMDE_FLOAT32_C(  927.83),
                         SIMDE_FLOAT32_C( -471.09), SIMDE_FLOAT32_C(  907.26),
                         SIMDE_FLOAT32_C(  774.08), SIMDE_FLOAT32_C(  141.60)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   69.32), SIMDE_FLOAT32_C(  645.62),
                      SIMDE_FLOAT32_C(  860.89), SIMDE_FLOAT32_C(  694.26)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  117.02), SIMDE_FLOAT32_C(  874.06),
                         SIMDE_FLOAT32_C( -896.71), SIMDE_FLOAT32_C(  927.83),
                         SIMDE_FLOAT32_C(   69.32), SIMDE_FLOAT32_C(  645.62),
                         SIMDE_FLOAT32_C(  860.89), SIMDE_FLOAT32_C(  694.26)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   69.32), SIMDE_FLOAT32_C(  645.62),
                         SIMDE_FLOAT32_C(  860.89), SIMDE_FLOAT32_C(  694.26),
                         SIMDE_FLOAT32_C( -471.09), SIMDE_FLOAT32_C(  907.26),
                         SIMDE_FLOAT32_C(  774.08), SIMDE_FLOAT32_C(  141.60))
    }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);

  for (size_t k = 0 ; k < n_cases ; k++) {
    /* Run both immediates before asserting on either result. */
    simde__m256 got_imm0 = simde_mm256_insertf128_ps(cases[k].base, cases[k].half, 0);
    simde__m256 got_imm1 = simde_mm256_insertf128_ps(cases[k].base, cases[k].half, 1);

    /* NOTE(review): the trailing 1 is the precision argument of the
     * closeness assertion; its exact unit is defined by the macro. */
    simde_assert_m256_close(got_imm0, cases[k].want_imm0, 1);
    simde_assert_m256_close(got_imm1, cases[k].want_imm1, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_insertf128_pd.
 *
 * Each table row holds a 256-bit input (`base`), a 128-bit vector to insert
 * (`half`), and the expected outcome for immediate 0 (`want_imm0`) and for
 * immediate 1 (`want_imm1`).  In the expected data, immediate 0 puts `half`
 * in the last two simde_mm256_set_pd arguments and immediate 1 puts it in
 * the first two.
 *
 * Returns 0 once every row has been checked.
 */
static int
test_simde_mm256_insertf128_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d base;
    simde__m128d half;
    simde__m256d want_imm0;
    simde__m256d want_imm1;
  } cases[8] = {
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  500.07), SIMDE_FLOAT64_C(   24.20),
                         SIMDE_FLOAT64_C( -264.31), SIMDE_FLOAT64_C(  584.01)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  431.47), SIMDE_FLOAT64_C(  318.12)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  500.07), SIMDE_FLOAT64_C(   24.20),
                         SIMDE_FLOAT64_C(  431.47), SIMDE_FLOAT64_C(  318.12)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  431.47), SIMDE_FLOAT64_C(  318.12),
                         SIMDE_FLOAT64_C( -264.31), SIMDE_FLOAT64_C(  584.01))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  669.13), SIMDE_FLOAT64_C( -378.72),
                         SIMDE_FLOAT64_C( -204.56), SIMDE_FLOAT64_C(  289.88)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  609.30), SIMDE_FLOAT64_C(  491.95)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  669.13), SIMDE_FLOAT64_C( -378.72),
                         SIMDE_FLOAT64_C(  609.30), SIMDE_FLOAT64_C(  491.95)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  609.30), SIMDE_FLOAT64_C(  491.95),
                         SIMDE_FLOAT64_C( -204.56), SIMDE_FLOAT64_C(  289.88))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -439.47), SIMDE_FLOAT64_C(  501.94),
                         SIMDE_FLOAT64_C( -311.14), SIMDE_FLOAT64_C( -486.50)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  460.51), SIMDE_FLOAT64_C(  800.13)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -439.47), SIMDE_FLOAT64_C(  501.94),
                         SIMDE_FLOAT64_C(  460.51), SIMDE_FLOAT64_C(  800.13)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  460.51), SIMDE_FLOAT64_C(  800.13),
                         SIMDE_FLOAT64_C( -311.14), SIMDE_FLOAT64_C( -486.50))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -828.83), SIMDE_FLOAT64_C(  892.34),
                         SIMDE_FLOAT64_C(  849.35), SIMDE_FLOAT64_C(   71.26)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  690.69), SIMDE_FLOAT64_C( -666.59)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -828.83), SIMDE_FLOAT64_C(  892.34),
                         SIMDE_FLOAT64_C(  690.69), SIMDE_FLOAT64_C( -666.59)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  690.69), SIMDE_FLOAT64_C( -666.59),
                         SIMDE_FLOAT64_C(  849.35), SIMDE_FLOAT64_C(   71.26))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -393.25), SIMDE_FLOAT64_C( -143.78),
                         SIMDE_FLOAT64_C(  452.34), SIMDE_FLOAT64_C(  313.17)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(    2.43), SIMDE_FLOAT64_C( -405.20)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -393.25), SIMDE_FLOAT64_C( -143.78),
                         SIMDE_FLOAT64_C(    2.43), SIMDE_FLOAT64_C( -405.20)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    2.43), SIMDE_FLOAT64_C( -405.20),
                         SIMDE_FLOAT64_C(  452.34), SIMDE_FLOAT64_C(  313.17))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -19.72), SIMDE_FLOAT64_C( -282.65),
                         SIMDE_FLOAT64_C( -261.63), SIMDE_FLOAT64_C( -641.13)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  818.65), SIMDE_FLOAT64_C( -240.18)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -19.72), SIMDE_FLOAT64_C( -282.65),
                         SIMDE_FLOAT64_C(  818.65), SIMDE_FLOAT64_C( -240.18)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  818.65), SIMDE_FLOAT64_C( -240.18),
                         SIMDE_FLOAT64_C( -261.63), SIMDE_FLOAT64_C( -641.13))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  809.87), SIMDE_FLOAT64_C(  692.31),
                         SIMDE_FLOAT64_C(  848.43), SIMDE_FLOAT64_C( -514.36)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -330.16), SIMDE_FLOAT64_C(  670.26)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  809.87), SIMDE_FLOAT64_C(  692.31),
                         SIMDE_FLOAT64_C( -330.16), SIMDE_FLOAT64_C(  670.26)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -330.16), SIMDE_FLOAT64_C(  670.26),
                         SIMDE_FLOAT64_C(  848.43), SIMDE_FLOAT64_C( -514.36))
    },
    {
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -917.99), SIMDE_FLOAT64_C(  637.33),
                         SIMDE_FLOAT64_C(  143.49), SIMDE_FLOAT64_C(  390.85)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -606.83), SIMDE_FLOAT64_C(  948.25)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -917.99), SIMDE_FLOAT64_C(  637.33),
                         SIMDE_FLOAT64_C( -606.83), SIMDE_FLOAT64_C(  948.25)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -606.83), SIMDE_FLOAT64_C(  948.25),
                         SIMDE_FLOAT64_C(  143.49), SIMDE_FLOAT64_C(  390.85))
    }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);

  for (size_t k = 0 ; k < n_cases ; k++) {
    /* Run both immediates before asserting on either result. */
    simde__m256d got_imm0 = simde_mm256_insertf128_pd(cases[k].base, cases[k].half, 0);
    simde__m256d got_imm1 = simde_mm256_insertf128_pd(cases[k].base, cases[k].half, 1);

    /* NOTE(review): the trailing 1 is the precision argument of the
     * closeness assertion; its exact unit is defined by the macro. */
    simde_assert_m256d_close(got_imm0, cases[k].want_imm0, 1);
    simde_assert_m256d_close(got_imm1, cases[k].want_imm1, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_insertf128_si256.
 *
 * Each table row holds a 256-bit integer input (`base`), a 128-bit vector to
 * insert (`half`), and the expected outcome for immediate 0 (`want_imm0`) and
 * for immediate 1 (`want_imm1`).  In the expected data, immediate 0 puts
 * `half` in the last four simde_mm256_set_epi32 arguments and immediate 1
 * puts it in the first four.  Results are compared element-wise as int32.
 *
 * Returns 0 once every row has been checked.
 */
static int
test_simde_mm256_insertf128_si256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i base;
    simde__m128i half;
    simde__m256i want_imm0;
    simde__m256i want_imm1;
  } cases[8] = {
    {
      simde_mm256_set_epi32(INT32_C( 1732788931), INT32_C( -493919285), INT32_C( -171391193), INT32_C( 1397412103),
                            INT32_C( -356536147), INT32_C(-1692932708), INT32_C(-1699348696), INT32_C( -647395099)),
      simde_mm_set_epi32(INT32_C(-1522680411), INT32_C(-1731979321), INT32_C( 1240335413), INT32_C(  201854332)),
      simde_mm256_set_epi32(INT32_C( 1732788931), INT32_C( -493919285), INT32_C( -171391193), INT32_C( 1397412103),
                            INT32_C(-1522680411), INT32_C(-1731979321), INT32_C( 1240335413), INT32_C(  201854332)),
      simde_mm256_set_epi32(INT32_C(-1522680411), INT32_C(-1731979321), INT32_C( 1240335413), INT32_C(  201854332),
                            INT32_C( -356536147), INT32_C(-1692932708), INT32_C(-1699348696), INT32_C( -647395099))
    },
    {
      simde_mm256_set_epi32(INT32_C(-1444875329), INT32_C( 1610023191), INT32_C( -708588022), INT32_C( -172947680),
                            INT32_C(  545675582), INT32_C( 1925063203), INT32_C(  200249152), INT32_C(  925361522)),
      simde_mm_set_epi32(INT32_C( -719778838), INT32_C( -908663617), INT32_C(-1043096582), INT32_C( 2027106265)),
      simde_mm256_set_epi32(INT32_C(-1444875329), INT32_C( 1610023191), INT32_C( -708588022), INT32_C( -172947680),
                            INT32_C( -719778838), INT32_C( -908663617), INT32_C(-1043096582), INT32_C( 2027106265)),
      simde_mm256_set_epi32(INT32_C( -719778838), INT32_C( -908663617), INT32_C(-1043096582), INT32_C( 2027106265),
                            INT32_C(  545675582), INT32_C( 1925063203), INT32_C(  200249152), INT32_C(  925361522))
    },
    {
      simde_mm256_set_epi32(INT32_C(  819255641), INT32_C(  758383634), INT32_C( -712717178), INT32_C( 1831898363),
                            INT32_C( -652589148), INT32_C(  437505059), INT32_C(-1426201125), INT32_C(  915542579)),
      simde_mm_set_epi32(INT32_C( 1102980249), INT32_C( 1568821342), INT32_C( 1031497605), INT32_C( 1535564672)),
      simde_mm256_set_epi32(INT32_C(  819255641), INT32_C(  758383634), INT32_C( -712717178), INT32_C( 1831898363),
                            INT32_C( 1102980249), INT32_C( 1568821342), INT32_C( 1031497605), INT32_C( 1535564672)),
      simde_mm256_set_epi32(INT32_C( 1102980249), INT32_C( 1568821342), INT32_C( 1031497605), INT32_C( 1535564672),
                            INT32_C( -652589148), INT32_C(  437505059), INT32_C(-1426201125), INT32_C(  915542579))
    },
    {
      simde_mm256_set_epi32(INT32_C(-1576300711), INT32_C(  804080573), INT32_C(-1947930635), INT32_C( -773073118),
                            INT32_C( -708044343), INT32_C( 1025803241), INT32_C(-1542400953), INT32_C( 1513652867)),
      simde_mm_set_epi32(INT32_C( -630903986), INT32_C( 1210274072), INT32_C(-1479627472), INT32_C( 1540958491)),
      simde_mm256_set_epi32(INT32_C(-1576300711), INT32_C(  804080573), INT32_C(-1947930635), INT32_C( -773073118),
                            INT32_C( -630903986), INT32_C( 1210274072), INT32_C(-1479627472), INT32_C( 1540958491)),
      simde_mm256_set_epi32(INT32_C( -630903986), INT32_C( 1210274072), INT32_C(-1479627472), INT32_C( 1540958491),
                            INT32_C( -708044343), INT32_C( 1025803241), INT32_C(-1542400953), INT32_C( 1513652867))
    },
    {
      simde_mm256_set_epi32(INT32_C(-1474400259), INT32_C( 1988182849), INT32_C(-1345043070), INT32_C(-2043590369),
                            INT32_C( -792511350), INT32_C(-1919476039), INT32_C( -711077027), INT32_C(-1924737697)),
      simde_mm_set_epi32(INT32_C(-1229154872), INT32_C( 1506932355), INT32_C(  529233496), INT32_C(  900061932)),
      simde_mm256_set_epi32(INT32_C(-1474400259), INT32_C( 1988182849), INT32_C(-1345043070), INT32_C(-2043590369),
                            INT32_C(-1229154872), INT32_C( 1506932355), INT32_C(  529233496), INT32_C(  900061932)),
      simde_mm256_set_epi32(INT32_C(-1229154872), INT32_C( 1506932355), INT32_C(  529233496), INT32_C(  900061932),
                            INT32_C( -792511350), INT32_C(-1919476039), INT32_C( -711077027), INT32_C(-1924737697))
    },
    {
      simde_mm256_set_epi32(INT32_C( 1011012252), INT32_C(-1383487313), INT32_C( -799281089), INT32_C(-1421799289),
                            INT32_C(-1020863292), INT32_C( -870274327), INT32_C(  767506840), INT32_C(  905532467)),
      simde_mm_set_epi32(INT32_C(  535053718), INT32_C( 1571414305), INT32_C(  327456521), INT32_C(  562021450)),
      simde_mm256_set_epi32(INT32_C( 1011012252), INT32_C(-1383487313), INT32_C( -799281089), INT32_C(-1421799289),
                            INT32_C(  535053718), INT32_C( 1571414305), INT32_C(  327456521), INT32_C(  562021450)),
      simde_mm256_set_epi32(INT32_C(  535053718), INT32_C( 1571414305), INT32_C(  327456521), INT32_C(  562021450),
                            INT32_C(-1020863292), INT32_C( -870274327), INT32_C(  767506840), INT32_C(  905532467))
    },
    {
      simde_mm256_set_epi32(INT32_C(-1892816233), INT32_C( -258025342), INT32_C(-1474147149), INT32_C(  200557748),
                            INT32_C(  863187861), INT32_C( 1974870245), INT32_C( 1114174400), INT32_C( -122006961)),
      simde_mm_set_epi32(INT32_C( 1960728456), INT32_C(-1615388317), INT32_C(  728614642), INT32_C(  181559353)),
      simde_mm256_set_epi32(INT32_C(-1892816233), INT32_C( -258025342), INT32_C(-1474147149), INT32_C(  200557748),
                            INT32_C( 1960728456), INT32_C(-1615388317), INT32_C(  728614642), INT32_C(  181559353)),
      simde_mm256_set_epi32(INT32_C( 1960728456), INT32_C(-1615388317), INT32_C(  728614642), INT32_C(  181559353),
                            INT32_C(  863187861), INT32_C( 1974870245), INT32_C( 1114174400), INT32_C( -122006961))
    },
    {
      simde_mm256_set_epi32(INT32_C( -172940012), INT32_C(-1274554211), INT32_C( -855665209), INT32_C(  935611457),
                            INT32_C( -592164168), INT32_C(  945068232), INT32_C(  755470781), INT32_C(-1762512447)),
      simde_mm_set_epi32(INT32_C(-1172491108), INT32_C(-1413112125), INT32_C(   65588240), INT32_C(-1859214337)),
      simde_mm256_set_epi32(INT32_C( -172940012), INT32_C(-1274554211), INT32_C( -855665209), INT32_C(  935611457),
                            INT32_C(-1172491108), INT32_C(-1413112125), INT32_C(   65588240), INT32_C(-1859214337)),
      simde_mm256_set_epi32(INT32_C(-1172491108), INT32_C(-1413112125), INT32_C(   65588240), INT32_C(-1859214337),
                            INT32_C( -592164168), INT32_C(  945068232), INT32_C(  755470781), INT32_C(-1762512447))
    }
  };
  const size_t n_cases = sizeof(cases) / sizeof(cases[0]);
  size_t k = 0;

  while (k < n_cases) {
    /* Run both immediates before asserting on either result. */
    simde__m256i got_imm0 = simde_mm256_insertf128_si256(cases[k].base, cases[k].half, 0);
    simde__m256i got_imm1 = simde_mm256_insertf128_si256(cases[k].base, cases[k].half, 1);

    simde_assert_m256i_i32(got_imm0, ==, cases[k].want_imm0);
    simde_assert_m256i_i32(got_imm1, ==, cases[k].want_imm1);

    k++;
  }

  return 0;
}

/* Runs simde_mm256_lddqu_si256 over every table entry and checks that the
 * 32-bit lanes of the loaded vector equal the expected vector. Each entry's
 * expected value repeats its input. Returns 0 once every entry has been
 * checked. */
static int
test_simde_mm256_lddqu_si256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C( -208613396), INT32_C(  972060947), INT32_C( 1079690819), INT32_C(-1629141358),
                            INT32_C( -291568998), INT32_C( -706346303), INT32_C( 1782265269), INT32_C(  663843445)),
      simde_mm256_set_epi32(INT32_C( -208613396), INT32_C(  972060947), INT32_C( 1079690819), INT32_C(-1629141358),
                            INT32_C( -291568998), INT32_C( -706346303), INT32_C( 1782265269), INT32_C(  663843445)) },
    { simde_mm256_set_epi32(INT32_C( -542385526), INT32_C(-1915647746), INT32_C(  251129882), INT32_C(  290247368),
                            INT32_C(  363399145), INT32_C(  688121978), INT32_C(  600807845), INT32_C( 1456401224)),
      simde_mm256_set_epi32(INT32_C( -542385526), INT32_C(-1915647746), INT32_C(  251129882), INT32_C(  290247368),
                            INT32_C(  363399145), INT32_C(  688121978), INT32_C(  600807845), INT32_C( 1456401224)) },
    { simde_mm256_set_epi32(INT32_C(  862880243), INT32_C(  961555167), INT32_C( -704902562), INT32_C(-2017515450),
                            INT32_C(-1906482322), INT32_C(-1699379933), INT32_C( 1894527886), INT32_C( 2049947519)),
      simde_mm256_set_epi32(INT32_C(  862880243), INT32_C(  961555167), INT32_C( -704902562), INT32_C(-2017515450),
                            INT32_C(-1906482322), INT32_C(-1699379933), INT32_C( 1894527886), INT32_C( 2049947519)) },
    { simde_mm256_set_epi32(INT32_C( 1564827830), INT32_C( -831950379), INT32_C(  815117120), INT32_C( -372364589),
                            INT32_C(-1095370522), INT32_C( 1608512554), INT32_C( 1210942744), INT32_C(  816264608)),
      simde_mm256_set_epi32(INT32_C( 1564827830), INT32_C( -831950379), INT32_C(  815117120), INT32_C( -372364589),
                            INT32_C(-1095370522), INT32_C( 1608512554), INT32_C( 1210942744), INT32_C(  816264608)) },
    { simde_mm256_set_epi32(INT32_C( 1014835213), INT32_C(  419509758), INT32_C( -940172407), INT32_C( 2075423717),
                            INT32_C( -958302313), INT32_C( 2056263130), INT32_C( -179845947), INT32_C( -487391602)),
      simde_mm256_set_epi32(INT32_C( 1014835213), INT32_C(  419509758), INT32_C( -940172407), INT32_C( 2075423717),
                            INT32_C( -958302313), INT32_C( 2056263130), INT32_C( -179845947), INT32_C( -487391602)) },
    { simde_mm256_set_epi32(INT32_C(  750230136), INT32_C(  830844077), INT32_C( 1366738463), INT32_C( 1719449608),
                            INT32_C(  953227083), INT32_C( -624601508), INT32_C( -983006206), INT32_C( 1138640848)),
      simde_mm256_set_epi32(INT32_C(  750230136), INT32_C(  830844077), INT32_C( 1366738463), INT32_C( 1719449608),
                            INT32_C(  953227083), INT32_C( -624601508), INT32_C( -983006206), INT32_C( 1138640848)) },
    { simde_mm256_set_epi32(INT32_C(-2045061394), INT32_C( -759814821), INT32_C( 1064937743), INT32_C(-1124388611),
                            INT32_C( -168818003), INT32_C( -757055903), INT32_C(-1606176919), INT32_C(  254467933)),
      simde_mm256_set_epi32(INT32_C(-2045061394), INT32_C( -759814821), INT32_C( 1064937743), INT32_C(-1124388611),
                            INT32_C( -168818003), INT32_C( -757055903), INT32_C(-1606176919), INT32_C(  254467933)) },
    { simde_mm256_set_epi32(INT32_C(-1387663431), INT32_C( 2083885974), INT32_C(  524830617), INT32_C( 1548734942),
                            INT32_C( 1378860315), INT32_C(-1149727640), INT32_C( 1373643603), INT32_C(  772353923)),
      simde_mm256_set_epi32(INT32_C(-1387663431), INT32_C( 2083885974), INT32_C(  524830617), INT32_C( 1548734942),
                            INT32_C( 1378860315), INT32_C(-1149727640), INT32_C( 1373643603), INT32_C(  772353923)) }
  };
  const size_t n_tests = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_tests ; i++) {
    simde__m256i r;

    r = simde_mm256_lddqu_si256(&test_vec[i].a);
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm256_load_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float64 a[sizeof(simde__m256d) / sizeof(simde_float64)];
    simde__m256d r;
  } test_vec[8] = {
    { { SIMDE_FLOAT64_C( -338.67), SIMDE_FLOAT64_C(  630.84), SIMDE_FLOAT64_C( -302.19), SIMDE_FLOAT64_C( -238.77) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -238.77), SIMDE_FLOAT64_C( -302.19),
                         SIMDE_FLOAT64_C(  630.84), SIMDE_FLOAT64_C( -338.67)) },
    { { SIMDE_FLOAT64_C(  725.41), SIMDE_FLOAT64_C( -787.32), SIMDE_FLOAT64_C( -819.45), SIMDE_FLOAT64_C(  657.50) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  657.50), SIMDE_FLOAT64_C( -819.45),
                         SIMDE_FLOAT64_C( -787.32), SIMDE_FLOAT64_C(  725.41)) },
    { { SIMDE_FLOAT64_C( -519.61), SIMDE_FLOAT64_C(  692.74), SIMDE_FLOAT64_C(   96.96), SIMDE_FLOAT64_C(  -63.30) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -63.30), SIMDE_FLOAT64_C(   96.96),
                         SIMDE_FLOAT64_C(  692.74), SIMDE_FLOAT64_C( -519.61)) },
    { { SIMDE_FLOAT64_C(  577.54), SIMDE_FLOAT64_C( -524.47), SIMDE_FLOAT64_C( -254.05), SIMDE_FLOAT64_C(  614.55) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  614.55), SIMDE_FLOAT64_C( -254.05),
                         SIMDE_FLOAT64_C( -524.47), SIMDE_FLOAT64_C(  577.54)) },
    { { SIMDE_FLOAT64_C( -608.94), SIMDE_FLOAT64_C(  345.46), SIMDE_FLOAT64_C( -476.81), SIMDE_FLOAT64_C( -532.19) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -532.19), SIMDE_FLOAT64_C( -476.81),
                         SIMDE_FLOAT64_C(  345.46), SIMDE_FLOAT64_C( -608.94)) },
    { { SIMDE_FLOAT64_C(   96.64), SIMDE_FLOAT64_C( -218.82), SIMDE_FLOAT64_C( -345.29), SIMDE_FLOAT64_C( -716.59) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -716.59), SIMDE_FLOAT64_C( -345.29),
                         SIMDE_FLOAT64_C( -218.82), SIMDE_FLOAT64_C(   96.64)) },
    { { SIMDE_FLOAT64_C(  896.80), SIMDE_FLOAT64_C( -999.47), SIMDE_FLOAT64_C(  692.69), SIMDE_FLOAT64_C(   75.34) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   75.34), SIMDE_FLOAT64_C(  692.69),
                         SIMDE_FLOAT64_C( -999.47), SIMDE_FLOAT64_C(  896.80)) },
    { { SIMDE_FLOAT64_C( -936.41), SIMDE_FLOAT64_C(  832.42), SIMDE_FLOAT64_C(  861.03), SIMDE_FLOAT64_C( -909.25) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -909.25), SIMDE_FLOAT64_C(  861.03),
                         SIMDE_FLOAT64_C(  832.42), SIMDE_FLOAT64_C( -936.41)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256d r = simde_mm256_load_pd(test_vec[i].a);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Runs simde_mm256_load_ps over every table entry and checks that the loaded
 * vector is close to the expected one (precision argument 1). The source
 * array is declared with SIMDE_ALIGN_LIKE_32(simde__m256); the expected
 * vector is built with simde_mm256_set_ps from the same eight values in
 * reverse argument order. Returns 0 once every entry has been checked. */
static int
test_simde_mm256_load_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    SIMDE_ALIGN_LIKE_32(simde__m256) simde_float32 a[sizeof(simde__m256) / sizeof(simde_float32)];
    simde__m256 r;
  } test_vec[8] = {
    { { SIMDE_FLOAT32_C(  -651.15), SIMDE_FLOAT32_C(   486.09),
        SIMDE_FLOAT32_C(   809.52), SIMDE_FLOAT32_C(   897.18),
        SIMDE_FLOAT32_C(  -164.76), SIMDE_FLOAT32_C(   925.08),
        SIMDE_FLOAT32_C(  -141.17), SIMDE_FLOAT32_C(   524.77) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   524.77), SIMDE_FLOAT32_C(  -141.17),
                         SIMDE_FLOAT32_C(   925.08), SIMDE_FLOAT32_C(  -164.76),
                         SIMDE_FLOAT32_C(   897.18), SIMDE_FLOAT32_C(   809.52),
                         SIMDE_FLOAT32_C(   486.09), SIMDE_FLOAT32_C(  -651.15)) },
    { { SIMDE_FLOAT32_C(   154.61), SIMDE_FLOAT32_C(  -436.96),
        SIMDE_FLOAT32_C(  -109.54), SIMDE_FLOAT32_C(  -422.39),
        SIMDE_FLOAT32_C(  -113.81), SIMDE_FLOAT32_C(  -740.60),
        SIMDE_FLOAT32_C(  -581.05), SIMDE_FLOAT32_C(   534.88) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   534.88), SIMDE_FLOAT32_C(  -581.05),
                         SIMDE_FLOAT32_C(  -740.60), SIMDE_FLOAT32_C(  -113.81),
                         SIMDE_FLOAT32_C(  -422.39), SIMDE_FLOAT32_C(  -109.54),
                         SIMDE_FLOAT32_C(  -436.96), SIMDE_FLOAT32_C(   154.61)) },
    { { SIMDE_FLOAT32_C(   689.49), SIMDE_FLOAT32_C(  -831.99),
        SIMDE_FLOAT32_C(   872.86), SIMDE_FLOAT32_C(   554.28),
        SIMDE_FLOAT32_C(   799.73), SIMDE_FLOAT32_C(  -331.18),
        SIMDE_FLOAT32_C(   338.85), SIMDE_FLOAT32_C(   425.19) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   425.19), SIMDE_FLOAT32_C(   338.85),
                         SIMDE_FLOAT32_C(  -331.18), SIMDE_FLOAT32_C(   799.73),
                         SIMDE_FLOAT32_C(   554.28), SIMDE_FLOAT32_C(   872.86),
                         SIMDE_FLOAT32_C(  -831.99), SIMDE_FLOAT32_C(   689.49)) },
    { { SIMDE_FLOAT32_C(    22.85), SIMDE_FLOAT32_C(  -436.81),
        SIMDE_FLOAT32_C(   473.32), SIMDE_FLOAT32_C(   132.51),
        SIMDE_FLOAT32_C(  -295.42), SIMDE_FLOAT32_C(    74.04),
        SIMDE_FLOAT32_C(   445.74), SIMDE_FLOAT32_C(   574.68) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   574.68), SIMDE_FLOAT32_C(   445.74),
                         SIMDE_FLOAT32_C(    74.04), SIMDE_FLOAT32_C(  -295.42),
                         SIMDE_FLOAT32_C(   132.51), SIMDE_FLOAT32_C(   473.32),
                         SIMDE_FLOAT32_C(  -436.81), SIMDE_FLOAT32_C(    22.85)) },
    { { SIMDE_FLOAT32_C(   105.79), SIMDE_FLOAT32_C(   -21.01),
        SIMDE_FLOAT32_C(  -754.65), SIMDE_FLOAT32_C(  -355.76),
        SIMDE_FLOAT32_C(   716.76), SIMDE_FLOAT32_C(  -141.32),
        SIMDE_FLOAT32_C(   300.83), SIMDE_FLOAT32_C(   -21.61) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -21.61), SIMDE_FLOAT32_C(   300.83),
                         SIMDE_FLOAT32_C(  -141.32), SIMDE_FLOAT32_C(   716.76),
                         SIMDE_FLOAT32_C(  -355.76), SIMDE_FLOAT32_C(  -754.65),
                         SIMDE_FLOAT32_C(   -21.01), SIMDE_FLOAT32_C(   105.79)) },
    { { SIMDE_FLOAT32_C(  -421.92), SIMDE_FLOAT32_C(   236.64),
        SIMDE_FLOAT32_C(  -349.60), SIMDE_FLOAT32_C(   710.87),
        SIMDE_FLOAT32_C(  -664.65), SIMDE_FLOAT32_C(    50.17),
        SIMDE_FLOAT32_C(    82.89), SIMDE_FLOAT32_C(  -240.57) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -240.57), SIMDE_FLOAT32_C(    82.89),
                         SIMDE_FLOAT32_C(    50.17), SIMDE_FLOAT32_C(  -664.65),
                         SIMDE_FLOAT32_C(   710.87), SIMDE_FLOAT32_C(  -349.60),
                         SIMDE_FLOAT32_C(   236.64), SIMDE_FLOAT32_C(  -421.92)) },
    { { SIMDE_FLOAT32_C(    68.48), SIMDE_FLOAT32_C(   518.42),
        SIMDE_FLOAT32_C(   968.06), SIMDE_FLOAT32_C(  -197.34),
        SIMDE_FLOAT32_C(   351.10), SIMDE_FLOAT32_C(   113.17),
        SIMDE_FLOAT32_C(   713.12), SIMDE_FLOAT32_C(  -462.23) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -462.23), SIMDE_FLOAT32_C(   713.12),
                         SIMDE_FLOAT32_C(   113.17), SIMDE_FLOAT32_C(   351.10),
                         SIMDE_FLOAT32_C(  -197.34), SIMDE_FLOAT32_C(   968.06),
                         SIMDE_FLOAT32_C(   518.42), SIMDE_FLOAT32_C(    68.48)) },
    { { SIMDE_FLOAT32_C(  -676.83), SIMDE_FLOAT32_C(   745.78),
        SIMDE_FLOAT32_C(  -436.07), SIMDE_FLOAT32_C(   808.02),
        SIMDE_FLOAT32_C(   901.47), SIMDE_FLOAT32_C(  -652.23),
        SIMDE_FLOAT32_C(  -649.97), SIMDE_FLOAT32_C(  -289.44) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -289.44), SIMDE_FLOAT32_C(  -649.97),
                         SIMDE_FLOAT32_C(  -652.23), SIMDE_FLOAT32_C(   901.47),
                         SIMDE_FLOAT32_C(   808.02), SIMDE_FLOAT32_C(  -436.07),
                         SIMDE_FLOAT32_C(   745.78), SIMDE_FLOAT32_C(  -676.83)) }
  };
  const size_t n_tests = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_tests ; i++) {
    simde__m256 r;

    r = simde_mm256_load_ps(test_vec[i].a);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Runs simde_mm256_load_si256 over every table entry and checks that the
 * 32-bit lanes of the loaded vector equal the expected vector. Each entry's
 * expected value repeats its input. Returns 0 once every entry has been
 * checked. */
static int
test_simde_mm256_load_si256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C(   93433077), INT32_C(  912488615), INT32_C( -849505573), INT32_C( -538760324),
                            INT32_C(  576018808), INT32_C(  306399285), INT32_C(  761465198), INT32_C(   67322681)),
      simde_mm256_set_epi32(INT32_C(   93433077), INT32_C(  912488615), INT32_C( -849505573), INT32_C( -538760324),
                            INT32_C(  576018808), INT32_C(  306399285), INT32_C(  761465198), INT32_C(   67322681)) },
    { simde_mm256_set_epi32(INT32_C( 1661040700), INT32_C(-1506281364), INT32_C(  769308925), INT32_C( -545741767),
                            INT32_C(-1609914843), INT32_C(-1728610320), INT32_C( 1438363911), INT32_C(-1495474004)),
      simde_mm256_set_epi32(INT32_C( 1661040700), INT32_C(-1506281364), INT32_C(  769308925), INT32_C( -545741767),
                            INT32_C(-1609914843), INT32_C(-1728610320), INT32_C( 1438363911), INT32_C(-1495474004)) },
    { simde_mm256_set_epi32(INT32_C( -403469250), INT32_C( 1422195130), INT32_C( 1240509512), INT32_C(-1325093027),
                            INT32_C( 1112848703), INT32_C(  757887555), INT32_C( -808479029), INT32_C( 1524821649)),
      simde_mm256_set_epi32(INT32_C( -403469250), INT32_C( 1422195130), INT32_C( 1240509512), INT32_C(-1325093027),
                            INT32_C( 1112848703), INT32_C(  757887555), INT32_C( -808479029), INT32_C( 1524821649)) },
    { simde_mm256_set_epi32(INT32_C(  419753251), INT32_C( 1133371811), INT32_C( 1920523876), INT32_C( 1566543302),
                            INT32_C( 1608176387), INT32_C(  174748447), INT32_C(-1944132629), INT32_C(-1618941327)),
      simde_mm256_set_epi32(INT32_C(  419753251), INT32_C( 1133371811), INT32_C( 1920523876), INT32_C( 1566543302),
                            INT32_C( 1608176387), INT32_C(  174748447), INT32_C(-1944132629), INT32_C(-1618941327)) },
    { simde_mm256_set_epi32(INT32_C(  133578927), INT32_C(  -89176331), INT32_C(  533976318), INT32_C(  686005880),
                            INT32_C( 1680867737), INT32_C( -633287306), INT32_C( -911734776), INT32_C( 1028891739)),
      simde_mm256_set_epi32(INT32_C(  133578927), INT32_C(  -89176331), INT32_C(  533976318), INT32_C(  686005880),
                            INT32_C( 1680867737), INT32_C( -633287306), INT32_C( -911734776), INT32_C( 1028891739)) },
    { simde_mm256_set_epi32(INT32_C( 1968343895), INT32_C( 1991193919), INT32_C(-1412421123), INT32_C(-1413471204),
                            INT32_C( 1571538617), INT32_C(  392630938), INT32_C(   44925707), INT32_C(-1288122501)),
      simde_mm256_set_epi32(INT32_C( 1968343895), INT32_C( 1991193919), INT32_C(-1412421123), INT32_C(-1413471204),
                            INT32_C( 1571538617), INT32_C(  392630938), INT32_C(   44925707), INT32_C(-1288122501)) },
    { simde_mm256_set_epi32(INT32_C(  932954327), INT32_C(  884951875), INT32_C(-1145840174), INT32_C( 2040117874),
                            INT32_C(   39201359), INT32_C( -102892947), INT32_C(  740751736), INT32_C( 1598969461)),
      simde_mm256_set_epi32(INT32_C(  932954327), INT32_C(  884951875), INT32_C(-1145840174), INT32_C( 2040117874),
                            INT32_C(   39201359), INT32_C( -102892947), INT32_C(  740751736), INT32_C( 1598969461)) },
    { simde_mm256_set_epi32(INT32_C( -471731507), INT32_C( 1955207001), INT32_C(-1681640586), INT32_C( -304295513),
                            INT32_C( 1688427496), INT32_C(-1852849481), INT32_C( -533311004), INT32_C(  263226824)),
      simde_mm256_set_epi32(INT32_C( -471731507), INT32_C( 1955207001), INT32_C(-1681640586), INT32_C( -304295513),
                            INT32_C( 1688427496), INT32_C(-1852849481), INT32_C( -533311004), INT32_C(  263226824)) }
  };
  const size_t n_tests = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_tests ; i++) {
    simde__m256i r;

    r = simde_mm256_load_si256(&test_vec[i].a);
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Runs simde_mm256_loadu_pd over every table entry and checks that the loaded
 * vector is close to the expected one (precision argument 1). The expected
 * vector is built with simde_mm256_set_pd from the same four values in
 * reverse argument order. Returns 0 once every entry has been checked. */
static int
test_simde_mm256_loadu_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float64 a[sizeof(simde__m256d) / sizeof(simde_float64)];
    simde__m256d r;
  } test_vec[8] = {
    { { SIMDE_FLOAT64_C( -245.76), SIMDE_FLOAT64_C( -764.95),
        SIMDE_FLOAT64_C(  498.87), SIMDE_FLOAT64_C( -327.12) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -327.12), SIMDE_FLOAT64_C(  498.87),
                         SIMDE_FLOAT64_C( -764.95), SIMDE_FLOAT64_C( -245.76)) },
    { { SIMDE_FLOAT64_C( -747.96), SIMDE_FLOAT64_C(  887.55),
        SIMDE_FLOAT64_C( -714.24), SIMDE_FLOAT64_C(  189.85) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  189.85), SIMDE_FLOAT64_C( -714.24),
                         SIMDE_FLOAT64_C(  887.55), SIMDE_FLOAT64_C( -747.96)) },
    { { SIMDE_FLOAT64_C( -816.60), SIMDE_FLOAT64_C(  548.05),
        SIMDE_FLOAT64_C( -852.03), SIMDE_FLOAT64_C(  683.50) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  683.50), SIMDE_FLOAT64_C( -852.03),
                         SIMDE_FLOAT64_C(  548.05), SIMDE_FLOAT64_C( -816.60)) },
    { { SIMDE_FLOAT64_C(  957.12), SIMDE_FLOAT64_C(  857.15),
        SIMDE_FLOAT64_C( -289.83), SIMDE_FLOAT64_C( -642.05) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -642.05), SIMDE_FLOAT64_C( -289.83),
                         SIMDE_FLOAT64_C(  857.15), SIMDE_FLOAT64_C(  957.12)) },
    { { SIMDE_FLOAT64_C(  279.73), SIMDE_FLOAT64_C(   98.54),
        SIMDE_FLOAT64_C(  917.87), SIMDE_FLOAT64_C( -218.86) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -218.86), SIMDE_FLOAT64_C(  917.87),
                         SIMDE_FLOAT64_C(   98.54), SIMDE_FLOAT64_C(  279.73)) },
    { { SIMDE_FLOAT64_C( -705.64), SIMDE_FLOAT64_C(  -89.39),
        SIMDE_FLOAT64_C( -237.89), SIMDE_FLOAT64_C(    9.05) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    9.05), SIMDE_FLOAT64_C( -237.89),
                         SIMDE_FLOAT64_C(  -89.39), SIMDE_FLOAT64_C( -705.64)) },
    { { SIMDE_FLOAT64_C(  359.06), SIMDE_FLOAT64_C(  630.19),
        SIMDE_FLOAT64_C( -718.76), SIMDE_FLOAT64_C(  263.72) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  263.72), SIMDE_FLOAT64_C( -718.76),
                         SIMDE_FLOAT64_C(  630.19), SIMDE_FLOAT64_C(  359.06)) },
    { { SIMDE_FLOAT64_C(  705.88), SIMDE_FLOAT64_C(  454.13),
        SIMDE_FLOAT64_C(  871.24), SIMDE_FLOAT64_C( -794.27) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -794.27), SIMDE_FLOAT64_C(  871.24),
                         SIMDE_FLOAT64_C(  454.13), SIMDE_FLOAT64_C(  705.88)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Call the unaligned variant this test is named after; the aligned
     * simde_mm256_load_pd has its own test. */
    simde__m256d r = simde_mm256_loadu_pd(test_vec[i].a);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Runs simde_mm256_loadu_ps over every table entry and checks that the loaded
 * vector is close to the expected one (precision argument 1). The expected
 * vector is built with simde_mm256_set_ps from the same eight values in
 * reverse argument order. Returns 0 once every entry has been checked. */
static int
test_simde_mm256_loadu_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float32 a[sizeof(simde__m256) / sizeof(simde_float32)];
    simde__m256 r;
  } test_vec[8] = {
    { { SIMDE_FLOAT32_C(   989.38), SIMDE_FLOAT32_C(  -636.59),
        SIMDE_FLOAT32_C(   969.19), SIMDE_FLOAT32_C(   802.78),
        SIMDE_FLOAT32_C(  -677.79), SIMDE_FLOAT32_C(   669.00),
        SIMDE_FLOAT32_C(  -625.50), SIMDE_FLOAT32_C(  -971.80) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -971.80), SIMDE_FLOAT32_C(  -625.50),
                         SIMDE_FLOAT32_C(   669.00), SIMDE_FLOAT32_C(  -677.79),
                         SIMDE_FLOAT32_C(   802.78), SIMDE_FLOAT32_C(   969.19),
                         SIMDE_FLOAT32_C(  -636.59), SIMDE_FLOAT32_C(   989.38)) },
    { { SIMDE_FLOAT32_C(   483.87), SIMDE_FLOAT32_C(   313.54),
        SIMDE_FLOAT32_C(  -722.81), SIMDE_FLOAT32_C(   175.58),
        SIMDE_FLOAT32_C(  -520.14), SIMDE_FLOAT32_C(  -222.39),
        SIMDE_FLOAT32_C(   889.56), SIMDE_FLOAT32_C(  -141.86) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -141.86), SIMDE_FLOAT32_C(   889.56),
                         SIMDE_FLOAT32_C(  -222.39), SIMDE_FLOAT32_C(  -520.14),
                         SIMDE_FLOAT32_C(   175.58), SIMDE_FLOAT32_C(  -722.81),
                         SIMDE_FLOAT32_C(   313.54), SIMDE_FLOAT32_C(   483.87)) },
    { { SIMDE_FLOAT32_C(    28.06), SIMDE_FLOAT32_C(   709.83),
        SIMDE_FLOAT32_C(  -372.28), SIMDE_FLOAT32_C(   743.18),
        SIMDE_FLOAT32_C(  -465.26), SIMDE_FLOAT32_C(  -871.71),
        SIMDE_FLOAT32_C(   213.87), SIMDE_FLOAT32_C(    34.60) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    34.60), SIMDE_FLOAT32_C(   213.87),
                         SIMDE_FLOAT32_C(  -871.71), SIMDE_FLOAT32_C(  -465.26),
                         SIMDE_FLOAT32_C(   743.18), SIMDE_FLOAT32_C(  -372.28),
                         SIMDE_FLOAT32_C(   709.83), SIMDE_FLOAT32_C(    28.06)) },
    { { SIMDE_FLOAT32_C(   290.56), SIMDE_FLOAT32_C(   408.42),
        SIMDE_FLOAT32_C(  -438.13), SIMDE_FLOAT32_C(  -460.46),
        SIMDE_FLOAT32_C(  -639.21), SIMDE_FLOAT32_C(  -231.83),
        SIMDE_FLOAT32_C(   590.87), SIMDE_FLOAT32_C(  -474.24) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -474.24), SIMDE_FLOAT32_C(   590.87),
                         SIMDE_FLOAT32_C(  -231.83), SIMDE_FLOAT32_C(  -639.21),
                         SIMDE_FLOAT32_C(  -460.46), SIMDE_FLOAT32_C(  -438.13),
                         SIMDE_FLOAT32_C(   408.42), SIMDE_FLOAT32_C(   290.56)) },
    { { SIMDE_FLOAT32_C(  -304.73), SIMDE_FLOAT32_C(   108.23),
        SIMDE_FLOAT32_C(   -73.19), SIMDE_FLOAT32_C(   188.25),
        SIMDE_FLOAT32_C(   420.93), SIMDE_FLOAT32_C(   522.97),
        SIMDE_FLOAT32_C(   234.89), SIMDE_FLOAT32_C(  -731.34) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -731.34), SIMDE_FLOAT32_C(   234.89),
                         SIMDE_FLOAT32_C(   522.97), SIMDE_FLOAT32_C(   420.93),
                         SIMDE_FLOAT32_C(   188.25), SIMDE_FLOAT32_C(   -73.19),
                         SIMDE_FLOAT32_C(   108.23), SIMDE_FLOAT32_C(  -304.73)) },
    { { SIMDE_FLOAT32_C(   708.07), SIMDE_FLOAT32_C(   370.70),
        SIMDE_FLOAT32_C(  -989.08), SIMDE_FLOAT32_C(  -602.45),
        SIMDE_FLOAT32_C(  -987.01), SIMDE_FLOAT32_C(   154.31),
        SIMDE_FLOAT32_C(  -220.43), SIMDE_FLOAT32_C(   262.39) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   262.39), SIMDE_FLOAT32_C(  -220.43),
                         SIMDE_FLOAT32_C(   154.31), SIMDE_FLOAT32_C(  -987.01),
                         SIMDE_FLOAT32_C(  -602.45), SIMDE_FLOAT32_C(  -989.08),
                         SIMDE_FLOAT32_C(   370.70), SIMDE_FLOAT32_C(   708.07)) },
    { { SIMDE_FLOAT32_C(   947.64), SIMDE_FLOAT32_C(   -74.77),
        SIMDE_FLOAT32_C(   902.77), SIMDE_FLOAT32_C(  -429.19),
        SIMDE_FLOAT32_C(  -305.81), SIMDE_FLOAT32_C(   762.65),
        SIMDE_FLOAT32_C(  -261.04), SIMDE_FLOAT32_C(  -156.66) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -156.66), SIMDE_FLOAT32_C(  -261.04),
                         SIMDE_FLOAT32_C(   762.65), SIMDE_FLOAT32_C(  -305.81),
                         SIMDE_FLOAT32_C(  -429.19), SIMDE_FLOAT32_C(   902.77),
                         SIMDE_FLOAT32_C(   -74.77), SIMDE_FLOAT32_C(   947.64)) },
    { { SIMDE_FLOAT32_C(  -313.48), SIMDE_FLOAT32_C(  -237.38),
        SIMDE_FLOAT32_C(   572.62), SIMDE_FLOAT32_C(  -800.42),
        SIMDE_FLOAT32_C(    -6.98), SIMDE_FLOAT32_C(   968.23),
        SIMDE_FLOAT32_C(   417.54), SIMDE_FLOAT32_C(   107.47) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   107.47), SIMDE_FLOAT32_C(   417.54),
                         SIMDE_FLOAT32_C(   968.23), SIMDE_FLOAT32_C(    -6.98),
                         SIMDE_FLOAT32_C(  -800.42), SIMDE_FLOAT32_C(   572.62),
                         SIMDE_FLOAT32_C(  -237.38), SIMDE_FLOAT32_C(  -313.48)) }
  };
  const size_t n_tests = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_tests ; i++) {
    simde__m256 r;

    r = simde_mm256_loadu_ps(test_vec[i].a);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Runs simde_mm256_loadu_si256 over every table entry and checks that the
 * 32-bit lanes of the loaded vector equal the expected vector. Each entry's
 * expected value repeats its input. Returns 0 once every entry has been
 * checked. */
static int
test_simde_mm256_loadu_si256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C(   93433077), INT32_C(  912488615), INT32_C( -849505573), INT32_C( -538760324),
                            INT32_C(  576018808), INT32_C(  306399285), INT32_C(  761465198), INT32_C(   67322681)),
      simde_mm256_set_epi32(INT32_C(   93433077), INT32_C(  912488615), INT32_C( -849505573), INT32_C( -538760324),
                            INT32_C(  576018808), INT32_C(  306399285), INT32_C(  761465198), INT32_C(   67322681)) },
    { simde_mm256_set_epi32(INT32_C( 1661040700), INT32_C(-1506281364), INT32_C(  769308925), INT32_C( -545741767),
                            INT32_C(-1609914843), INT32_C(-1728610320), INT32_C( 1438363911), INT32_C(-1495474004)),
      simde_mm256_set_epi32(INT32_C( 1661040700), INT32_C(-1506281364), INT32_C(  769308925), INT32_C( -545741767),
                            INT32_C(-1609914843), INT32_C(-1728610320), INT32_C( 1438363911), INT32_C(-1495474004)) },
    { simde_mm256_set_epi32(INT32_C( -403469250), INT32_C( 1422195130), INT32_C( 1240509512), INT32_C(-1325093027),
                            INT32_C( 1112848703), INT32_C(  757887555), INT32_C( -808479029), INT32_C( 1524821649)),
      simde_mm256_set_epi32(INT32_C( -403469250), INT32_C( 1422195130), INT32_C( 1240509512), INT32_C(-1325093027),
                            INT32_C( 1112848703), INT32_C(  757887555), INT32_C( -808479029), INT32_C( 1524821649)) },
    { simde_mm256_set_epi32(INT32_C(  419753251), INT32_C( 1133371811), INT32_C( 1920523876), INT32_C( 1566543302),
                            INT32_C( 1608176387), INT32_C(  174748447), INT32_C(-1944132629), INT32_C(-1618941327)),
      simde_mm256_set_epi32(INT32_C(  419753251), INT32_C( 1133371811), INT32_C( 1920523876), INT32_C( 1566543302),
                            INT32_C( 1608176387), INT32_C(  174748447), INT32_C(-1944132629), INT32_C(-1618941327)) },
    { simde_mm256_set_epi32(INT32_C(  133578927), INT32_C(  -89176331), INT32_C(  533976318), INT32_C(  686005880),
                            INT32_C( 1680867737), INT32_C( -633287306), INT32_C( -911734776), INT32_C( 1028891739)),
      simde_mm256_set_epi32(INT32_C(  133578927), INT32_C(  -89176331), INT32_C(  533976318), INT32_C(  686005880),
                            INT32_C( 1680867737), INT32_C( -633287306), INT32_C( -911734776), INT32_C( 1028891739)) },
    { simde_mm256_set_epi32(INT32_C( 1968343895), INT32_C( 1991193919), INT32_C(-1412421123), INT32_C(-1413471204),
                            INT32_C( 1571538617), INT32_C(  392630938), INT32_C(   44925707), INT32_C(-1288122501)),
      simde_mm256_set_epi32(INT32_C( 1968343895), INT32_C( 1991193919), INT32_C(-1412421123), INT32_C(-1413471204),
                            INT32_C( 1571538617), INT32_C(  392630938), INT32_C(   44925707), INT32_C(-1288122501)) },
    { simde_mm256_set_epi32(INT32_C(  932954327), INT32_C(  884951875), INT32_C(-1145840174), INT32_C( 2040117874),
                            INT32_C(   39201359), INT32_C( -102892947), INT32_C(  740751736), INT32_C( 1598969461)),
      simde_mm256_set_epi32(INT32_C(  932954327), INT32_C(  884951875), INT32_C(-1145840174), INT32_C( 2040117874),
                            INT32_C(   39201359), INT32_C( -102892947), INT32_C(  740751736), INT32_C( 1598969461)) },
    { simde_mm256_set_epi32(INT32_C( -471731507), INT32_C( 1955207001), INT32_C(-1681640586), INT32_C( -304295513),
                            INT32_C( 1688427496), INT32_C(-1852849481), INT32_C( -533311004), INT32_C(  263226824)),
      simde_mm256_set_epi32(INT32_C( -471731507), INT32_C( 1955207001), INT32_C(-1681640586), INT32_C( -304295513),
                            INT32_C( 1688427496), INT32_C(-1852849481), INT32_C( -533311004), INT32_C(  263226824)) }
  };
  const size_t n_tests = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_tests ; i++) {
    simde__m256i r;

    r = simde_mm256_loadu_si256(&test_vec[i].a);
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Runs simde_mm256_loadu2_m128(a, b) over every table entry and checks that
 * the result is close to the expected vector (precision argument 1). The
 * expected vector is built with simde_mm256_set_ps from a's four values in
 * reverse order followed by b's four values in reverse order. Returns 0 once
 * every entry has been checked. */
static int
test_simde_mm256_loadu2_m128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float32 a[sizeof(simde__m128) / sizeof(simde_float32)];
    simde_float32 b[sizeof(simde__m128) / sizeof(simde_float32)];
    simde__m256 r;
  } test_vec[8] = {
    { { SIMDE_FLOAT32_C(    13.39), SIMDE_FLOAT32_C(   253.33), SIMDE_FLOAT32_C(   769.78), SIMDE_FLOAT32_C(   607.23) },
      { SIMDE_FLOAT32_C(   382.59), SIMDE_FLOAT32_C(   295.37), SIMDE_FLOAT32_C(  -847.51), SIMDE_FLOAT32_C(  -193.22) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   607.23), SIMDE_FLOAT32_C(   769.78),
                         SIMDE_FLOAT32_C(   253.33), SIMDE_FLOAT32_C(    13.39),
                         SIMDE_FLOAT32_C(  -193.22), SIMDE_FLOAT32_C(  -847.51),
                         SIMDE_FLOAT32_C(   295.37), SIMDE_FLOAT32_C(   382.59)) },
    { { SIMDE_FLOAT32_C(  -621.90), SIMDE_FLOAT32_C(   305.75), SIMDE_FLOAT32_C(  -907.35), SIMDE_FLOAT32_C(  -378.43) },
      { SIMDE_FLOAT32_C(   165.24), SIMDE_FLOAT32_C(   212.29), SIMDE_FLOAT32_C(   823.95), SIMDE_FLOAT32_C(   837.28) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -378.43), SIMDE_FLOAT32_C(  -907.35),
                         SIMDE_FLOAT32_C(   305.75), SIMDE_FLOAT32_C(  -621.90),
                         SIMDE_FLOAT32_C(   837.28), SIMDE_FLOAT32_C(   823.95),
                         SIMDE_FLOAT32_C(   212.29), SIMDE_FLOAT32_C(   165.24)) },
    { { SIMDE_FLOAT32_C(  -207.02), SIMDE_FLOAT32_C(   949.44), SIMDE_FLOAT32_C(   953.63), SIMDE_FLOAT32_C(  -540.83) },
      { SIMDE_FLOAT32_C(  -239.63), SIMDE_FLOAT32_C(  -907.66), SIMDE_FLOAT32_C(  -840.87), SIMDE_FLOAT32_C(   300.80) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -540.83), SIMDE_FLOAT32_C(   953.63),
                         SIMDE_FLOAT32_C(   949.44), SIMDE_FLOAT32_C(  -207.02),
                         SIMDE_FLOAT32_C(   300.80), SIMDE_FLOAT32_C(  -840.87),
                         SIMDE_FLOAT32_C(  -907.66), SIMDE_FLOAT32_C(  -239.63)) },
    { { SIMDE_FLOAT32_C(   568.29), SIMDE_FLOAT32_C(  -558.59), SIMDE_FLOAT32_C(    -1.20), SIMDE_FLOAT32_C(  -521.17) },
      { SIMDE_FLOAT32_C(   772.77), SIMDE_FLOAT32_C(  -729.14), SIMDE_FLOAT32_C(  -873.98), SIMDE_FLOAT32_C(   142.46) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -521.17), SIMDE_FLOAT32_C(    -1.20),
                         SIMDE_FLOAT32_C(  -558.59), SIMDE_FLOAT32_C(   568.29),
                         SIMDE_FLOAT32_C(   142.46), SIMDE_FLOAT32_C(  -873.98),
                         SIMDE_FLOAT32_C(  -729.14), SIMDE_FLOAT32_C(   772.77)) },
    { { SIMDE_FLOAT32_C(   499.82), SIMDE_FLOAT32_C(  -346.37), SIMDE_FLOAT32_C(   357.98), SIMDE_FLOAT32_C(  -982.20) },
      { SIMDE_FLOAT32_C(   429.05), SIMDE_FLOAT32_C(   743.13), SIMDE_FLOAT32_C(   351.79), SIMDE_FLOAT32_C(  -106.23) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -982.20), SIMDE_FLOAT32_C(   357.98),
                         SIMDE_FLOAT32_C(  -346.37), SIMDE_FLOAT32_C(   499.82),
                         SIMDE_FLOAT32_C(  -106.23), SIMDE_FLOAT32_C(   351.79),
                         SIMDE_FLOAT32_C(   743.13), SIMDE_FLOAT32_C(   429.05)) },
    { { SIMDE_FLOAT32_C(  -764.00), SIMDE_FLOAT32_C(   204.78), SIMDE_FLOAT32_C(   842.05), SIMDE_FLOAT32_C(   473.10) },
      { SIMDE_FLOAT32_C(  -181.50), SIMDE_FLOAT32_C(  -509.59), SIMDE_FLOAT32_C(   968.67), SIMDE_FLOAT32_C(   585.40) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   473.10), SIMDE_FLOAT32_C(   842.05),
                         SIMDE_FLOAT32_C(   204.78), SIMDE_FLOAT32_C(  -764.00),
                         SIMDE_FLOAT32_C(   585.40), SIMDE_FLOAT32_C(   968.67),
                         SIMDE_FLOAT32_C(  -509.59), SIMDE_FLOAT32_C(  -181.50)) },
    { { SIMDE_FLOAT32_C(  -248.73), SIMDE_FLOAT32_C(  -498.50), SIMDE_FLOAT32_C(  -186.56), SIMDE_FLOAT32_C(   244.41) },
      { SIMDE_FLOAT32_C(   987.29), SIMDE_FLOAT32_C(   541.99), SIMDE_FLOAT32_C(   577.71), SIMDE_FLOAT32_C(   147.41) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   244.41), SIMDE_FLOAT32_C(  -186.56),
                         SIMDE_FLOAT32_C(  -498.50), SIMDE_FLOAT32_C(  -248.73),
                         SIMDE_FLOAT32_C(   147.41), SIMDE_FLOAT32_C(   577.71),
                         SIMDE_FLOAT32_C(   541.99), SIMDE_FLOAT32_C(   987.29)) },
    { { SIMDE_FLOAT32_C(   -53.98), SIMDE_FLOAT32_C(   -59.84), SIMDE_FLOAT32_C(  -791.34), SIMDE_FLOAT32_C(     7.53) },
      { SIMDE_FLOAT32_C(     2.84), SIMDE_FLOAT32_C(   254.21), SIMDE_FLOAT32_C(   404.98), SIMDE_FLOAT32_C(  -410.67) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     7.53), SIMDE_FLOAT32_C(  -791.34),
                         SIMDE_FLOAT32_C(   -59.84), SIMDE_FLOAT32_C(   -53.98),
                         SIMDE_FLOAT32_C(  -410.67), SIMDE_FLOAT32_C(   404.98),
                         SIMDE_FLOAT32_C(   254.21), SIMDE_FLOAT32_C(     2.84)) }
  };
  const size_t n_tests = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_tests ; i++) {
    simde__m256 r;

    r = simde_mm256_loadu2_m128(test_vec[i].a, test_vec[i].b);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_loadu2_m128d with eight fixed cases.
 *
 * Each case holds two 2-element double arrays (`a`, `b`) that are passed to
 * the function as its first and second argument, plus the expected 256-bit
 * result.  In every case the expectation is spelled as
 * simde_mm256_set_pd(a[1], a[0], b[1], b[0]).
 *
 * Returns 0 when every case matches within the precision passed to the
 * assertion macro.
 */
static int
test_simde_mm256_loadu2_m128d(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float64 a[sizeof(simde__m128d) / sizeof(simde_float64)];
    simde_float64 b[sizeof(simde__m128d) / sizeof(simde_float64)];
    simde__m256d r;
  } test_vec[8] = {
    { { SIMDE_FLOAT64_C(  193.14), SIMDE_FLOAT64_C( -237.27) },
      { SIMDE_FLOAT64_C(  826.89), SIMDE_FLOAT64_C( -516.49) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -237.27), SIMDE_FLOAT64_C(  193.14),
                         SIMDE_FLOAT64_C( -516.49), SIMDE_FLOAT64_C(  826.89)) },
    { { SIMDE_FLOAT64_C( -640.74), SIMDE_FLOAT64_C( -449.08) },
      { SIMDE_FLOAT64_C(  244.98), SIMDE_FLOAT64_C( -467.92) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -449.08), SIMDE_FLOAT64_C( -640.74),
                         SIMDE_FLOAT64_C( -467.92), SIMDE_FLOAT64_C(  244.98)) },
    { { SIMDE_FLOAT64_C(  384.40), SIMDE_FLOAT64_C( -595.56) },
      { SIMDE_FLOAT64_C( -808.24), SIMDE_FLOAT64_C(  198.37) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -595.56), SIMDE_FLOAT64_C(  384.40),
                         SIMDE_FLOAT64_C(  198.37), SIMDE_FLOAT64_C( -808.24)) },
    { { SIMDE_FLOAT64_C(  647.94), SIMDE_FLOAT64_C( -628.27) },
      { SIMDE_FLOAT64_C( -496.78), SIMDE_FLOAT64_C( -569.08) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -628.27), SIMDE_FLOAT64_C(  647.94),
                         SIMDE_FLOAT64_C( -569.08), SIMDE_FLOAT64_C( -496.78)) },
    { { SIMDE_FLOAT64_C(  911.82), SIMDE_FLOAT64_C( -491.30) },
      { SIMDE_FLOAT64_C(  365.77), SIMDE_FLOAT64_C( -898.74) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -491.30), SIMDE_FLOAT64_C(  911.82),
                         SIMDE_FLOAT64_C( -898.74), SIMDE_FLOAT64_C(  365.77)) },
    { { SIMDE_FLOAT64_C( -297.53), SIMDE_FLOAT64_C( -521.34) },
      { SIMDE_FLOAT64_C(  145.28), SIMDE_FLOAT64_C(  488.58) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -521.34), SIMDE_FLOAT64_C( -297.53),
                         SIMDE_FLOAT64_C(  488.58), SIMDE_FLOAT64_C(  145.28)) },
    { { SIMDE_FLOAT64_C( -224.71), SIMDE_FLOAT64_C(   -7.50) },
      { SIMDE_FLOAT64_C(  -86.35), SIMDE_FLOAT64_C(  810.88) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -7.50), SIMDE_FLOAT64_C( -224.71),
                         SIMDE_FLOAT64_C(  810.88), SIMDE_FLOAT64_C(  -86.35)) },
    { { SIMDE_FLOAT64_C(  885.68), SIMDE_FLOAT64_C( -940.09) },
      { SIMDE_FLOAT64_C( -481.99), SIMDE_FLOAT64_C( -433.50) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -940.09), SIMDE_FLOAT64_C(  885.68),
                         SIMDE_FLOAT64_C( -433.50), SIMDE_FLOAT64_C( -481.99)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Combine the two 128-bit halves, then compare with the stored vector. */
    simde__m256d combined = simde_mm256_loadu2_m128d(test_vec[idx].a, test_vec[idx].b);
    simde_assert_m256d_close(combined, test_vec[idx].r, 1);
  }

  return 0;
}

/* Exercises simde_mm256_loadu2_m128i with eight fixed cases.
 *
 * Each case stores two 128-bit integer vectors (`a`, `b`) whose addresses are
 * passed as the first and second argument.  The expected 256-bit value is
 * written as simde_mm256_set_epi32() of the four arguments used to build `a`
 * followed by the four used to build `b`.
 *
 * Returns 0 when all 32-bit lanes compare equal for every case.
 */
static int
test_simde_mm256_loadu2_m128i(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m128i b;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm_set_epi32(INT32_C(  354008351), INT32_C( 1710178598), INT32_C( 1223789711), INT32_C(-1500329554)),
      simde_mm_set_epi32(INT32_C(-1388022686), INT32_C( -390861004), INT32_C( -560834160), INT32_C( 1618430517)),
      simde_mm256_set_epi32(INT32_C(  354008351), INT32_C( 1710178598), INT32_C( 1223789711), INT32_C(-1500329554),
                            INT32_C(-1388022686), INT32_C( -390861004), INT32_C( -560834160), INT32_C( 1618430517)) },
    { simde_mm_set_epi32(INT32_C(-2097010594), INT32_C(-1953861975), INT32_C( 1525655088), INT32_C(-1479248872)),
      simde_mm_set_epi32(INT32_C( -212387035), INT32_C( -783086135), INT32_C( -464607138), INT32_C( -807907186)),
      simde_mm256_set_epi32(INT32_C(-2097010594), INT32_C(-1953861975), INT32_C( 1525655088), INT32_C(-1479248872),
                            INT32_C( -212387035), INT32_C( -783086135), INT32_C( -464607138), INT32_C( -807907186)) },
    { simde_mm_set_epi32(INT32_C( 1556453306), INT32_C( -628648157), INT32_C(-1070645220), INT32_C( 1816365112)),
      simde_mm_set_epi32(INT32_C( -449670221), INT32_C(  758539132), INT32_C(  894912628), INT32_C( 2013246533)),
      simde_mm256_set_epi32(INT32_C( 1556453306), INT32_C( -628648157), INT32_C(-1070645220), INT32_C( 1816365112),
                            INT32_C( -449670221), INT32_C(  758539132), INT32_C(  894912628), INT32_C( 2013246533)) },
    { simde_mm_set_epi32(INT32_C(  973055118), INT32_C(  267011876), INT32_C( -970751985), INT32_C( -790620326)),
      simde_mm_set_epi32(INT32_C(-1774701032), INT32_C(  110651775), INT32_C(-2029162765), INT32_C( -644927818)),
      simde_mm256_set_epi32(INT32_C(  973055118), INT32_C(  267011876), INT32_C( -970751985), INT32_C( -790620326),
                            INT32_C(-1774701032), INT32_C(  110651775), INT32_C(-2029162765), INT32_C( -644927818)) },
    { simde_mm_set_epi32(INT32_C( 1343331807), INT32_C( -752743183), INT32_C( -212726727), INT32_C(  673547091)),
      simde_mm_set_epi32(INT32_C(  510472604), INT32_C(   30606375), INT32_C(-1460649586), INT32_C( -783315263)),
      simde_mm256_set_epi32(INT32_C( 1343331807), INT32_C( -752743183), INT32_C( -212726727), INT32_C(  673547091),
                            INT32_C(  510472604), INT32_C(   30606375), INT32_C(-1460649586), INT32_C( -783315263)) },
    { simde_mm_set_epi32(INT32_C( 1773008222), INT32_C( -172973908), INT32_C( -578745695), INT32_C( 1088863920)),
      simde_mm_set_epi32(INT32_C(-2064848056), INT32_C(  207858402), INT32_C(-1299831865), INT32_C(-1364624980)),
      simde_mm256_set_epi32(INT32_C( 1773008222), INT32_C( -172973908), INT32_C( -578745695), INT32_C( 1088863920),
                            INT32_C(-2064848056), INT32_C(  207858402), INT32_C(-1299831865), INT32_C(-1364624980)) },
    { simde_mm_set_epi32(INT32_C( -608977283), INT32_C(-1563798803), INT32_C(-1827655569), INT32_C( -382597224)),
      simde_mm_set_epi32(INT32_C(-1788804177), INT32_C(-1217503299), INT32_C(   57159833), INT32_C(  -53652220)),
      simde_mm256_set_epi32(INT32_C( -608977283), INT32_C(-1563798803), INT32_C(-1827655569), INT32_C( -382597224),
                            INT32_C(-1788804177), INT32_C(-1217503299), INT32_C(   57159833), INT32_C(  -53652220)) },
    { simde_mm_set_epi32(INT32_C( 2096190829), INT32_C(  255970451), INT32_C( 2016421031), INT32_C( -950647181)),
      simde_mm_set_epi32(INT32_C( -133085873), INT32_C(-1605552420), INT32_C( -147782601), INT32_C( -870212282)),
      simde_mm256_set_epi32(INT32_C( 2096190829), INT32_C(  255970451), INT32_C( 2016421031), INT32_C( -950647181),
                            INT32_C( -133085873), INT32_C(-1605552420), INT32_C( -147782601), INT32_C( -870212282)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Both halves are read through pointers into the table entry. */
    simde__m256i combined = simde_mm256_loadu2_m128i(&test_vec[idx].a, &test_vec[idx].b);
    simde_assert_m256i_i32(combined, ==, test_vec[idx].r);
  }

  return 0;
}

/* Exercises simde_mm_maskload_pd with eight fixed cases.
 *
 * Each case provides two source doubles (`mem_addr`), two 64-bit mask
 * elements (`mask`) and the expected result (`r`).  In these vectors an
 * element with a negative mask value is expected to be loaded unchanged,
 * and an element with a non-negative mask value is expected to read 0.0.
 *
 * Returns 0 when every case matches.
 */
static int
test_simde_mm_maskload_pd (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 mem_addr[2];
    const int64_t mask[2];
    const simde_float64 r[2];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(   -44.84), SIMDE_FLOAT64_C(  -187.23) },
      {  INT64_C(  697350032114386965), -INT64_C( 6822977484778790260) },
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(  -187.23) } },
    { { SIMDE_FLOAT64_C(  -686.00), SIMDE_FLOAT64_C(  -486.25) },
      { -INT64_C( 9072093096164548123),  INT64_C( 8577706021278762060) },
      { SIMDE_FLOAT64_C(  -686.00), SIMDE_FLOAT64_C(     0.00) } },
    { { SIMDE_FLOAT64_C(   345.89), SIMDE_FLOAT64_C(  -846.86) },
      {  INT64_C( 7283870107845829619),  INT64_C( 5554042763219526763) },
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00) } },
    { { SIMDE_FLOAT64_C(  -979.25), SIMDE_FLOAT64_C(  -524.07) },
      {  INT64_C( 1733613083399169728), -INT64_C( 8664218374432089815) },
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(  -524.07) } },
    { { SIMDE_FLOAT64_C(   327.25), SIMDE_FLOAT64_C(   112.82) },
      { -INT64_C(  800106376127047672),  INT64_C( 7751542069822355551) },
      { SIMDE_FLOAT64_C(   327.25), SIMDE_FLOAT64_C(     0.00) } },
    { { SIMDE_FLOAT64_C(   407.41), SIMDE_FLOAT64_C(  -401.19) },
      { -INT64_C( 9026860482835374478), -INT64_C( 1097169102464975702) },
      { SIMDE_FLOAT64_C(   407.41), SIMDE_FLOAT64_C(  -401.19) } },
    { { SIMDE_FLOAT64_C(   -14.88), SIMDE_FLOAT64_C(   573.00) },
      { -INT64_C( 3084833370581537693), -INT64_C( 2835100346349403270) },
      { SIMDE_FLOAT64_C(   -14.88), SIMDE_FLOAT64_C(   573.00) } },
    { { SIMDE_FLOAT64_C(  -778.55), SIMDE_FLOAT64_C(   193.17) },
      {  INT64_C(  672843420433189374), -INT64_C( 7606477107942056835) },
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   193.17) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Materialise the mask vector from the plain integer array first. */
    simde__m128i lane_mask = simde_x_mm_loadu_epi64(test_vec[idx].mask);
    simde__m128d loaded = simde_mm_maskload_pd(test_vec[idx].mem_addr, lane_mask);
    simde_test_x86_assert_equal_f64x2(loaded, simde_mm_loadu_pd(test_vec[idx].r), 1);
  }

  return 0;
}

#if !defined(HEDLEY_MSVC_VERSION) && !defined(__wasi__)
static int
test_simde_mm_maskload_pd_no_illegal_memory_access (SIMDE_MUNIT_TEST_ARGS) {
  // ref: https://github.com/simd-everywhere/simde/issues/998
  // make sure maskload never accesses memory for masked out regions
  // will segfault in case memory is accessed
  #if defined(_GNU_SOURCE)
    simde_float64 *ptr = HEDLEY_STATIC_CAST(simde_float64 *, mmap(NULL, 2 * sizeof(simde_float64), PROT_NONE , MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  #else
    simde_float64 *ptr = HEDLEY_STATIC_CAST(simde_float64 *, mmap(NULL, 2 * sizeof(simde_float64), PROT_NONE , MAP_PRIVATE, -1, 0));
  #endif
  const simde__m128i mask = simde_mm_set_epi64x(INT64_C(0), INT64_C(0));
  simde__m128d test = simde_mm_maskload_pd(ptr, mask);
  simde_float64 r[2] = { SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00) };
  simde_test_x86_assert_equal_f64x2(test, simde_mm_loadu_pd(r), 1);
  return 0;
}
#endif

/* Exercises simde_mm256_maskload_pd with eight fixed cases.
 *
 * Each case provides four source doubles (`mem_addr`), four 64-bit mask
 * elements (`mask`) and the expected result (`r`).  In these vectors an
 * element with a negative mask value is expected to be loaded unchanged,
 * and an element with a non-negative mask value is expected to read 0.0.
 *
 * Returns 0 when every case matches.
 */
static int
test_simde_mm256_maskload_pd (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 mem_addr[4];
    const int64_t mask[4];
    const simde_float64 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(   845.03), SIMDE_FLOAT64_C(   274.61), SIMDE_FLOAT64_C(   515.17), SIMDE_FLOAT64_C(   654.86) },
      { -INT64_C( 1562028826953646494), -INT64_C( 6547821859740641223),  INT64_C( 5461221024099586812), -INT64_C( 6926067570004073380) },
      { SIMDE_FLOAT64_C(   845.03), SIMDE_FLOAT64_C(   274.61), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   654.86) } },
    { { SIMDE_FLOAT64_C(    87.04), SIMDE_FLOAT64_C(  -185.45), SIMDE_FLOAT64_C(   566.76), SIMDE_FLOAT64_C(  -222.61) },
      { -INT64_C( 8140185020693102094), -INT64_C( 7935186431243966026), -INT64_C( 3692834531731199052), -INT64_C(  687403654194683627) },
      { SIMDE_FLOAT64_C(    87.04), SIMDE_FLOAT64_C(  -185.45), SIMDE_FLOAT64_C(   566.76), SIMDE_FLOAT64_C(  -222.61) } },
    { { SIMDE_FLOAT64_C(  -387.34), SIMDE_FLOAT64_C(   667.72), SIMDE_FLOAT64_C(   351.98), SIMDE_FLOAT64_C(   185.90) },
      {  INT64_C( 5746656153388198486), -INT64_C( 2698573944803254074), -INT64_C(  938136386737386456), -INT64_C( 2523130118312267541) },
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   667.72), SIMDE_FLOAT64_C(   351.98), SIMDE_FLOAT64_C(   185.90) } },
    { { SIMDE_FLOAT64_C(   -78.35), SIMDE_FLOAT64_C(  -352.03), SIMDE_FLOAT64_C(   326.83), SIMDE_FLOAT64_C(   368.88) },
      { -INT64_C( 7675339611453347526),  INT64_C( 4744848230774212468), -INT64_C( 3847732952440777688), -INT64_C( 3251158471971203291) },
      { SIMDE_FLOAT64_C(   -78.35), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   326.83), SIMDE_FLOAT64_C(   368.88) } },
    { { SIMDE_FLOAT64_C(  -253.08), SIMDE_FLOAT64_C(    10.09), SIMDE_FLOAT64_C(   790.44), SIMDE_FLOAT64_C(  -217.02) },
      { -INT64_C( 6233112357282165138),  INT64_C( 8276240822704953760), -INT64_C( 6505481490158291400), -INT64_C( 4741646846794426252) },
      { SIMDE_FLOAT64_C(  -253.08), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   790.44), SIMDE_FLOAT64_C(  -217.02) } },
    { { SIMDE_FLOAT64_C(   308.28), SIMDE_FLOAT64_C(  -190.54), SIMDE_FLOAT64_C(  -550.36), SIMDE_FLOAT64_C(  -303.22) },
      { -INT64_C( 6395814632349097515), -INT64_C( 7263366602557941603), -INT64_C( 3216775732650775751), -INT64_C( 5871229529546912511) },
      { SIMDE_FLOAT64_C(   308.28), SIMDE_FLOAT64_C(  -190.54), SIMDE_FLOAT64_C(  -550.36), SIMDE_FLOAT64_C(  -303.22) } },
    { { SIMDE_FLOAT64_C(   657.64), SIMDE_FLOAT64_C(   674.06), SIMDE_FLOAT64_C(   624.26), SIMDE_FLOAT64_C(   941.85) },
      {  INT64_C(  508501554445574299), -INT64_C( 5706936849136467483), -INT64_C( 8199905151792502630),  INT64_C( 8100955200803354953) },
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   674.06), SIMDE_FLOAT64_C(   624.26), SIMDE_FLOAT64_C(     0.00) } },
    { { SIMDE_FLOAT64_C(   517.21), SIMDE_FLOAT64_C(    15.58), SIMDE_FLOAT64_C(   172.93), SIMDE_FLOAT64_C(  -730.24) },
      {  INT64_C( 2699593483387123569),  INT64_C( 2376308967288947396), -INT64_C( 4803955517750890898),  INT64_C( 1459965220665278538) },
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   172.93), SIMDE_FLOAT64_C(     0.00) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Materialise the mask vector from the plain integer array first. */
    simde__m256i lane_mask = simde_x_mm256_loadu_epi64(test_vec[idx].mask);
    simde__m256d loaded = simde_mm256_maskload_pd(test_vec[idx].mem_addr, lane_mask);
    simde_test_x86_assert_equal_f64x4(loaded, simde_mm256_loadu_pd(test_vec[idx].r), 1);
  }

  return 0;
}

#if !defined(HEDLEY_MSVC_VERSION) && !defined(__wasi__)
static int
test_simde_mm256_maskload_pd_no_illegal_memory_access (SIMDE_MUNIT_TEST_ARGS) {
  // ref: https://github.com/simd-everywhere/simde/issues/998
  // make sure maskload never accesses memory for masked out regions
  // will segfault in case memory is accessed
  #if defined(_GNU_SOURCE)
    simde_float64 *ptr = HEDLEY_STATIC_CAST(simde_float64 *, mmap(NULL, 4 * sizeof(simde_float64), PROT_NONE , MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  #else
    simde_float64 *ptr = HEDLEY_STATIC_CAST(simde_float64 *, mmap(NULL, 4 * sizeof(simde_float64), PROT_NONE , MAP_PRIVATE, -1, 0));
  #endif
  const simde__m256i mask = simde_mm256_set_epi64x(INT64_C(0), INT64_C(0), INT64_C(0), INT64_C(0));
  simde__m256d test = simde_mm256_maskload_pd(ptr, mask);
  simde_float64 r[4] = { SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00) };
  simde_test_x86_assert_equal_f64x4(test, simde_mm256_loadu_pd(r), 1);
  return 0;
}
#endif

/* Exercises simde_mm_maskload_ps with eight fixed cases.
 *
 * Each case provides four source floats (`mem_addr`), four 32-bit mask
 * elements (`mask`) and the expected result (`r`).  In these vectors an
 * element with a negative mask value is expected to be loaded unchanged,
 * and an element with a non-negative mask value is expected to read 0.0.
 *
 * Returns 0 when every case matches.
 */
static int
test_simde_mm_maskload_ps (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 mem_addr[4];
    const int32_t mask[4];
    const simde_float32 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(  -560.98), SIMDE_FLOAT32_C(   330.23), SIMDE_FLOAT32_C(  -571.08), SIMDE_FLOAT32_C(  -900.52) },
      {  INT32_C(   552414127), -INT32_C(   630594570), -INT32_C(  1291956017), -INT32_C(  1030767749) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   330.23), SIMDE_FLOAT32_C(  -571.08), SIMDE_FLOAT32_C(  -900.52) } },
    { { SIMDE_FLOAT32_C(   -52.84), SIMDE_FLOAT32_C(  -695.38), SIMDE_FLOAT32_C(  -631.11), SIMDE_FLOAT32_C(   296.05) },
      { -INT32_C(  1978494141),  INT32_C(  1682818151),  INT32_C(  2012703432), -INT32_C(  1952979819) },
      { SIMDE_FLOAT32_C(   -52.84), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   296.05) } },
    { { SIMDE_FLOAT32_C(   460.61), SIMDE_FLOAT32_C(  -394.20), SIMDE_FLOAT32_C(   440.23), SIMDE_FLOAT32_C(  -609.13) },
      { -INT32_C(   867605424),  INT32_C(    42915871), -INT32_C(   268280147),  INT32_C(  1501238513) },
      { SIMDE_FLOAT32_C(   460.61), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   440.23), SIMDE_FLOAT32_C(     0.00) } },
    { { SIMDE_FLOAT32_C(   481.64), SIMDE_FLOAT32_C(   724.40), SIMDE_FLOAT32_C(  -863.54), SIMDE_FLOAT32_C(   137.47) },
      { -INT32_C(   954682062),  INT32_C(  1632874393), -INT32_C(      542289),  INT32_C(  1019953181) },
      { SIMDE_FLOAT32_C(   481.64), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(  -863.54), SIMDE_FLOAT32_C(     0.00) } },
    { { SIMDE_FLOAT32_C(   811.63), SIMDE_FLOAT32_C(  -828.49), SIMDE_FLOAT32_C(   881.09), SIMDE_FLOAT32_C(  -936.46) },
      { -INT32_C(  1447608137),  INT32_C(   771895893),  INT32_C(   768589818),  INT32_C(   250931060) },
      { SIMDE_FLOAT32_C(   811.63), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00) } },
    { { SIMDE_FLOAT32_C(  -741.71), SIMDE_FLOAT32_C(  -343.75), SIMDE_FLOAT32_C(  -821.30), SIMDE_FLOAT32_C(    50.98) },
      {  INT32_C(   491021824), -INT32_C(  1067904857),  INT32_C(   562468969),  INT32_C(   768228824) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(  -343.75), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00) } },
    { { SIMDE_FLOAT32_C(   335.89), SIMDE_FLOAT32_C(   701.50), SIMDE_FLOAT32_C(  -340.99), SIMDE_FLOAT32_C(  -135.85) },
      {  INT32_C(     9906827), -INT32_C(  1492219119), -INT32_C(   739476013), -INT32_C(  1963904541) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   701.50), SIMDE_FLOAT32_C(  -340.99), SIMDE_FLOAT32_C(  -135.85) } },
    { { SIMDE_FLOAT32_C(   167.10), SIMDE_FLOAT32_C(   398.88), SIMDE_FLOAT32_C(  -514.86), SIMDE_FLOAT32_C(   423.86) },
      { -INT32_C(  1144270366),  INT32_C(  2129171726), -INT32_C(   303545247), -INT32_C(  2131918994) },
      { SIMDE_FLOAT32_C(   167.10), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(  -514.86), SIMDE_FLOAT32_C(   423.86) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Materialise the mask vector from the plain integer array first. */
    simde__m128i lane_mask = simde_x_mm_loadu_epi32(test_vec[idx].mask);
    simde__m128 loaded = simde_mm_maskload_ps(test_vec[idx].mem_addr, lane_mask);
    simde_test_x86_assert_equal_f32x4(loaded, simde_mm_loadu_ps(test_vec[idx].r), 1);
  }

  return 0;
}

#if !defined(HEDLEY_MSVC_VERSION) && !defined(__wasi__)
static int
test_simde_mm_maskload_ps_no_illegal_memory_access (SIMDE_MUNIT_TEST_ARGS) {
  // ref: https://github.com/simd-everywhere/simde/issues/998
  // make sure maskload never accesses memory for masked out regions
  // will segfault in case memory is accessed
  #if defined(_GNU_SOURCE)
    simde_float32 *ptr = HEDLEY_STATIC_CAST(simde_float32 *, mmap(NULL, 4 * sizeof(simde_float32), PROT_NONE , MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  #else
    simde_float32 *ptr = HEDLEY_STATIC_CAST(simde_float32 *, mmap(NULL, 4 * sizeof(simde_float32), PROT_NONE , MAP_PRIVATE, -1, 0));
  #endif
  const simde__m128i mask = simde_mm_set_epi32(0, 0, 0, 0);
  simde__m128 test = simde_mm_maskload_ps(ptr, mask);
  simde_float32 r[4] = { SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00) };
  simde_test_x86_assert_equal_f32x4(test, simde_mm_loadu_ps(r), 1);
  return 0;
}
#endif


/* Exercises simde_mm256_maskload_ps with eight fixed cases.
 *
 * Each case provides eight source floats (`mem_addr`), eight 32-bit mask
 * elements (`mask`) and the expected result (`r`).  In these vectors an
 * element with a negative mask value is expected to be loaded unchanged,
 * and an element with a non-negative mask value is expected to read 0.0.
 *
 * Returns 0 when every case matches.
 */
static int
test_simde_mm256_maskload_ps (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 mem_addr[8];
    const int32_t mask[8];
    const simde_float32 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C(  -588.55), SIMDE_FLOAT32_C(  -156.14), SIMDE_FLOAT32_C(   765.50), SIMDE_FLOAT32_C(  -514.50),
        SIMDE_FLOAT32_C(   262.17), SIMDE_FLOAT32_C(  -363.89), SIMDE_FLOAT32_C(  -808.48), SIMDE_FLOAT32_C(   781.30) },
      { -INT32_C(   576220470), -INT32_C(  1010639970),  INT32_C(  1590785915), -INT32_C(   714613675), -INT32_C(   779948395), -INT32_C(   245937156), -INT32_C(   509561887), -INT32_C(   616351727) },
      { SIMDE_FLOAT32_C(  -588.55), SIMDE_FLOAT32_C(  -156.14), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(  -514.50),
        SIMDE_FLOAT32_C(   262.17), SIMDE_FLOAT32_C(  -363.89), SIMDE_FLOAT32_C(  -808.48), SIMDE_FLOAT32_C(   781.30) } },
    { { SIMDE_FLOAT32_C(   420.97), SIMDE_FLOAT32_C(  -838.24), SIMDE_FLOAT32_C(  -392.68), SIMDE_FLOAT32_C(   299.50),
        SIMDE_FLOAT32_C(   207.95), SIMDE_FLOAT32_C(   278.96), SIMDE_FLOAT32_C(  -847.51), SIMDE_FLOAT32_C(   417.49) },
      {  INT32_C(  1184956145),  INT32_C(  1880819674), -INT32_C(   247357707), -INT32_C(   891119127),  INT32_C(  1571521100), -INT32_C(  2059800645), -INT32_C(  1594953254),  INT32_C(  1575099244) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(  -392.68), SIMDE_FLOAT32_C(   299.50),
        SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   278.96), SIMDE_FLOAT32_C(  -847.51), SIMDE_FLOAT32_C(     0.00) } },
    { { SIMDE_FLOAT32_C(   165.20), SIMDE_FLOAT32_C(  -625.22), SIMDE_FLOAT32_C(  -748.60), SIMDE_FLOAT32_C(  -111.47),
        SIMDE_FLOAT32_C(   463.62), SIMDE_FLOAT32_C(  -236.00), SIMDE_FLOAT32_C(  -639.66), SIMDE_FLOAT32_C(  -407.20) },
      {  INT32_C(  1181787485), -INT32_C(  1978576322), -INT32_C(  1863795499), -INT32_C(  2062212693),  INT32_C(  2116420626),  INT32_C(   953944095),  INT32_C(   338395275), -INT32_C(  1651273921) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(  -625.22), SIMDE_FLOAT32_C(  -748.60), SIMDE_FLOAT32_C(  -111.47),
        SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(  -407.20) } },
    { { SIMDE_FLOAT32_C(  -291.43), SIMDE_FLOAT32_C(  -760.80), SIMDE_FLOAT32_C(   348.26), SIMDE_FLOAT32_C(  -222.79),
        SIMDE_FLOAT32_C(  -485.28), SIMDE_FLOAT32_C(   543.93), SIMDE_FLOAT32_C(   -34.22), SIMDE_FLOAT32_C(  -759.27) },
      {  INT32_C(  1555752113),  INT32_C(   417517573), -INT32_C(   208271148), -INT32_C(  1691651568),  INT32_C(   850417394),  INT32_C(   433013733), -INT32_C(  1685343674),  INT32_C(  1489406119) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   348.26), SIMDE_FLOAT32_C(  -222.79),
        SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   -34.22), SIMDE_FLOAT32_C(     0.00) } },
    { { SIMDE_FLOAT32_C(   321.91), SIMDE_FLOAT32_C(   814.50), SIMDE_FLOAT32_C(   803.71), SIMDE_FLOAT32_C(   833.48),
        SIMDE_FLOAT32_C(   558.02), SIMDE_FLOAT32_C(   442.93), SIMDE_FLOAT32_C(   -87.03), SIMDE_FLOAT32_C(   798.77) },
      { -INT32_C(  1357438818), -INT32_C(  2058730861),  INT32_C(  2125986457), -INT32_C(  2087156163), -INT32_C(   534830279),  INT32_C(   406381995), -INT32_C(  1232409499),  INT32_C(   567940227) },
      { SIMDE_FLOAT32_C(   321.91), SIMDE_FLOAT32_C(   814.50), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   833.48),
        SIMDE_FLOAT32_C(   558.02), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   -87.03), SIMDE_FLOAT32_C(     0.00) } },
    { { SIMDE_FLOAT32_C(  -551.06), SIMDE_FLOAT32_C(   227.15), SIMDE_FLOAT32_C(  -106.44), SIMDE_FLOAT32_C(   271.24),
        SIMDE_FLOAT32_C(   412.99), SIMDE_FLOAT32_C(   -27.66), SIMDE_FLOAT32_C(   187.06), SIMDE_FLOAT32_C(   798.87) },
      {  INT32_C(  1397490709), -INT32_C(   992549749), -INT32_C(  1281035001),  INT32_C(  1087102170),  INT32_C(  1274435016), -INT32_C(  1653747607), -INT32_C(   144359998),  INT32_C(  1858384472) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   227.15), SIMDE_FLOAT32_C(  -106.44), SIMDE_FLOAT32_C(     0.00),
        SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   -27.66), SIMDE_FLOAT32_C(   187.06), SIMDE_FLOAT32_C(     0.00) } },
    { { SIMDE_FLOAT32_C(   516.26), SIMDE_FLOAT32_C(  -512.15), SIMDE_FLOAT32_C(   869.28), SIMDE_FLOAT32_C(  -650.32),
        SIMDE_FLOAT32_C(  -650.22), SIMDE_FLOAT32_C(  -362.47), SIMDE_FLOAT32_C(   974.91), SIMDE_FLOAT32_C(   374.68) },
      {  INT32_C(  1739565453),  INT32_C(  1319598725),  INT32_C(   949591503),  INT32_C(   819267182), -INT32_C(  1675150780), -INT32_C(  1559565076), -INT32_C(   236663812), -INT32_C(   252843421) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
        SIMDE_FLOAT32_C(  -650.22), SIMDE_FLOAT32_C(  -362.47), SIMDE_FLOAT32_C(   974.91), SIMDE_FLOAT32_C(   374.68) } },
    { { SIMDE_FLOAT32_C(   -61.68), SIMDE_FLOAT32_C(   929.24), SIMDE_FLOAT32_C(   912.15), SIMDE_FLOAT32_C(  -644.38),
        SIMDE_FLOAT32_C(  -898.12), SIMDE_FLOAT32_C(  -627.24), SIMDE_FLOAT32_C(   292.92), SIMDE_FLOAT32_C(   202.99) },
      {  INT32_C(   169672860),  INT32_C(  1262219783),  INT32_C(   434594349),  INT32_C(  1270673998),  INT32_C(   540844477),  INT32_C(   504375691), -INT32_C(   617125691),  INT32_C(    62955111) },
      { SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00),
        SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(     0.00), SIMDE_FLOAT32_C(   292.92), SIMDE_FLOAT32_C(     0.00) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Materialise the mask vector from the plain integer array first. */
    simde__m256i lane_mask = simde_x_mm256_loadu_epi32(test_vec[idx].mask);
    simde__m256 loaded = simde_mm256_maskload_ps(test_vec[idx].mem_addr, lane_mask);
    simde_test_x86_assert_equal_f32x8(loaded, simde_mm256_loadu_ps(test_vec[idx].r), 1);
  }

  return 0;
}

#if !defined(HEDLEY_MSVC_VERSION) && !defined(__wasi__)
static int
test_simde_mm256_maskload_ps_no_illegal_memory_access (SIMDE_MUNIT_TEST_ARGS) {
  // ref: https://github.com/simd-everywhere/simde/issues/998
  // make sure maskload never accesses memory for masked out regions
  // will segfault in case memory is accessed
  #if defined(_GNU_SOURCE)
    simde_float32 *ptr = HEDLEY_STATIC_CAST(simde_float32 *, mmap(NULL, 8 * sizeof(simde_float32), PROT_NONE , MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  #else
    simde_float32 *ptr = HEDLEY_STATIC_CAST(simde_float32 *, mmap(NULL, 8 * sizeof(simde_float32), PROT_NONE , MAP_PRIVATE, -1, 0));
  #endif
  const simde__m256i mask = simde_mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, 0);
  simde__m256 test = simde_mm256_maskload_ps(ptr, mask);
  simde_float32 r[8] = { SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00),
                         SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00) };
  simde_test_x86_assert_equal_f32x8(test, simde_mm256_loadu_ps(r), 1);
  return 0;
}
#endif

static int
test_simde_mm_maskstore_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d a;
    simde__m128i mask;
    double ri[2];
    double ro[2];
  } test_vec[8] = {
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  291.40), SIMDE_FLOAT64_C( -747.46)),
      simde_mm_set_epi64x(INT64_C( -901196363302656956), INT64_C(  423467829629286510)),
      { SIMDE_FLOAT64_C( -279.11), SIMDE_FLOAT64_C( -707.31) },
      { SIMDE_FLOAT64_C( -279.11), SIMDE_FLOAT64_C(  291.40) } },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(   36.11), SIMDE_FLOAT64_C( -279.03)),
      simde_mm_set_epi64x(INT64_C(-5374148835716618800), INT64_C( 4687824648494664977)),
      { SIMDE_FLOAT64_C( -513.32), SIMDE_FLOAT64_C(  997.01) },
      { SIMDE_FLOAT64_C( -513.32), SIMDE_FLOAT64_C(   36.11) } },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  252.18), SIMDE_FLOAT64_C( -396.70)),
      simde_mm_set_epi64x(INT64_C(-2340838553401196290), INT64_C(-8255671198755410933)),
      { SIMDE_FLOAT64_C( -313.87), SIMDE_FLOAT64_C(  648.77) },
      { SIMDE_FLOAT64_C( -396.70), SIMDE_FLOAT64_C(  252.18) } },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -154.32), SIMDE_FLOAT64_C(  707.71)),
      simde_mm_set_epi64x(INT64_C( -336577207510206055), INT64_C(-8731515008786621717)),
      { SIMDE_FLOAT64_C( -956.34), SIMDE_FLOAT64_C(  661.79) },
      { SIMDE_FLOAT64_C(  707.71), SIMDE_FLOAT64_C( -154.32) } },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -482.35), SIMDE_FLOAT64_C(  870.14)),
      simde_mm_set_epi64x(INT64_C(-6428326320006280400), INT64_C( 2370968363897859860)),
      { SIMDE_FLOAT64_C(  -88.00), SIMDE_FLOAT64_C( -393.63) },
      { SIMDE_FLOAT64_C(  -88.00), SIMDE_FLOAT64_C( -482.35) } },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -798.76), SIMDE_FLOAT64_C(  378.53)),
      simde_mm_set_epi64x(INT64_C( 2158346412704669322), INT64_C(-1741710562990070947)),
      { SIMDE_FLOAT64_C( -459.19), SIMDE_FLOAT64_C( -261.60) },
      { SIMDE_FLOAT64_C(  378.53), SIMDE_FLOAT64_C( -261.60) } },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(   47.39), SIMDE_FLOAT64_C(  457.82)),
      simde_mm_set_epi64x(INT64_C( 3118706775454689373), INT64_C(-4723277293636004112)),
      { SIMDE_FLOAT64_C( -206.48), SIMDE_FLOAT64_C(  663.61) },
      { SIMDE_FLOAT64_C(  457.82), SIMDE_FLOAT64_C(  663.61) } },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -497.78), SIMDE_FLOAT64_C(  -27.40)),
      simde_mm_set_epi64x(INT64_C(-5481981628135809029), INT64_C(-7037919562781567894)),
      { SIMDE_FLOAT64_C(   54.28), SIMDE_FLOAT64_C(  -36.05) },
      { SIMDE_FLOAT64_C(  -27.40), SIMDE_FLOAT64_C( -497.78) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    double r[2] = { test_vec[i].ri[0], test_vec[i].ri[1], };
    simde_memcpy(r, test_vec[i].ri, sizeof(r));
    simde_mm_maskstore_pd(r, test_vec[i].mask, test_vec[i].a);
    simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro, 1);
  }

  return 0;
}

/* Exercises simde_mm256_maskstore_pd with eight fixed cases.
 *
 * Each case holds the vector to store (`a`), the mask vector (`mask`), the
 * initial contents of the four-element destination (`ri`) and the contents
 * expected after the store (`ro`).
 *
 * Returns 0 when every case matches.
 */
static int
test_simde_mm256_maskstore_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256i mask;
    simde_float64 ri[4];
    simde_float64 ro[4];
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  256.10), SIMDE_FLOAT64_C(  343.75),
                         SIMDE_FLOAT64_C( -441.90), SIMDE_FLOAT64_C(  609.80)),
      simde_mm256_set_epi64x(INT64_C( 4260458650207424972), INT64_C( 7445494124920454187),
                             INT64_C( 3286955945790099662), INT64_C(-7285974739268381254)),
      { SIMDE_FLOAT64_C( -289.65), SIMDE_FLOAT64_C(  426.76),
        SIMDE_FLOAT64_C(   -9.11), SIMDE_FLOAT64_C( -274.93) },
      { SIMDE_FLOAT64_C(  609.80), SIMDE_FLOAT64_C(  426.76),
        SIMDE_FLOAT64_C(   -9.11), SIMDE_FLOAT64_C( -274.93) } },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  571.67), SIMDE_FLOAT64_C(  518.20),
                         SIMDE_FLOAT64_C(  -98.22), SIMDE_FLOAT64_C( -751.94)),
      simde_mm256_set_epi64x(INT64_C( 1638253588391173148), INT64_C( 1793291230565330203),
                             INT64_C(-9111784699029565866), INT64_C( -227326109536357972)),
      { SIMDE_FLOAT64_C( -486.54), SIMDE_FLOAT64_C(  729.14),
        SIMDE_FLOAT64_C( -705.07), SIMDE_FLOAT64_C( -433.33) },
      { SIMDE_FLOAT64_C( -751.94), SIMDE_FLOAT64_C(  -98.22),
        SIMDE_FLOAT64_C( -705.07), SIMDE_FLOAT64_C( -433.33) } },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  102.13), SIMDE_FLOAT64_C( -818.78),
                         SIMDE_FLOAT64_C( -736.90), SIMDE_FLOAT64_C( -616.20)),
      simde_mm256_set_epi64x(INT64_C(-2892759574131760065), INT64_C(-3440936018861750870),
                             INT64_C( -525494054977382250), INT64_C(-2782562282709585632)),
      { SIMDE_FLOAT64_C( -465.13), SIMDE_FLOAT64_C(  232.40),
        SIMDE_FLOAT64_C( -478.53), SIMDE_FLOAT64_C(  -53.86) },
      { SIMDE_FLOAT64_C( -616.20), SIMDE_FLOAT64_C( -736.90),
        SIMDE_FLOAT64_C( -818.78), SIMDE_FLOAT64_C(  102.13) } },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  748.35), SIMDE_FLOAT64_C(  412.39),
                         SIMDE_FLOAT64_C(  180.73), SIMDE_FLOAT64_C( -772.53)),
      simde_mm256_set_epi64x(INT64_C(-7252528024816875961), INT64_C( 3768666429054031776),
                             INT64_C(-5502653220660844988), INT64_C( 6869378166726947276)),
      { SIMDE_FLOAT64_C( -276.66), SIMDE_FLOAT64_C( -248.39),
        SIMDE_FLOAT64_C( -589.21), SIMDE_FLOAT64_C(  826.33) },
      { SIMDE_FLOAT64_C( -276.66), SIMDE_FLOAT64_C(  180.73),
        SIMDE_FLOAT64_C( -589.21), SIMDE_FLOAT64_C(  748.35) } },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  837.77), SIMDE_FLOAT64_C(  582.83),
                         SIMDE_FLOAT64_C(  901.95), SIMDE_FLOAT64_C(  440.28)),
      simde_mm256_set_epi64x(INT64_C( 7132263712774217761), INT64_C( 7513359651930322343),
                             INT64_C( 7271276353319921669), INT64_C(-6353645951073475265)),
      { SIMDE_FLOAT64_C(  194.29), SIMDE_FLOAT64_C( -702.43),
        SIMDE_FLOAT64_C(  663.08), SIMDE_FLOAT64_C( -837.37) },
      { SIMDE_FLOAT64_C(  440.28), SIMDE_FLOAT64_C( -702.43),
        SIMDE_FLOAT64_C(  663.08), SIMDE_FLOAT64_C( -837.37) } },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -476.01), SIMDE_FLOAT64_C(  964.03),
                         SIMDE_FLOAT64_C( -620.01), SIMDE_FLOAT64_C( -190.51)),
      simde_mm256_set_epi64x(INT64_C(-6904573933630117437), INT64_C( 8435505992452950995),
                             INT64_C(-2030909113789010322), INT64_C(-3516031824252737762)),
      { SIMDE_FLOAT64_C(  412.73), SIMDE_FLOAT64_C( -375.82),
        SIMDE_FLOAT64_C(  493.97), SIMDE_FLOAT64_C( -325.91) },
      { SIMDE_FLOAT64_C( -190.51), SIMDE_FLOAT64_C( -620.01),
        SIMDE_FLOAT64_C(  493.97), SIMDE_FLOAT64_C( -476.01) } },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  109.41), SIMDE_FLOAT64_C( -105.03),
                         SIMDE_FLOAT64_C(  942.38), SIMDE_FLOAT64_C(  492.62)),
      simde_mm256_set_epi64x(INT64_C(-4507038716603653937), INT64_C(-5597740526711762453),
                             INT64_C(-4892847490676269188), INT64_C(-2050275303632712946)),
      { SIMDE_FLOAT64_C(  999.52), SIMDE_FLOAT64_C(   91.29),
        SIMDE_FLOAT64_C( -389.17), SIMDE_FLOAT64_C( -828.90) },
      { SIMDE_FLOAT64_C(  492.62), SIMDE_FLOAT64_C(  942.38),
        SIMDE_FLOAT64_C( -105.03), SIMDE_FLOAT64_C(  109.41) } },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -984.18), SIMDE_FLOAT64_C(  526.23),
                         SIMDE_FLOAT64_C(  210.69), SIMDE_FLOAT64_C( -960.42)),
      simde_mm256_set_epi64x(INT64_C(-2327918596051776606), INT64_C(-6284034566091225578),
                             INT64_C( 5326594562181579270), INT64_C( 7458656096830697285)),
      { SIMDE_FLOAT64_C(  719.28), SIMDE_FLOAT64_C( -272.00),
        SIMDE_FLOAT64_C( -305.09), SIMDE_FLOAT64_C(  995.54) },
      { SIMDE_FLOAT64_C(  719.28), SIMDE_FLOAT64_C( -272.00),
        SIMDE_FLOAT64_C(  526.23), SIMDE_FLOAT64_C( -984.18) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Work on a private copy of the initial contents; the table is const
     * and the store writes through the destination pointer. */
    simde_float64 dest[4];
    simde_memcpy(dest, test_vec[idx].ri, sizeof(dest));
    simde_mm256_maskstore_pd(dest, test_vec[idx].mask, test_vec[idx].a);
    simde_assert_equal_vf64(sizeof(dest) / sizeof(dest[0]), dest, test_vec[idx].ro, 1);
  }

  return 0;
}

/* Table-driven test for simde_mm_maskstore_ps.
 *
 * Each entry supplies a source vector, a mask, the initial contents of a
 * four-float destination buffer, and the contents expected after the store.
 * In the vectors below, a destination element takes the matching element
 * of `a` when the matching mask element is negative, and keeps its
 * initial value otherwise.
 *
 * The constructors list elements in the opposite order from the arrays:
 * as the data shows, the last argument of simde_mm_set_ps /
 * simde_mm_set_epi32 pairs with index 0 of `ri` / `ro`.
 *
 * Returns 0 once every vector has been checked.
 */
static int
test_simde_mm_maskstore_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 a;        /* values offered to the store */
    simde__m128i mask;    /* per-element mask passed to the store */
    simde_float32 ri[4];  /* destination contents before the store */
    simde_float32 ro[4];  /* destination contents expected after the store */
  } test_vec[8] = {
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  674.56), SIMDE_FLOAT32_C( -800.90), SIMDE_FLOAT32_C( -515.11), SIMDE_FLOAT32_C(  918.12)),
      simde_mm_set_epi32(INT32_C( 1108579007), INT32_C( 1980053353), INT32_C( 1803323457), INT32_C( -407836103)),
      { SIMDE_FLOAT32_C( -619.39), SIMDE_FLOAT32_C( -235.61), SIMDE_FLOAT32_C( -100.19), SIMDE_FLOAT32_C(  132.85) },
      { SIMDE_FLOAT32_C(  918.12), SIMDE_FLOAT32_C( -235.61), SIMDE_FLOAT32_C( -100.19), SIMDE_FLOAT32_C(  132.85) } },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -921.95), SIMDE_FLOAT32_C( -150.48), SIMDE_FLOAT32_C(  -68.61), SIMDE_FLOAT32_C(  516.22)),
      simde_mm_set_epi32(INT32_C(  992186029), INT32_C( 1116343160), INT32_C(  312629428), INT32_C( -102955009)),
      { SIMDE_FLOAT32_C(  590.22), SIMDE_FLOAT32_C(   90.53), SIMDE_FLOAT32_C(  -38.56), SIMDE_FLOAT32_C(  730.28) },
      { SIMDE_FLOAT32_C(  516.22), SIMDE_FLOAT32_C(   90.53), SIMDE_FLOAT32_C(  -38.56), SIMDE_FLOAT32_C(  730.28) } },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -641.93), SIMDE_FLOAT32_C(  207.52), SIMDE_FLOAT32_C( -794.64), SIMDE_FLOAT32_C(  604.13)),
      simde_mm_set_epi32(INT32_C( -917957115), INT32_C( 1866845223), INT32_C(  844085971), INT32_C( -482023893)),
      { SIMDE_FLOAT32_C( -925.57), SIMDE_FLOAT32_C(    8.11), SIMDE_FLOAT32_C( -375.92), SIMDE_FLOAT32_C( -370.15) },
      { SIMDE_FLOAT32_C(  604.13), SIMDE_FLOAT32_C(    8.11), SIMDE_FLOAT32_C( -375.92), SIMDE_FLOAT32_C( -641.93) } },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  805.10), SIMDE_FLOAT32_C( -153.46), SIMDE_FLOAT32_C(  308.00), SIMDE_FLOAT32_C( -159.40)),
      simde_mm_set_epi32(INT32_C( -582979650), INT32_C( 2118735836), INT32_C( -687047741), INT32_C( -848367450)),
      { SIMDE_FLOAT32_C(  678.23), SIMDE_FLOAT32_C(  517.86), SIMDE_FLOAT32_C(  930.59), SIMDE_FLOAT32_C( -376.44) },
      { SIMDE_FLOAT32_C( -159.40), SIMDE_FLOAT32_C(  308.00), SIMDE_FLOAT32_C(  930.59), SIMDE_FLOAT32_C(  805.10) } },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  286.93), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -966.59), SIMDE_FLOAT32_C( -652.85)),
      simde_mm_set_epi32(INT32_C( -773117066), INT32_C(-1995762340), INT32_C(-1086112436), INT32_C(  352565673)),
      { SIMDE_FLOAT32_C( -243.02), SIMDE_FLOAT32_C(  -10.35), SIMDE_FLOAT32_C( -930.64), SIMDE_FLOAT32_C( -942.71) },
      { SIMDE_FLOAT32_C( -243.02), SIMDE_FLOAT32_C( -966.59), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C(  286.93) } },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  695.71), SIMDE_FLOAT32_C(  544.01), SIMDE_FLOAT32_C( -155.11), SIMDE_FLOAT32_C(  773.52)),
      simde_mm_set_epi32(INT32_C( 2130523937), INT32_C(  959365319), INT32_C(  -87305215), INT32_C(  336137071)),
      { SIMDE_FLOAT32_C(  331.75), SIMDE_FLOAT32_C(  641.50), SIMDE_FLOAT32_C( -114.90), SIMDE_FLOAT32_C(  582.07) },
      { SIMDE_FLOAT32_C(  331.75), SIMDE_FLOAT32_C( -155.11), SIMDE_FLOAT32_C( -114.90), SIMDE_FLOAT32_C(  582.07) } },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -51.27), SIMDE_FLOAT32_C(  493.10), SIMDE_FLOAT32_C( -182.48), SIMDE_FLOAT32_C( -902.32)),
      simde_mm_set_epi32(INT32_C( -776181519), INT32_C(-1636897440), INT32_C( -492655883), INT32_C(-1758902344)),
      { SIMDE_FLOAT32_C(  -81.09), SIMDE_FLOAT32_C( -980.86), SIMDE_FLOAT32_C( -619.01), SIMDE_FLOAT32_C( -490.33) },
      { SIMDE_FLOAT32_C( -902.32), SIMDE_FLOAT32_C( -182.48), SIMDE_FLOAT32_C(  493.10), SIMDE_FLOAT32_C(  -51.27) } },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  432.35), SIMDE_FLOAT32_C(  341.26), SIMDE_FLOAT32_C(  183.18), SIMDE_FLOAT32_C( -136.49)),
      simde_mm_set_epi32(INT32_C( 1637126517), INT32_C(-1887081950), INT32_C( 1956680612), INT32_C( 1224753500)),
      { SIMDE_FLOAT32_C( -471.44), SIMDE_FLOAT32_C(  241.66), SIMDE_FLOAT32_C(  -19.61), SIMDE_FLOAT32_C( -311.97) },
      { SIMDE_FLOAT32_C( -471.44), SIMDE_FLOAT32_C(  241.66), SIMDE_FLOAT32_C(  341.26), SIMDE_FLOAT32_C( -311.97) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde_float32 r[4];
    /* Seed the destination with the initial contents so that elements the
     * store leaves alone can be verified as well. */
    simde_memcpy(r, test_vec[i].ri, sizeof(r));
    simde_mm_maskstore_ps(r, test_vec[i].mask, test_vec[i].a);
    /* NOTE(review): the trailing 1 is presumably a precision/tolerance
     * argument -- confirm against the macro definition. */
    simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro, 1);
  }

  return 0;
}

/* Table-driven test for simde_mm256_maskstore_ps.
 *
 * Each entry supplies a source vector, a mask, the initial contents of an
 * eight-float destination buffer, and the contents expected after the
 * store.  In the vectors below, a destination element takes the matching
 * element of `a` when the matching mask element is negative, and keeps
 * its initial value otherwise.
 *
 * The constructors list elements in the opposite order from the arrays:
 * as the data shows, the last argument of simde_mm256_set_ps /
 * simde_mm256_set_epi32 pairs with index 0 of `ri` / `ro`.
 *
 * Returns 0 once every vector has been checked.
 */
static int
test_simde_mm256_maskstore_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;        /* values offered to the store */
    simde__m256i mask;    /* per-element mask passed to the store */
    simde_float32 ri[8];  /* destination contents before the store */
    simde_float32 ro[8];  /* destination contents expected after the store */
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   631.62), SIMDE_FLOAT32_C(  -891.94),
                         SIMDE_FLOAT32_C(  -689.27), SIMDE_FLOAT32_C(   347.81),
                         SIMDE_FLOAT32_C(  -616.22), SIMDE_FLOAT32_C(   642.58),
                         SIMDE_FLOAT32_C(   228.19), SIMDE_FLOAT32_C(  -205.29)),
      simde_mm256_set_epi32(INT32_C(  295073064), INT32_C(  716384814), INT32_C(  546124227), INT32_C( -305648391),
                            INT32_C(-1290025628), INT32_C(-1970079627), INT32_C( -571027584), INT32_C(  423261258)),
      { SIMDE_FLOAT32_C(  -608.38), SIMDE_FLOAT32_C(   456.15),
        SIMDE_FLOAT32_C(   520.16), SIMDE_FLOAT32_C(   784.51),
        SIMDE_FLOAT32_C(   874.80), SIMDE_FLOAT32_C(  -683.96),
        SIMDE_FLOAT32_C(  -492.84), SIMDE_FLOAT32_C(  -304.46) },
      { SIMDE_FLOAT32_C(  -608.38), SIMDE_FLOAT32_C(   228.19),
        SIMDE_FLOAT32_C(   642.58), SIMDE_FLOAT32_C(  -616.22),
        SIMDE_FLOAT32_C(   347.81), SIMDE_FLOAT32_C(  -683.96),
        SIMDE_FLOAT32_C(  -492.84), SIMDE_FLOAT32_C(  -304.46) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   410.61), SIMDE_FLOAT32_C(  -548.69),
                         SIMDE_FLOAT32_C(  -844.69), SIMDE_FLOAT32_C(   115.46),
                         SIMDE_FLOAT32_C(  -883.40), SIMDE_FLOAT32_C(  -942.83),
                         SIMDE_FLOAT32_C(   299.17), SIMDE_FLOAT32_C(   463.83)),
      simde_mm256_set_epi32(INT32_C(  162007636), INT32_C( 1372837309), INT32_C( 1276687632), INT32_C(-1819337795),
                            INT32_C(-2027923298), INT32_C(  531821300), INT32_C( 1062735782), INT32_C( -103135294)),
      { SIMDE_FLOAT32_C(  -186.23), SIMDE_FLOAT32_C(   411.88),
        SIMDE_FLOAT32_C(  -764.36), SIMDE_FLOAT32_C(   765.54),
        SIMDE_FLOAT32_C(  -288.35), SIMDE_FLOAT32_C(   486.60),
        SIMDE_FLOAT32_C(  -405.36), SIMDE_FLOAT32_C(   812.05) },
      { SIMDE_FLOAT32_C(   463.83), SIMDE_FLOAT32_C(   411.88),
        SIMDE_FLOAT32_C(  -764.36), SIMDE_FLOAT32_C(  -883.40),
        SIMDE_FLOAT32_C(   115.46), SIMDE_FLOAT32_C(   486.60),
        SIMDE_FLOAT32_C(  -405.36), SIMDE_FLOAT32_C(   812.05) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -783.01), SIMDE_FLOAT32_C(  -741.62),
                         SIMDE_FLOAT32_C(  -384.48), SIMDE_FLOAT32_C(   584.48),
                         SIMDE_FLOAT32_C(  -995.40), SIMDE_FLOAT32_C(  -545.74),
                         SIMDE_FLOAT32_C(  -304.24), SIMDE_FLOAT32_C(   611.74)),
      simde_mm256_set_epi32(INT32_C(-1838874946), INT32_C(-2030629043), INT32_C(   38494100), INT32_C(   -7221084),
                            INT32_C(-1092763998), INT32_C(   39946466), INT32_C( -239157020), INT32_C(-1142390879)),
      { SIMDE_FLOAT32_C(  -415.76), SIMDE_FLOAT32_C(   542.61),
        SIMDE_FLOAT32_C(   568.26), SIMDE_FLOAT32_C(   280.16),
        SIMDE_FLOAT32_C(   662.20), SIMDE_FLOAT32_C(   -14.92),
        SIMDE_FLOAT32_C(   639.71), SIMDE_FLOAT32_C(  -773.09) },
      { SIMDE_FLOAT32_C(   611.74), SIMDE_FLOAT32_C(  -304.24),
        SIMDE_FLOAT32_C(   568.26), SIMDE_FLOAT32_C(  -995.40),
        SIMDE_FLOAT32_C(   584.48), SIMDE_FLOAT32_C(   -14.92),
        SIMDE_FLOAT32_C(  -741.62), SIMDE_FLOAT32_C(  -783.01) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   921.86), SIMDE_FLOAT32_C(  -688.86),
                         SIMDE_FLOAT32_C(    43.96), SIMDE_FLOAT32_C(   754.20),
                         SIMDE_FLOAT32_C(  -480.27), SIMDE_FLOAT32_C(  -645.94),
                         SIMDE_FLOAT32_C(   315.20), SIMDE_FLOAT32_C(   726.23)),
      simde_mm256_set_epi32(INT32_C( -956355020), INT32_C( -805184504), INT32_C(   -5391233), INT32_C(  154150621),
                            INT32_C( -322849130), INT32_C(-1596216639), INT32_C( -653101729), INT32_C( -652476461)),
      { SIMDE_FLOAT32_C(   -76.23), SIMDE_FLOAT32_C(   -18.44),
        SIMDE_FLOAT32_C(   771.31), SIMDE_FLOAT32_C(   206.00),
        SIMDE_FLOAT32_C(   650.40), SIMDE_FLOAT32_C(   -69.08),
        SIMDE_FLOAT32_C(  -737.29), SIMDE_FLOAT32_C(   222.97) },
      { SIMDE_FLOAT32_C(   726.23), SIMDE_FLOAT32_C(   315.20),
        SIMDE_FLOAT32_C(  -645.94), SIMDE_FLOAT32_C(  -480.27),
        SIMDE_FLOAT32_C(   650.40), SIMDE_FLOAT32_C(    43.96),
        SIMDE_FLOAT32_C(  -688.86), SIMDE_FLOAT32_C(   921.86) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   606.49), SIMDE_FLOAT32_C(   777.74),
                         SIMDE_FLOAT32_C(   -66.55), SIMDE_FLOAT32_C(   -42.93),
                         SIMDE_FLOAT32_C(    40.92), SIMDE_FLOAT32_C(  -104.82),
                         SIMDE_FLOAT32_C(   745.57), SIMDE_FLOAT32_C(  -526.15)),
      simde_mm256_set_epi32(INT32_C( 1494400292), INT32_C(  898207849), INT32_C(  499297865), INT32_C(   32131455),
                            INT32_C(  837039755), INT32_C(  686948685), INT32_C( 1413261791), INT32_C( 1091352937)),
      { SIMDE_FLOAT32_C(  -911.46), SIMDE_FLOAT32_C(   181.07),
        SIMDE_FLOAT32_C(    60.18), SIMDE_FLOAT32_C(   299.13),
        SIMDE_FLOAT32_C(  -412.14), SIMDE_FLOAT32_C(  -496.33),
        SIMDE_FLOAT32_C(   300.62), SIMDE_FLOAT32_C(  -738.40) },
      { SIMDE_FLOAT32_C(  -911.46), SIMDE_FLOAT32_C(   181.07),
        SIMDE_FLOAT32_C(    60.18), SIMDE_FLOAT32_C(   299.13),
        SIMDE_FLOAT32_C(  -412.14), SIMDE_FLOAT32_C(  -496.33),
        SIMDE_FLOAT32_C(   300.62), SIMDE_FLOAT32_C(  -738.40) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -418.84), SIMDE_FLOAT32_C(   873.25),
                         SIMDE_FLOAT32_C(   510.96), SIMDE_FLOAT32_C(   992.60),
                         SIMDE_FLOAT32_C(  -316.31), SIMDE_FLOAT32_C(    91.31),
                         SIMDE_FLOAT32_C(   578.46), SIMDE_FLOAT32_C(  -878.58)),
      simde_mm256_set_epi32(INT32_C(  957079452), INT32_C( 1280918142), INT32_C( 1743745557), INT32_C( 1819407670),
                            INT32_C(  242366822), INT32_C( 1212388671), INT32_C(-1368211077), INT32_C( 1178548564)),
      { SIMDE_FLOAT32_C(   268.38), SIMDE_FLOAT32_C(   166.26),
        SIMDE_FLOAT32_C(  -817.18), SIMDE_FLOAT32_C(  -906.27),
        SIMDE_FLOAT32_C(   690.97), SIMDE_FLOAT32_C(    40.42),
        SIMDE_FLOAT32_C(   937.35), SIMDE_FLOAT32_C(  -494.05) },
      { SIMDE_FLOAT32_C(   268.38), SIMDE_FLOAT32_C(   578.46),
        SIMDE_FLOAT32_C(  -817.18), SIMDE_FLOAT32_C(  -906.27),
        SIMDE_FLOAT32_C(   690.97), SIMDE_FLOAT32_C(    40.42),
        SIMDE_FLOAT32_C(   937.35), SIMDE_FLOAT32_C(  -494.05) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   991.62), SIMDE_FLOAT32_C(   121.11),
                         SIMDE_FLOAT32_C(   504.72), SIMDE_FLOAT32_C(   982.21),
                         SIMDE_FLOAT32_C(   263.79), SIMDE_FLOAT32_C(   803.03),
                         SIMDE_FLOAT32_C(    92.44), SIMDE_FLOAT32_C(  -807.69)),
      simde_mm256_set_epi32(INT32_C(-2097969116), INT32_C(-2051872419), INT32_C(  269695043), INT32_C( -952585033),
                            INT32_C( 1293504381), INT32_C( -196806212), INT32_C(-2045108827), INT32_C( 1173779579)),
      { SIMDE_FLOAT32_C(   292.67), SIMDE_FLOAT32_C(   917.15),
        SIMDE_FLOAT32_C(    90.37), SIMDE_FLOAT32_C(   166.43),
        SIMDE_FLOAT32_C(   627.88), SIMDE_FLOAT32_C(  -780.11),
        SIMDE_FLOAT32_C(  -304.67), SIMDE_FLOAT32_C(  -518.81) },
      { SIMDE_FLOAT32_C(   292.67), SIMDE_FLOAT32_C(    92.44),
        SIMDE_FLOAT32_C(   803.03), SIMDE_FLOAT32_C(   166.43),
        SIMDE_FLOAT32_C(   982.21), SIMDE_FLOAT32_C(  -780.11),
        SIMDE_FLOAT32_C(   121.11), SIMDE_FLOAT32_C(   991.62) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -433.27), SIMDE_FLOAT32_C(   892.50),
                         SIMDE_FLOAT32_C(   314.40), SIMDE_FLOAT32_C(  -419.48),
                         SIMDE_FLOAT32_C(  -384.30), SIMDE_FLOAT32_C(  -314.92),
                         SIMDE_FLOAT32_C(   743.09), SIMDE_FLOAT32_C(  -477.54)),
      simde_mm256_set_epi32(INT32_C( 1543878346), INT32_C(  -55266127), INT32_C(-1716646352), INT32_C( 1500867969),
                            INT32_C( -538309268), INT32_C( 1738471819), INT32_C( -967093953), INT32_C( -389124917)),
      { SIMDE_FLOAT32_C(   553.15), SIMDE_FLOAT32_C(  -107.56),
        SIMDE_FLOAT32_C(  -195.96), SIMDE_FLOAT32_C(   763.27),
        SIMDE_FLOAT32_C(  -256.55), SIMDE_FLOAT32_C(  -826.51),
        SIMDE_FLOAT32_C(  -168.36), SIMDE_FLOAT32_C(  -340.90) },
      { SIMDE_FLOAT32_C(  -477.54), SIMDE_FLOAT32_C(   743.09),
        SIMDE_FLOAT32_C(  -195.96), SIMDE_FLOAT32_C(  -384.30),
        SIMDE_FLOAT32_C(  -256.55), SIMDE_FLOAT32_C(   314.40),
        SIMDE_FLOAT32_C(   892.50), SIMDE_FLOAT32_C(  -340.90) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde_float32 r[8];
    /* Seed the destination with the initial contents so that elements the
     * store leaves alone can be verified as well. */
    simde_memcpy(r, test_vec[i].ri, sizeof(r));
    simde_mm256_maskstore_ps(r, test_vec[i].mask, test_vec[i].a);
    /* NOTE(review): the trailing 1 is presumably a precision/tolerance
     * argument -- confirm against the macro definition. */
    simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro, 1);
  }

  return 0;
}

/* Table-driven test for simde_mm256_min_ps.
 *
 * Each entry holds two input vectors and the expected result.  In every
 * vector below, each element of `r` is the smaller of the elements of
 * `a` and `b` at the same position.  The table contains only ordinary
 * finite values.
 *
 * Returns 0 once every vector has been checked.
 */
static int
test_simde_mm256_min_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;  /* first operand */
    simde__m256 b;  /* second operand */
    simde__m256 r;  /* expected result */
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  620.32), SIMDE_FLOAT32_C( -596.35),
                         SIMDE_FLOAT32_C(  174.72), SIMDE_FLOAT32_C(  165.53),
                         SIMDE_FLOAT32_C(  242.92), SIMDE_FLOAT32_C(  330.00),
                         SIMDE_FLOAT32_C( -436.53), SIMDE_FLOAT32_C( -259.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  137.36), SIMDE_FLOAT32_C(  -49.88),
                         SIMDE_FLOAT32_C(  846.67), SIMDE_FLOAT32_C(  642.07),
                         SIMDE_FLOAT32_C(  353.31), SIMDE_FLOAT32_C( -696.33),
                         SIMDE_FLOAT32_C( -153.51), SIMDE_FLOAT32_C( -347.51)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  137.36), SIMDE_FLOAT32_C( -596.35),
                         SIMDE_FLOAT32_C(  174.72), SIMDE_FLOAT32_C(  165.53),
                         SIMDE_FLOAT32_C(  242.92), SIMDE_FLOAT32_C( -696.33),
                         SIMDE_FLOAT32_C( -436.53), SIMDE_FLOAT32_C( -347.51)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  938.63), SIMDE_FLOAT32_C(  244.19),
                         SIMDE_FLOAT32_C(  355.24), SIMDE_FLOAT32_C(  261.35),
                         SIMDE_FLOAT32_C(  679.42), SIMDE_FLOAT32_C(  -31.35),
                         SIMDE_FLOAT32_C(  138.77), SIMDE_FLOAT32_C( -717.66)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -563.43), SIMDE_FLOAT32_C(  586.11),
                         SIMDE_FLOAT32_C( -131.07), SIMDE_FLOAT32_C(  850.65),
                         SIMDE_FLOAT32_C(  165.14), SIMDE_FLOAT32_C( -413.67),
                         SIMDE_FLOAT32_C( -290.54), SIMDE_FLOAT32_C(  984.51)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -563.43), SIMDE_FLOAT32_C(  244.19),
                         SIMDE_FLOAT32_C( -131.07), SIMDE_FLOAT32_C(  261.35),
                         SIMDE_FLOAT32_C(  165.14), SIMDE_FLOAT32_C( -413.67),
                         SIMDE_FLOAT32_C( -290.54), SIMDE_FLOAT32_C( -717.66)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -838.48), SIMDE_FLOAT32_C(  344.61),
                         SIMDE_FLOAT32_C( -913.53), SIMDE_FLOAT32_C(  858.23),
                         SIMDE_FLOAT32_C( -347.90), SIMDE_FLOAT32_C( -707.87),
                         SIMDE_FLOAT32_C( -634.91), SIMDE_FLOAT32_C( -919.82)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -180.34), SIMDE_FLOAT32_C( -789.22),
                         SIMDE_FLOAT32_C(  607.01), SIMDE_FLOAT32_C(  440.98),
                         SIMDE_FLOAT32_C(  432.59), SIMDE_FLOAT32_C( -196.73),
                         SIMDE_FLOAT32_C(  380.83), SIMDE_FLOAT32_C(  796.17)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -838.48), SIMDE_FLOAT32_C( -789.22),
                         SIMDE_FLOAT32_C( -913.53), SIMDE_FLOAT32_C(  440.98),
                         SIMDE_FLOAT32_C( -347.90), SIMDE_FLOAT32_C( -707.87),
                         SIMDE_FLOAT32_C( -634.91), SIMDE_FLOAT32_C( -919.82)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -210.35), SIMDE_FLOAT32_C( -870.90),
                         SIMDE_FLOAT32_C(  992.08), SIMDE_FLOAT32_C( -822.82),
                         SIMDE_FLOAT32_C( -209.97), SIMDE_FLOAT32_C( -436.22),
                         SIMDE_FLOAT32_C(  481.44), SIMDE_FLOAT32_C(  169.09)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -860.96), SIMDE_FLOAT32_C(  972.39),
                         SIMDE_FLOAT32_C(  830.57), SIMDE_FLOAT32_C(  -23.76),
                         SIMDE_FLOAT32_C(  311.19), SIMDE_FLOAT32_C(  554.15),
                         SIMDE_FLOAT32_C(    0.80), SIMDE_FLOAT32_C( -247.41)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -860.96), SIMDE_FLOAT32_C( -870.90),
                         SIMDE_FLOAT32_C(  830.57), SIMDE_FLOAT32_C( -822.82),
                         SIMDE_FLOAT32_C( -209.97), SIMDE_FLOAT32_C( -436.22),
                         SIMDE_FLOAT32_C(    0.80), SIMDE_FLOAT32_C( -247.41)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -158.36), SIMDE_FLOAT32_C( -758.94),
                         SIMDE_FLOAT32_C(  618.04), SIMDE_FLOAT32_C(  976.02),
                         SIMDE_FLOAT32_C( -953.60), SIMDE_FLOAT32_C(  866.14),
                         SIMDE_FLOAT32_C(  565.22), SIMDE_FLOAT32_C(  554.29)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -8.54), SIMDE_FLOAT32_C( -896.75),
                         SIMDE_FLOAT32_C(  630.48), SIMDE_FLOAT32_C(   27.00),
                         SIMDE_FLOAT32_C(  865.05), SIMDE_FLOAT32_C( -640.13),
                         SIMDE_FLOAT32_C( -969.96), SIMDE_FLOAT32_C( -427.62)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -158.36), SIMDE_FLOAT32_C( -896.75),
                         SIMDE_FLOAT32_C(  618.04), SIMDE_FLOAT32_C(   27.00),
                         SIMDE_FLOAT32_C( -953.60), SIMDE_FLOAT32_C( -640.13),
                         SIMDE_FLOAT32_C( -969.96), SIMDE_FLOAT32_C( -427.62)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -601.54), SIMDE_FLOAT32_C( -545.11),
                         SIMDE_FLOAT32_C( -568.66), SIMDE_FLOAT32_C(  393.12),
                         SIMDE_FLOAT32_C( -656.85), SIMDE_FLOAT32_C(  612.23),
                         SIMDE_FLOAT32_C(  417.91), SIMDE_FLOAT32_C( -206.48)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -56.90), SIMDE_FLOAT32_C( -986.18),
                         SIMDE_FLOAT32_C(  948.05), SIMDE_FLOAT32_C(  -52.08),
                         SIMDE_FLOAT32_C( -838.46), SIMDE_FLOAT32_C( -751.49),
                         SIMDE_FLOAT32_C(  775.89), SIMDE_FLOAT32_C(  940.13)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -601.54), SIMDE_FLOAT32_C( -986.18),
                         SIMDE_FLOAT32_C( -568.66), SIMDE_FLOAT32_C(  -52.08),
                         SIMDE_FLOAT32_C( -838.46), SIMDE_FLOAT32_C( -751.49),
                         SIMDE_FLOAT32_C(  417.91), SIMDE_FLOAT32_C( -206.48)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  459.26), SIMDE_FLOAT32_C( -447.11),
                         SIMDE_FLOAT32_C(  826.76), SIMDE_FLOAT32_C( -107.61),
                         SIMDE_FLOAT32_C( -521.03), SIMDE_FLOAT32_C(  -33.76),
                         SIMDE_FLOAT32_C(  315.53), SIMDE_FLOAT32_C( -222.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -136.58), SIMDE_FLOAT32_C(   50.72),
                         SIMDE_FLOAT32_C(  921.42), SIMDE_FLOAT32_C(  664.07),
                         SIMDE_FLOAT32_C(  743.00), SIMDE_FLOAT32_C( -236.39),
                         SIMDE_FLOAT32_C(  981.20), SIMDE_FLOAT32_C(  280.47)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -136.58), SIMDE_FLOAT32_C( -447.11),
                         SIMDE_FLOAT32_C(  826.76), SIMDE_FLOAT32_C( -107.61),
                         SIMDE_FLOAT32_C( -521.03), SIMDE_FLOAT32_C( -236.39),
                         SIMDE_FLOAT32_C(  315.53), SIMDE_FLOAT32_C( -222.31)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  356.76), SIMDE_FLOAT32_C( -389.22),
                         SIMDE_FLOAT32_C( -477.96), SIMDE_FLOAT32_C(   -3.77),
                         SIMDE_FLOAT32_C( -645.03), SIMDE_FLOAT32_C( -766.89),
                         SIMDE_FLOAT32_C(  755.76), SIMDE_FLOAT32_C(  244.51)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  823.25), SIMDE_FLOAT32_C(  384.14),
                         SIMDE_FLOAT32_C(  475.61), SIMDE_FLOAT32_C( -650.92),
                         SIMDE_FLOAT32_C( -913.35), SIMDE_FLOAT32_C( -290.77),
                         SIMDE_FLOAT32_C(  213.82), SIMDE_FLOAT32_C( -350.01)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  356.76), SIMDE_FLOAT32_C( -389.22),
                         SIMDE_FLOAT32_C( -477.96), SIMDE_FLOAT32_C( -650.92),
                         SIMDE_FLOAT32_C( -913.35), SIMDE_FLOAT32_C( -766.89),
                         SIMDE_FLOAT32_C(  213.82), SIMDE_FLOAT32_C( -350.01)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256 r = simde_mm256_min_ps(test_vec[i].a, test_vec[i].b);
    /* NOTE(review): the trailing 1 is presumably a precision/tolerance
     * argument -- confirm against the macro definition. */
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven test for simde_mm256_min_pd.
 *
 * Each entry holds two input vectors and the expected result.  In every
 * vector below, each element of `r` is the smaller of the elements of
 * `a` and `b` at the same position.  The table contains only ordinary
 * finite values.
 *
 * Returns 0 once every vector has been checked.
 */
static int
test_simde_mm256_min_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;  /* first operand */
    simde__m256d b;  /* second operand */
    simde__m256d r;  /* expected result */
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  207.41), SIMDE_FLOAT64_C(  328.63),
                         SIMDE_FLOAT64_C( -694.69), SIMDE_FLOAT64_C(  687.63)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   89.05), SIMDE_FLOAT64_C(  448.86),
                         SIMDE_FLOAT64_C(   19.12), SIMDE_FLOAT64_C( -158.19)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   89.05), SIMDE_FLOAT64_C(  328.63),
                         SIMDE_FLOAT64_C( -694.69), SIMDE_FLOAT64_C( -158.19)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  809.97), SIMDE_FLOAT64_C( -437.57),
                         SIMDE_FLOAT64_C( -994.98), SIMDE_FLOAT64_C( -594.51)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  949.89), SIMDE_FLOAT64_C( -515.80),
                         SIMDE_FLOAT64_C( -545.90), SIMDE_FLOAT64_C(  794.78)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  809.97), SIMDE_FLOAT64_C( -515.80),
                         SIMDE_FLOAT64_C( -994.98), SIMDE_FLOAT64_C( -594.51)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -509.90), SIMDE_FLOAT64_C( -421.15),
                         SIMDE_FLOAT64_C( -539.85), SIMDE_FLOAT64_C( -245.38)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -30.48), SIMDE_FLOAT64_C( -451.08),
                         SIMDE_FLOAT64_C(  -92.82), SIMDE_FLOAT64_C( -896.69)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -509.90), SIMDE_FLOAT64_C( -451.08),
                         SIMDE_FLOAT64_C( -539.85), SIMDE_FLOAT64_C( -896.69)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  362.92), SIMDE_FLOAT64_C(  618.37),
                         SIMDE_FLOAT64_C( -874.81), SIMDE_FLOAT64_C(  119.95)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  479.87), SIMDE_FLOAT64_C(  161.56),
                         SIMDE_FLOAT64_C(  162.67), SIMDE_FLOAT64_C( -967.58)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  362.92), SIMDE_FLOAT64_C(  161.56),
                         SIMDE_FLOAT64_C( -874.81), SIMDE_FLOAT64_C( -967.58)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -124.73), SIMDE_FLOAT64_C(  204.12),
                         SIMDE_FLOAT64_C( -546.39), SIMDE_FLOAT64_C(  -78.22)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  120.00), SIMDE_FLOAT64_C(  851.26),
                         SIMDE_FLOAT64_C(  153.83), SIMDE_FLOAT64_C(  393.14)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -124.73), SIMDE_FLOAT64_C(  204.12),
                         SIMDE_FLOAT64_C( -546.39), SIMDE_FLOAT64_C(  -78.22)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -812.91), SIMDE_FLOAT64_C(  797.50),
                         SIMDE_FLOAT64_C(  285.31), SIMDE_FLOAT64_C(  340.33)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -48.34), SIMDE_FLOAT64_C( -906.19),
                         SIMDE_FLOAT64_C(  314.61), SIMDE_FLOAT64_C(  602.47)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -812.91), SIMDE_FLOAT64_C( -906.19),
                         SIMDE_FLOAT64_C(  285.31), SIMDE_FLOAT64_C(  340.33)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  376.08), SIMDE_FLOAT64_C(  847.57),
                         SIMDE_FLOAT64_C(  979.10), SIMDE_FLOAT64_C( -602.74)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  307.85), SIMDE_FLOAT64_C(  571.44),
                         SIMDE_FLOAT64_C( -621.81), SIMDE_FLOAT64_C(  -12.29)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  307.85), SIMDE_FLOAT64_C(  571.44),
                         SIMDE_FLOAT64_C( -621.81), SIMDE_FLOAT64_C( -602.74)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -682.10), SIMDE_FLOAT64_C(  349.84),
                         SIMDE_FLOAT64_C( -943.34), SIMDE_FLOAT64_C(  341.61)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -89.49), SIMDE_FLOAT64_C( -236.51),
                         SIMDE_FLOAT64_C(  632.28), SIMDE_FLOAT64_C( -535.13)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -682.10), SIMDE_FLOAT64_C( -236.51),
                         SIMDE_FLOAT64_C( -943.34), SIMDE_FLOAT64_C( -535.13)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256d r = simde_mm256_min_pd(test_vec[i].a, test_vec[i].b);
    /* NOTE(review): the trailing 1 is presumably a precision/tolerance
     * argument -- confirm against the macro definition. */
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven test for simde_mm256_max_ps.
 *
 * Each entry holds two input vectors and the expected result.  In every
 * vector below, each element of `r` is the larger of the elements of
 * `a` and `b` at the same position.  The table contains only ordinary
 * finite values.
 *
 * Returns 0 once every vector has been checked.
 */
static int
test_simde_mm256_max_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;  /* first operand */
    simde__m256 b;  /* second operand */
    simde__m256 r;  /* expected result */
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  449.92), SIMDE_FLOAT32_C(   34.28),
                         SIMDE_FLOAT32_C(  -25.78), SIMDE_FLOAT32_C(  210.08),
                         SIMDE_FLOAT32_C(  389.04), SIMDE_FLOAT32_C( -871.84),
                         SIMDE_FLOAT32_C( -259.15), SIMDE_FLOAT32_C( -935.03)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -540.89), SIMDE_FLOAT32_C(  480.33),
                         SIMDE_FLOAT32_C(   35.20), SIMDE_FLOAT32_C(  243.72),
                         SIMDE_FLOAT32_C(  827.03), SIMDE_FLOAT32_C(  -35.53),
                         SIMDE_FLOAT32_C( -369.50), SIMDE_FLOAT32_C( -834.21)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  449.92), SIMDE_FLOAT32_C(  480.33),
                         SIMDE_FLOAT32_C(   35.20), SIMDE_FLOAT32_C(  243.72),
                         SIMDE_FLOAT32_C(  827.03), SIMDE_FLOAT32_C(  -35.53),
                         SIMDE_FLOAT32_C( -259.15), SIMDE_FLOAT32_C( -834.21)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  489.45), SIMDE_FLOAT32_C( -170.78),
                         SIMDE_FLOAT32_C(  183.48), SIMDE_FLOAT32_C(  307.64),
                         SIMDE_FLOAT32_C( -977.66), SIMDE_FLOAT32_C(  745.18),
                         SIMDE_FLOAT32_C(  561.96), SIMDE_FLOAT32_C( -868.28)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -220.92), SIMDE_FLOAT32_C(  742.39),
                         SIMDE_FLOAT32_C(  682.97), SIMDE_FLOAT32_C(  319.92),
                         SIMDE_FLOAT32_C(  734.45), SIMDE_FLOAT32_C( -490.69),
                         SIMDE_FLOAT32_C( -753.68), SIMDE_FLOAT32_C( -110.63)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  489.45), SIMDE_FLOAT32_C(  742.39),
                         SIMDE_FLOAT32_C(  682.97), SIMDE_FLOAT32_C(  319.92),
                         SIMDE_FLOAT32_C(  734.45), SIMDE_FLOAT32_C(  745.18),
                         SIMDE_FLOAT32_C(  561.96), SIMDE_FLOAT32_C( -110.63)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -934.39), SIMDE_FLOAT32_C( -436.28),
                         SIMDE_FLOAT32_C(  572.10), SIMDE_FLOAT32_C( -111.64),
                         SIMDE_FLOAT32_C(  551.18), SIMDE_FLOAT32_C(  829.61),
                         SIMDE_FLOAT32_C( -107.94), SIMDE_FLOAT32_C( -864.62)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  797.77), SIMDE_FLOAT32_C(  207.18),
                         SIMDE_FLOAT32_C( -690.52), SIMDE_FLOAT32_C(  861.15),
                         SIMDE_FLOAT32_C(  110.08), SIMDE_FLOAT32_C(   67.85),
                         SIMDE_FLOAT32_C( -389.66), SIMDE_FLOAT32_C(  867.20)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  797.77), SIMDE_FLOAT32_C(  207.18),
                         SIMDE_FLOAT32_C(  572.10), SIMDE_FLOAT32_C(  861.15),
                         SIMDE_FLOAT32_C(  551.18), SIMDE_FLOAT32_C(  829.61),
                         SIMDE_FLOAT32_C( -107.94), SIMDE_FLOAT32_C(  867.20)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  481.13), SIMDE_FLOAT32_C(  -53.37),
                         SIMDE_FLOAT32_C(   -0.04), SIMDE_FLOAT32_C(  614.09),
                         SIMDE_FLOAT32_C(  596.94), SIMDE_FLOAT32_C(  349.18),
                         SIMDE_FLOAT32_C(   53.36), SIMDE_FLOAT32_C(  244.43)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -253.17), SIMDE_FLOAT32_C( -107.20),
                         SIMDE_FLOAT32_C( -893.53), SIMDE_FLOAT32_C(  608.05),
                         SIMDE_FLOAT32_C(  292.67), SIMDE_FLOAT32_C(  529.08),
                         SIMDE_FLOAT32_C(  170.70), SIMDE_FLOAT32_C( -778.36)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  481.13), SIMDE_FLOAT32_C(  -53.37),
                         SIMDE_FLOAT32_C(   -0.04), SIMDE_FLOAT32_C(  614.09),
                         SIMDE_FLOAT32_C(  596.94), SIMDE_FLOAT32_C(  529.08),
                         SIMDE_FLOAT32_C(  170.70), SIMDE_FLOAT32_C(  244.43)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -125.79), SIMDE_FLOAT32_C( -776.79),
                         SIMDE_FLOAT32_C( -268.99), SIMDE_FLOAT32_C(  991.82),
                         SIMDE_FLOAT32_C(  880.36), SIMDE_FLOAT32_C( -906.06),
                         SIMDE_FLOAT32_C( -550.03), SIMDE_FLOAT32_C(  415.17)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  733.77), SIMDE_FLOAT32_C( -207.30),
                         SIMDE_FLOAT32_C( -944.02), SIMDE_FLOAT32_C( -591.97),
                         SIMDE_FLOAT32_C( -584.21), SIMDE_FLOAT32_C(  271.28),
                         SIMDE_FLOAT32_C( -845.79), SIMDE_FLOAT32_C( -155.27)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  733.77), SIMDE_FLOAT32_C( -207.30),
                         SIMDE_FLOAT32_C( -268.99), SIMDE_FLOAT32_C(  991.82),
                         SIMDE_FLOAT32_C(  880.36), SIMDE_FLOAT32_C(  271.28),
                         SIMDE_FLOAT32_C( -550.03), SIMDE_FLOAT32_C(  415.17)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -136.67), SIMDE_FLOAT32_C(  221.00),
                         SIMDE_FLOAT32_C( -864.98), SIMDE_FLOAT32_C(  290.97),
                         SIMDE_FLOAT32_C( -704.76), SIMDE_FLOAT32_C( -867.43),
                         SIMDE_FLOAT32_C( -323.83), SIMDE_FLOAT32_C(   74.81)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -127.03), SIMDE_FLOAT32_C(  135.62),
                         SIMDE_FLOAT32_C( -308.59), SIMDE_FLOAT32_C( -352.70),
                         SIMDE_FLOAT32_C(  883.68), SIMDE_FLOAT32_C(  134.86),
                         SIMDE_FLOAT32_C( -894.89), SIMDE_FLOAT32_C( -737.05)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -127.03), SIMDE_FLOAT32_C(  221.00),
                         SIMDE_FLOAT32_C( -308.59), SIMDE_FLOAT32_C(  290.97),
                         SIMDE_FLOAT32_C(  883.68), SIMDE_FLOAT32_C(  134.86),
                         SIMDE_FLOAT32_C( -323.83), SIMDE_FLOAT32_C(   74.81)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  780.81), SIMDE_FLOAT32_C(  -76.69),
                         SIMDE_FLOAT32_C( -213.47), SIMDE_FLOAT32_C(  296.05),
                         SIMDE_FLOAT32_C(  129.81), SIMDE_FLOAT32_C(   95.07),
                         SIMDE_FLOAT32_C( -493.97), SIMDE_FLOAT32_C( -309.39)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  224.91), SIMDE_FLOAT32_C(  545.10),
                         SIMDE_FLOAT32_C( -928.25), SIMDE_FLOAT32_C(  373.14),
                         SIMDE_FLOAT32_C( -710.01), SIMDE_FLOAT32_C(  166.25),
                         SIMDE_FLOAT32_C(  729.82), SIMDE_FLOAT32_C(  996.22)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  780.81), SIMDE_FLOAT32_C(  545.10),
                         SIMDE_FLOAT32_C( -213.47), SIMDE_FLOAT32_C(  373.14),
                         SIMDE_FLOAT32_C(  129.81), SIMDE_FLOAT32_C(  166.25),
                         SIMDE_FLOAT32_C(  729.82), SIMDE_FLOAT32_C(  996.22)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -153.19), SIMDE_FLOAT32_C( -809.42),
                         SIMDE_FLOAT32_C(  665.06), SIMDE_FLOAT32_C(  966.51),
                         SIMDE_FLOAT32_C(  515.00), SIMDE_FLOAT32_C(  239.45),
                         SIMDE_FLOAT32_C(  878.48), SIMDE_FLOAT32_C(   83.52)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -855.75), SIMDE_FLOAT32_C( -288.00),
                         SIMDE_FLOAT32_C(  118.38), SIMDE_FLOAT32_C(  373.81),
                         SIMDE_FLOAT32_C( -507.54), SIMDE_FLOAT32_C( -677.13),
                         SIMDE_FLOAT32_C( -680.61), SIMDE_FLOAT32_C(  934.18)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -153.19), SIMDE_FLOAT32_C( -288.00),
                         SIMDE_FLOAT32_C(  665.06), SIMDE_FLOAT32_C(  966.51),
                         SIMDE_FLOAT32_C(  515.00), SIMDE_FLOAT32_C(  239.45),
                         SIMDE_FLOAT32_C(  878.48), SIMDE_FLOAT32_C(  934.18)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256 r = simde_mm256_max_ps(test_vec[i].a, test_vec[i].b);
    /* NOTE(review): the trailing 1 is presumably a precision/tolerance
     * argument -- confirm against the macro definition. */
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_max_pd.
 *
 * Every row of test_vec supplies two input vectors (a, b) and the expected
 * output (r); in each row every element of r is the larger of the
 * corresponding elements of a and b.  The computed vector is compared
 * against r with simde_assert_m256d_close, passing 1 as its third argument.
 *
 * Returns 0 once all rows have been checked. */
static int
test_simde_mm256_max_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d b;
    simde__m256d r;
  } test_vec[8] = {
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  -66.36), SIMDE_FLOAT64_C( -982.48), SIMDE_FLOAT64_C( -994.10), SIMDE_FLOAT64_C(  656.44)),
      /* b */ simde_mm256_set_pd(SIMDE_FLOAT64_C(   58.12), SIMDE_FLOAT64_C(  730.28), SIMDE_FLOAT64_C(  705.46), SIMDE_FLOAT64_C(  138.28)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(   58.12), SIMDE_FLOAT64_C(  730.28), SIMDE_FLOAT64_C(  705.46), SIMDE_FLOAT64_C(  656.44)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  965.09), SIMDE_FLOAT64_C( -956.81), SIMDE_FLOAT64_C(  -84.17), SIMDE_FLOAT64_C(   -0.28)),
      /* b */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -943.91), SIMDE_FLOAT64_C( -849.45), SIMDE_FLOAT64_C(  747.06), SIMDE_FLOAT64_C(  297.16)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  965.09), SIMDE_FLOAT64_C( -849.45), SIMDE_FLOAT64_C(  747.06), SIMDE_FLOAT64_C(  297.16)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -285.40), SIMDE_FLOAT64_C(   20.32), SIMDE_FLOAT64_C( -517.57), SIMDE_FLOAT64_C(  747.40)),
      /* b */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -634.35), SIMDE_FLOAT64_C( -655.50), SIMDE_FLOAT64_C(  321.54), SIMDE_FLOAT64_C( -151.46)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -285.40), SIMDE_FLOAT64_C(   20.32), SIMDE_FLOAT64_C(  321.54), SIMDE_FLOAT64_C(  747.40)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  766.49), SIMDE_FLOAT64_C( -821.02), SIMDE_FLOAT64_C(  800.18), SIMDE_FLOAT64_C(  -78.19)),
      /* b */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -744.41), SIMDE_FLOAT64_C(  250.49), SIMDE_FLOAT64_C(   16.63), SIMDE_FLOAT64_C(  223.92)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  766.49), SIMDE_FLOAT64_C(  250.49), SIMDE_FLOAT64_C(  800.18), SIMDE_FLOAT64_C(  223.92)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  297.61), SIMDE_FLOAT64_C(   -1.23), SIMDE_FLOAT64_C( -644.62), SIMDE_FLOAT64_C(  -37.89)),
      /* b */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -152.08), SIMDE_FLOAT64_C(  667.58), SIMDE_FLOAT64_C(  737.38), SIMDE_FLOAT64_C(  221.17)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  297.61), SIMDE_FLOAT64_C(  667.58), SIMDE_FLOAT64_C(  737.38), SIMDE_FLOAT64_C(  221.17)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  804.47), SIMDE_FLOAT64_C( -363.06), SIMDE_FLOAT64_C( -130.47), SIMDE_FLOAT64_C( -257.67)),
      /* b */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -448.43), SIMDE_FLOAT64_C(   52.56), SIMDE_FLOAT64_C( -688.52), SIMDE_FLOAT64_C(  690.05)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  804.47), SIMDE_FLOAT64_C(   52.56), SIMDE_FLOAT64_C( -130.47), SIMDE_FLOAT64_C(  690.05)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C(   20.48), SIMDE_FLOAT64_C(  404.58), SIMDE_FLOAT64_C( -885.87), SIMDE_FLOAT64_C(  607.60)),
      /* b */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -402.92), SIMDE_FLOAT64_C(  777.55), SIMDE_FLOAT64_C( -392.90), SIMDE_FLOAT64_C(  385.67)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(   20.48), SIMDE_FLOAT64_C(  777.55), SIMDE_FLOAT64_C( -392.90), SIMDE_FLOAT64_C(  607.60)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -612.46), SIMDE_FLOAT64_C( -353.94), SIMDE_FLOAT64_C( -723.01), SIMDE_FLOAT64_C( -705.60)),
      /* b */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -638.30), SIMDE_FLOAT64_C(   67.95), SIMDE_FLOAT64_C(  537.38), SIMDE_FLOAT64_C( -280.17)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -612.46), SIMDE_FLOAT64_C(   67.95), SIMDE_FLOAT64_C(  537.38), SIMDE_FLOAT64_C( -280.17)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    /* Copy the operands out of the table before invoking the function under test. */
    simde__m256d lhs = test_vec[i].a;
    simde__m256d rhs = test_vec[i].b;
    simde__m256d r = simde_mm256_max_pd(lhs, rhs);

    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_movedup_pd.
 *
 * Every row of test_vec pairs an input vector (a) with the expected output
 * (r).  In terms of simde_mm256_set_pd argument positions, each expected
 * vector repeats the input's 2nd argument in positions 1-2 and the input's
 * 4th argument in positions 3-4.  The computed vector is compared against r
 * with simde_assert_m256d_close, passing 1 as its third argument.
 *
 * Returns 0 once all rows have been checked. */
static int
test_simde_mm256_movedup_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d r;
  } test_vec[8] = {
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -848.53), SIMDE_FLOAT64_C( -411.84), SIMDE_FLOAT64_C( -162.95), SIMDE_FLOAT64_C(  899.65)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -411.84), SIMDE_FLOAT64_C( -411.84), SIMDE_FLOAT64_C(  899.65), SIMDE_FLOAT64_C(  899.65)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -411.40), SIMDE_FLOAT64_C( -713.22), SIMDE_FLOAT64_C( -868.77), SIMDE_FLOAT64_C( -109.40)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -713.22), SIMDE_FLOAT64_C( -713.22), SIMDE_FLOAT64_C( -109.40), SIMDE_FLOAT64_C( -109.40)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -430.65), SIMDE_FLOAT64_C(  350.42), SIMDE_FLOAT64_C( -891.94), SIMDE_FLOAT64_C( -973.52)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  350.42), SIMDE_FLOAT64_C(  350.42), SIMDE_FLOAT64_C( -973.52), SIMDE_FLOAT64_C( -973.52)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -988.20), SIMDE_FLOAT64_C( -840.45), SIMDE_FLOAT64_C(  979.48), SIMDE_FLOAT64_C( -644.54)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -840.45), SIMDE_FLOAT64_C( -840.45), SIMDE_FLOAT64_C( -644.54), SIMDE_FLOAT64_C( -644.54)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  459.82), SIMDE_FLOAT64_C(  645.18), SIMDE_FLOAT64_C( -152.73), SIMDE_FLOAT64_C(  150.29)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  645.18), SIMDE_FLOAT64_C(  645.18), SIMDE_FLOAT64_C(  150.29), SIMDE_FLOAT64_C(  150.29)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -902.52), SIMDE_FLOAT64_C(   -5.45), SIMDE_FLOAT64_C( -958.89), SIMDE_FLOAT64_C( -924.74)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(   -5.45), SIMDE_FLOAT64_C(   -5.45), SIMDE_FLOAT64_C( -924.74), SIMDE_FLOAT64_C( -924.74)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -582.37), SIMDE_FLOAT64_C(  163.98), SIMDE_FLOAT64_C( -479.86), SIMDE_FLOAT64_C(  420.17)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  163.98), SIMDE_FLOAT64_C(  163.98), SIMDE_FLOAT64_C(  420.17), SIMDE_FLOAT64_C(  420.17)) },
    { /* a */ simde_mm256_set_pd(SIMDE_FLOAT64_C( -319.68), SIMDE_FLOAT64_C(  791.82), SIMDE_FLOAT64_C(  388.17), SIMDE_FLOAT64_C(  537.00)),
      /* r */ simde_mm256_set_pd(SIMDE_FLOAT64_C(  791.82), SIMDE_FLOAT64_C(  791.82), SIMDE_FLOAT64_C(  537.00), SIMDE_FLOAT64_C(  537.00)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    /* Copy the operand out of the table before invoking the function under test. */
    simde__m256d src = test_vec[i].a;
    simde__m256d r = simde_mm256_movedup_pd(src);

    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_movehdup_ps.
 *
 * Every row of test_vec pairs an input vector (a) with the expected output
 * (r).  In terms of simde_mm256_set_ps argument positions, each expected
 * vector repeats the input's 1st, 3rd, 5th and 7th arguments, each one
 * filling a pair of adjacent positions.  The computed vector is compared
 * against r with simde_assert_m256_close, passing 1 as its third argument.
 *
 * Returns 0 once all rows have been checked. */
static int
test_simde_mm256_movehdup_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 r;
  } test_vec[8] = {
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  948.05), SIMDE_FLOAT32_C( -208.59), SIMDE_FLOAT32_C( -422.71), SIMDE_FLOAT32_C( -254.03),
                         SIMDE_FLOAT32_C(    4.80), SIMDE_FLOAT32_C( -671.71), SIMDE_FLOAT32_C(  685.42), SIMDE_FLOAT32_C( -954.51)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  948.05), SIMDE_FLOAT32_C(  948.05), SIMDE_FLOAT32_C( -422.71), SIMDE_FLOAT32_C( -422.71),
                         SIMDE_FLOAT32_C(    4.80), SIMDE_FLOAT32_C(    4.80), SIMDE_FLOAT32_C(  685.42), SIMDE_FLOAT32_C(  685.42)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  569.22), SIMDE_FLOAT32_C(  232.90), SIMDE_FLOAT32_C(  835.45), SIMDE_FLOAT32_C(   10.13),
                         SIMDE_FLOAT32_C(   30.06), SIMDE_FLOAT32_C(  598.20), SIMDE_FLOAT32_C(  376.16), SIMDE_FLOAT32_C( -918.63)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  569.22), SIMDE_FLOAT32_C(  569.22), SIMDE_FLOAT32_C(  835.45), SIMDE_FLOAT32_C(  835.45),
                         SIMDE_FLOAT32_C(   30.06), SIMDE_FLOAT32_C(   30.06), SIMDE_FLOAT32_C(  376.16), SIMDE_FLOAT32_C(  376.16)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   97.81), SIMDE_FLOAT32_C(  286.59), SIMDE_FLOAT32_C( -299.41), SIMDE_FLOAT32_C(  944.38),
                         SIMDE_FLOAT32_C( -367.92), SIMDE_FLOAT32_C(  626.43), SIMDE_FLOAT32_C(  889.36), SIMDE_FLOAT32_C(  776.89)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   97.81), SIMDE_FLOAT32_C(   97.81), SIMDE_FLOAT32_C( -299.41), SIMDE_FLOAT32_C( -299.41),
                         SIMDE_FLOAT32_C( -367.92), SIMDE_FLOAT32_C( -367.92), SIMDE_FLOAT32_C(  889.36), SIMDE_FLOAT32_C(  889.36)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -67.83), SIMDE_FLOAT32_C( -435.86), SIMDE_FLOAT32_C( -637.27), SIMDE_FLOAT32_C(  -95.13),
                         SIMDE_FLOAT32_C(  698.01), SIMDE_FLOAT32_C(   55.48), SIMDE_FLOAT32_C(    1.28), SIMDE_FLOAT32_C( -243.13)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -67.83), SIMDE_FLOAT32_C(  -67.83), SIMDE_FLOAT32_C( -637.27), SIMDE_FLOAT32_C( -637.27),
                         SIMDE_FLOAT32_C(  698.01), SIMDE_FLOAT32_C(  698.01), SIMDE_FLOAT32_C(    1.28), SIMDE_FLOAT32_C(    1.28)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -266.47), SIMDE_FLOAT32_C(  784.83), SIMDE_FLOAT32_C( -470.70), SIMDE_FLOAT32_C(  865.28),
                         SIMDE_FLOAT32_C(  393.60), SIMDE_FLOAT32_C( -743.40), SIMDE_FLOAT32_C(  858.48), SIMDE_FLOAT32_C(  507.86)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -266.47), SIMDE_FLOAT32_C( -266.47), SIMDE_FLOAT32_C( -470.70), SIMDE_FLOAT32_C( -470.70),
                         SIMDE_FLOAT32_C(  393.60), SIMDE_FLOAT32_C(  393.60), SIMDE_FLOAT32_C(  858.48), SIMDE_FLOAT32_C(  858.48)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  270.84), SIMDE_FLOAT32_C(   46.20), SIMDE_FLOAT32_C( -185.53), SIMDE_FLOAT32_C(  211.61),
                         SIMDE_FLOAT32_C(  -20.87), SIMDE_FLOAT32_C( -661.06), SIMDE_FLOAT32_C(  209.04), SIMDE_FLOAT32_C(  920.86)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  270.84), SIMDE_FLOAT32_C(  270.84), SIMDE_FLOAT32_C( -185.53), SIMDE_FLOAT32_C( -185.53),
                         SIMDE_FLOAT32_C(  -20.87), SIMDE_FLOAT32_C(  -20.87), SIMDE_FLOAT32_C(  209.04), SIMDE_FLOAT32_C(  209.04)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -751.76), SIMDE_FLOAT32_C( -655.85), SIMDE_FLOAT32_C(  179.21), SIMDE_FLOAT32_C(  373.36),
                         SIMDE_FLOAT32_C(    1.62), SIMDE_FLOAT32_C( -981.05), SIMDE_FLOAT32_C(  802.72), SIMDE_FLOAT32_C(  -49.40)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -751.76), SIMDE_FLOAT32_C( -751.76), SIMDE_FLOAT32_C(  179.21), SIMDE_FLOAT32_C(  179.21),
                         SIMDE_FLOAT32_C(    1.62), SIMDE_FLOAT32_C(    1.62), SIMDE_FLOAT32_C(  802.72), SIMDE_FLOAT32_C(  802.72)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   44.57), SIMDE_FLOAT32_C( -391.28), SIMDE_FLOAT32_C(  614.40), SIMDE_FLOAT32_C( -901.50),
                         SIMDE_FLOAT32_C(  577.45), SIMDE_FLOAT32_C( -465.25), SIMDE_FLOAT32_C( -148.70), SIMDE_FLOAT32_C( -714.68)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   44.57), SIMDE_FLOAT32_C(   44.57), SIMDE_FLOAT32_C(  614.40), SIMDE_FLOAT32_C(  614.40),
                         SIMDE_FLOAT32_C(  577.45), SIMDE_FLOAT32_C(  577.45), SIMDE_FLOAT32_C( -148.70), SIMDE_FLOAT32_C( -148.70)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    /* Copy the operand out of the table before invoking the function under test. */
    simde__m256 src = test_vec[i].a;
    simde__m256 r = simde_mm256_movehdup_ps(src);

    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_moveldup_ps.
 *
 * Every row of test_vec pairs an input vector (a) with the expected output
 * (r).  In terms of simde_mm256_set_ps argument positions, each expected
 * vector repeats the input's 2nd, 4th, 6th and 8th arguments, each one
 * filling a pair of adjacent positions.  The computed vector is compared
 * against r with simde_assert_m256_close, passing 1 as its third argument.
 *
 * Returns 0 once all rows have been checked. */
static int
test_simde_mm256_moveldup_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 r;
  } test_vec[8] = {
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  366.49), SIMDE_FLOAT32_C(  -15.43), SIMDE_FLOAT32_C( -732.71), SIMDE_FLOAT32_C(  312.44),
                         SIMDE_FLOAT32_C( -535.64), SIMDE_FLOAT32_C(  -24.14), SIMDE_FLOAT32_C( -881.62), SIMDE_FLOAT32_C(  419.86)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -15.43), SIMDE_FLOAT32_C(  -15.43), SIMDE_FLOAT32_C(  312.44), SIMDE_FLOAT32_C(  312.44),
                         SIMDE_FLOAT32_C(  -24.14), SIMDE_FLOAT32_C(  -24.14), SIMDE_FLOAT32_C(  419.86), SIMDE_FLOAT32_C(  419.86)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -625.40), SIMDE_FLOAT32_C( -523.97), SIMDE_FLOAT32_C(  296.91), SIMDE_FLOAT32_C(  228.72),
                         SIMDE_FLOAT32_C(  553.44), SIMDE_FLOAT32_C(  -88.10), SIMDE_FLOAT32_C( -240.30), SIMDE_FLOAT32_C(  437.29)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -523.97), SIMDE_FLOAT32_C( -523.97), SIMDE_FLOAT32_C(  228.72), SIMDE_FLOAT32_C(  228.72),
                         SIMDE_FLOAT32_C(  -88.10), SIMDE_FLOAT32_C(  -88.10), SIMDE_FLOAT32_C(  437.29), SIMDE_FLOAT32_C(  437.29)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -54.17), SIMDE_FLOAT32_C( -444.25), SIMDE_FLOAT32_C( -384.50), SIMDE_FLOAT32_C(  781.57),
                         SIMDE_FLOAT32_C(  607.05), SIMDE_FLOAT32_C( -295.21), SIMDE_FLOAT32_C(  101.75), SIMDE_FLOAT32_C( -941.55)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.25), SIMDE_FLOAT32_C( -444.25), SIMDE_FLOAT32_C(  781.57), SIMDE_FLOAT32_C(  781.57),
                         SIMDE_FLOAT32_C( -295.21), SIMDE_FLOAT32_C( -295.21), SIMDE_FLOAT32_C( -941.55), SIMDE_FLOAT32_C( -941.55)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -41.58), SIMDE_FLOAT32_C( -957.94), SIMDE_FLOAT32_C(  871.50), SIMDE_FLOAT32_C( -839.89),
                         SIMDE_FLOAT32_C(  692.80), SIMDE_FLOAT32_C( -417.15), SIMDE_FLOAT32_C( -850.22), SIMDE_FLOAT32_C(  594.16)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -957.94), SIMDE_FLOAT32_C( -957.94), SIMDE_FLOAT32_C( -839.89), SIMDE_FLOAT32_C( -839.89),
                         SIMDE_FLOAT32_C( -417.15), SIMDE_FLOAT32_C( -417.15), SIMDE_FLOAT32_C(  594.16), SIMDE_FLOAT32_C(  594.16)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -319.28), SIMDE_FLOAT32_C( -229.51), SIMDE_FLOAT32_C( -581.14), SIMDE_FLOAT32_C(   81.57),
                         SIMDE_FLOAT32_C(  774.33), SIMDE_FLOAT32_C( -621.69), SIMDE_FLOAT32_C( -447.13), SIMDE_FLOAT32_C(  334.88)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -229.51), SIMDE_FLOAT32_C( -229.51), SIMDE_FLOAT32_C(   81.57), SIMDE_FLOAT32_C(   81.57),
                         SIMDE_FLOAT32_C( -621.69), SIMDE_FLOAT32_C( -621.69), SIMDE_FLOAT32_C(  334.88), SIMDE_FLOAT32_C(  334.88)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  288.60), SIMDE_FLOAT32_C(  139.33), SIMDE_FLOAT32_C(  951.18), SIMDE_FLOAT32_C( -924.84),
                         SIMDE_FLOAT32_C( -320.35), SIMDE_FLOAT32_C( -998.89), SIMDE_FLOAT32_C(  164.55), SIMDE_FLOAT32_C(  991.73)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  139.33), SIMDE_FLOAT32_C(  139.33), SIMDE_FLOAT32_C( -924.84), SIMDE_FLOAT32_C( -924.84),
                         SIMDE_FLOAT32_C( -998.89), SIMDE_FLOAT32_C( -998.89), SIMDE_FLOAT32_C(  991.73), SIMDE_FLOAT32_C(  991.73)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -671.27), SIMDE_FLOAT32_C(   35.52), SIMDE_FLOAT32_C(  780.53), SIMDE_FLOAT32_C(  -58.65),
                         SIMDE_FLOAT32_C(  227.27), SIMDE_FLOAT32_C( -621.99), SIMDE_FLOAT32_C( -182.62), SIMDE_FLOAT32_C(  448.36)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   35.52), SIMDE_FLOAT32_C(   35.52), SIMDE_FLOAT32_C(  -58.65), SIMDE_FLOAT32_C(  -58.65),
                         SIMDE_FLOAT32_C( -621.99), SIMDE_FLOAT32_C( -621.99), SIMDE_FLOAT32_C(  448.36), SIMDE_FLOAT32_C(  448.36)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -984.41), SIMDE_FLOAT32_C( -968.89), SIMDE_FLOAT32_C( -986.96), SIMDE_FLOAT32_C( -760.49),
                         SIMDE_FLOAT32_C(  908.37), SIMDE_FLOAT32_C(   35.57), SIMDE_FLOAT32_C( -144.09), SIMDE_FLOAT32_C( -735.38)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -968.89), SIMDE_FLOAT32_C( -968.89), SIMDE_FLOAT32_C( -760.49), SIMDE_FLOAT32_C( -760.49),
                         SIMDE_FLOAT32_C(   35.57), SIMDE_FLOAT32_C(   35.57), SIMDE_FLOAT32_C( -735.38), SIMDE_FLOAT32_C( -735.38)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    /* Copy the operand out of the table before invoking the function under test. */
    simde__m256 src = test_vec[i].a;
    simde__m256 r = simde_mm256_moveldup_ps(src);

    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int
test_simde_mm256_movemask_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    int r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C(-1882468747), INT32_C(  687119108), INT32_C(  990615051), INT32_C(-1253009356),
                            INT32_C( -617641993), INT32_C(-1788847115), INT32_C( 1286496634), INT32_C( -717001088)),
      157 },
    { simde_mm256_set_epi32(INT32_C(-1989003409), INT32_C(-1421574364), INT32_C( -382909714), INT32_C( 1867671217),
                            INT32_C( -487294761), INT32_C( -454003817), INT32_C(  -33445130), INT32_C( -757457478)),
      239 },
    { simde_mm256_set_epi32(INT32_C(   75750543), INT32_C( -792015200), INT32_C( -456366441), INT32_C( 1395825015),
                            INT32_C( 1090607410), INT32_C( -644107645), INT32_C( 2024077315), INT32_C( 1442543377)),
      100 },
    { simde_mm256_set_epi32(INT32_C(   64842844), INT32_C(-1499432736), INT32_C( -849672143), INT32_C(  808386603),
                            INT32_C( 1431766696), INT32_C(  778893676), INT32_C(   -4359592), INT32_C( -465691700)),
      99 },
    { simde_mm256_set_epi32(INT32_C(-1142593030), INT32_C( 1697913004), INT32_C(-1241903623), INT32_C( 1420498198),
                            INT32_C( 1725730120), INT32_C( 1757719770), INT32_C( 1834101516), INT32_C(-1076410946)),
      161 },
    { simde_mm256_set_epi32(INT32_C( -247458634), INT32_C(  497266182), INT32_C(  668953611), INT32_C( -198408792),
                            INT32_C(  443042962), INT32_C(  174399567), INT32_C( 1910223665), INT32_C( 1408104689)),
      144 },
    { simde_mm256_set_epi32(INT32_C( 1266303831), INT32_C(-1981624404), INT32_C( 1894718767), INT32_C( 1471458198),
                            INT32_C(-1698669031), INT32_C(-1514661026), INT32_C(-1880466849), INT32_C(-1226909311)),
      79 },
    { simde_mm256_set_epi32(INT32_C( 1314135938), INT32_C( 1476193225), INT32_C( 1478701126), INT32_C( 1097128360),
                            INT32_C( -681267332), INT32_C( -854863432), INT32_C(-1814679036), INT32_C( 2077413591)),
      14 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    int r = simde_mm256_movemask_ps(simde_mm256_castsi256_ps(test_vec[i].a));
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm256_movemask_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    int r;
  } test_vec[8] = {
    { simde_mm256_set_epi64x(INT64_C(-3476114617639449125), INT64_C( 4174348817044283167),
                             INT64_C( 2372823762134739460), INT64_C( 2922754125044459603)),
      8 },
    { simde_mm256_set_epi64x(INT64_C( 2977848152908728188), INT64_C(-9001314161736319662),
                             INT64_C(  316358866512427816), INT64_C(  122734419977663898)),
      4 },
    { simde_mm256_set_epi64x(INT64_C( 1458362257601867464), INT64_C(-2204086314119824728),
                             INT64_C( 4226262178485377739), INT64_C( 3412235452127467527)),
      4 },
    { simde_mm256_set_epi64x(INT64_C(-3206742534496437425), INT64_C( 4783689227782243759),
                             INT64_C(-2969411634419391796), INT64_C( 6490543198836487087)),
      10 },
    { simde_mm256_set_epi64x(INT64_C( 5617129527752259343), INT64_C( 8836429733868806831),
                             INT64_C(-3648376369890579220), INT64_C( 2974368927295586543)),
      2 },
    { simde_mm256_set_epi64x(INT64_C(-7634385432411504297), INT64_C( 7257505416389479780),
                             INT64_C( 7838438756599446984), INT64_C( 8814240438147347165)),
      8 },
    { simde_mm256_set_epi64x(INT64_C( 9071444056306285548), INT64_C( 8310979399473657009),
                             INT64_C(-1777863912628705993), INT64_C( 4472167403670196676)),
      2 },
    { simde_mm256_set_epi64x(INT64_C(-6555517372143397251), INT64_C( 2575855241209659630),
                             INT64_C(-8431720993701553511), INT64_C( -874276281105343662)),
      11 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    int r = simde_mm256_movemask_pd(simde_mm256_castsi256_pd(test_vec[i].a));
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

/* Table-driven check of simde_mm256_mul_ps.
 *
 * Every row of test_vec supplies two input vectors (a, b) and the expected
 * output (r); each element of r is the product of the corresponding
 * elements of a and b, written to two decimal places.  The computed vector
 * is compared against r with simde_assert_m256_close, passing 1 as its
 * third argument.
 *
 * Returns 0 once all rows have been checked. */
static int
test_simde_mm256_mul_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m256 r;
  } test_vec[8] = {
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -65.11), SIMDE_FLOAT32_C(  729.63), SIMDE_FLOAT32_C(  579.86), SIMDE_FLOAT32_C(  759.34),
                         SIMDE_FLOAT32_C(  638.63), SIMDE_FLOAT32_C(  366.71), SIMDE_FLOAT32_C( -251.89), SIMDE_FLOAT32_C( -327.70)),
      /* b */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -853.75), SIMDE_FLOAT32_C(  306.50), SIMDE_FLOAT32_C( -694.30), SIMDE_FLOAT32_C(   62.03),
                         SIMDE_FLOAT32_C( -332.43), SIMDE_FLOAT32_C( -617.58), SIMDE_FLOAT32_C( -841.05), SIMDE_FLOAT32_C( -990.91)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(55587.66), SIMDE_FLOAT32_C(223631.59), SIMDE_FLOAT32_C(-402596.78), SIMDE_FLOAT32_C(47101.86),
                         SIMDE_FLOAT32_C(-212299.77), SIMDE_FLOAT32_C(-226472.77), SIMDE_FLOAT32_C(211852.08), SIMDE_FLOAT32_C(324721.22)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -540.24), SIMDE_FLOAT32_C(  599.92), SIMDE_FLOAT32_C(  946.63), SIMDE_FLOAT32_C(  269.39),
                         SIMDE_FLOAT32_C(  990.71), SIMDE_FLOAT32_C(  736.78), SIMDE_FLOAT32_C(  735.17), SIMDE_FLOAT32_C( -839.81)),
      /* b */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -471.86), SIMDE_FLOAT32_C(  231.47), SIMDE_FLOAT32_C(  368.19), SIMDE_FLOAT32_C( -199.69),
                         SIMDE_FLOAT32_C( -781.21), SIMDE_FLOAT32_C( -576.81), SIMDE_FLOAT32_C( -351.44), SIMDE_FLOAT32_C(  650.57)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(254917.64), SIMDE_FLOAT32_C(138863.48), SIMDE_FLOAT32_C(348539.72), SIMDE_FLOAT32_C(-53794.49),
                         SIMDE_FLOAT32_C(-773952.62), SIMDE_FLOAT32_C(-424982.09), SIMDE_FLOAT32_C(-258368.14), SIMDE_FLOAT32_C(-546355.19)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  174.33), SIMDE_FLOAT32_C( -261.00), SIMDE_FLOAT32_C(  947.71), SIMDE_FLOAT32_C(  -39.38),
                         SIMDE_FLOAT32_C( -142.31), SIMDE_FLOAT32_C( -753.91), SIMDE_FLOAT32_C( -304.55), SIMDE_FLOAT32_C(  197.06)),
      /* b */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -894.79), SIMDE_FLOAT32_C(  298.89), SIMDE_FLOAT32_C(  413.08), SIMDE_FLOAT32_C(   45.44),
                         SIMDE_FLOAT32_C( -362.24), SIMDE_FLOAT32_C(  247.39), SIMDE_FLOAT32_C( -836.15), SIMDE_FLOAT32_C(  250.52)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-155988.73), SIMDE_FLOAT32_C(-78010.30), SIMDE_FLOAT32_C(391480.03), SIMDE_FLOAT32_C(-1789.43),
                         SIMDE_FLOAT32_C(51550.37), SIMDE_FLOAT32_C(-186509.78), SIMDE_FLOAT32_C(254649.48), SIMDE_FLOAT32_C(49367.47)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -108.81), SIMDE_FLOAT32_C( -144.46), SIMDE_FLOAT32_C( -926.46), SIMDE_FLOAT32_C(  -87.02),
                         SIMDE_FLOAT32_C( -701.58), SIMDE_FLOAT32_C(  412.02), SIMDE_FLOAT32_C( -404.71), SIMDE_FLOAT32_C( -140.12)),
      /* b */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -502.46), SIMDE_FLOAT32_C( -741.60), SIMDE_FLOAT32_C(  261.69), SIMDE_FLOAT32_C( -236.88),
                         SIMDE_FLOAT32_C(  805.45), SIMDE_FLOAT32_C(  663.49), SIMDE_FLOAT32_C(  804.65), SIMDE_FLOAT32_C( -231.08)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(54672.67), SIMDE_FLOAT32_C(107131.54), SIMDE_FLOAT32_C(-242445.33), SIMDE_FLOAT32_C(20613.30),
                         SIMDE_FLOAT32_C(-565087.62), SIMDE_FLOAT32_C(273371.12), SIMDE_FLOAT32_C(-325649.91), SIMDE_FLOAT32_C(32378.93)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  274.12), SIMDE_FLOAT32_C(  417.28), SIMDE_FLOAT32_C(  626.47), SIMDE_FLOAT32_C(  541.42),
                         SIMDE_FLOAT32_C( -351.86), SIMDE_FLOAT32_C(  144.88), SIMDE_FLOAT32_C( -692.65), SIMDE_FLOAT32_C(  994.52)),
      /* b */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -588.84), SIMDE_FLOAT32_C(  440.91), SIMDE_FLOAT32_C( -668.35), SIMDE_FLOAT32_C(  950.11),
                         SIMDE_FLOAT32_C(   38.08), SIMDE_FLOAT32_C( -877.72), SIMDE_FLOAT32_C(  389.76), SIMDE_FLOAT32_C(  702.21)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-161412.83), SIMDE_FLOAT32_C(183982.92), SIMDE_FLOAT32_C(-418701.19), SIMDE_FLOAT32_C(514408.53),
                         SIMDE_FLOAT32_C(-13398.83), SIMDE_FLOAT32_C(-127164.07), SIMDE_FLOAT32_C(-269967.28), SIMDE_FLOAT32_C(698361.94)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -72.46), SIMDE_FLOAT32_C( -957.98), SIMDE_FLOAT32_C(  872.62), SIMDE_FLOAT32_C( -600.00),
                         SIMDE_FLOAT32_C(  937.99), SIMDE_FLOAT32_C( -698.26), SIMDE_FLOAT32_C(  895.96), SIMDE_FLOAT32_C( -799.53)),
      /* b */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -864.38), SIMDE_FLOAT32_C( -416.69), SIMDE_FLOAT32_C(  894.78), SIMDE_FLOAT32_C(  968.43),
                         SIMDE_FLOAT32_C(  609.48), SIMDE_FLOAT32_C(  317.20), SIMDE_FLOAT32_C(  767.37), SIMDE_FLOAT32_C(  -51.68)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(62632.97), SIMDE_FLOAT32_C(399180.69), SIMDE_FLOAT32_C(780802.94), SIMDE_FLOAT32_C(-581058.00),
                         SIMDE_FLOAT32_C(571686.12), SIMDE_FLOAT32_C(-221488.08), SIMDE_FLOAT32_C(687532.81), SIMDE_FLOAT32_C(41319.71)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -131.07), SIMDE_FLOAT32_C( -294.47), SIMDE_FLOAT32_C( -916.92), SIMDE_FLOAT32_C(  146.62),
                         SIMDE_FLOAT32_C(  597.05), SIMDE_FLOAT32_C(   75.62), SIMDE_FLOAT32_C(  636.06), SIMDE_FLOAT32_C(  363.23)),
      /* b */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  427.08), SIMDE_FLOAT32_C( -105.65), SIMDE_FLOAT32_C(   98.05), SIMDE_FLOAT32_C(  398.71),
                         SIMDE_FLOAT32_C( -883.53), SIMDE_FLOAT32_C( -434.31), SIMDE_FLOAT32_C(  638.77), SIMDE_FLOAT32_C( -453.78)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-55977.38), SIMDE_FLOAT32_C(31110.76), SIMDE_FLOAT32_C(-89904.01), SIMDE_FLOAT32_C(58458.86),
                         SIMDE_FLOAT32_C(-527511.56), SIMDE_FLOAT32_C(-32842.52), SIMDE_FLOAT32_C(406296.06), SIMDE_FLOAT32_C(-164826.52)) },
    { /* a */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  401.83), SIMDE_FLOAT32_C( -101.21), SIMDE_FLOAT32_C( -199.00), SIMDE_FLOAT32_C(  823.62),
                         SIMDE_FLOAT32_C( -930.84), SIMDE_FLOAT32_C( -154.42), SIMDE_FLOAT32_C(  958.59), SIMDE_FLOAT32_C(  853.69)),
      /* b */
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -449.55), SIMDE_FLOAT32_C(  354.78), SIMDE_FLOAT32_C( -131.96), SIMDE_FLOAT32_C( -519.93),
                         SIMDE_FLOAT32_C(  699.65), SIMDE_FLOAT32_C(  781.77), SIMDE_FLOAT32_C( -157.96), SIMDE_FLOAT32_C( -793.47)),
      /* r */
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-180642.67), SIMDE_FLOAT32_C(-35907.29), SIMDE_FLOAT32_C(26260.04), SIMDE_FLOAT32_C(-428224.75),
                         SIMDE_FLOAT32_C(-651262.25), SIMDE_FLOAT32_C(-120720.92), SIMDE_FLOAT32_C(-151418.89), SIMDE_FLOAT32_C(-677377.38)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    /* Copy the operands out of the table before invoking the function under test. */
    simde__m256 lhs = test_vec[i].a;
    simde__m256 rhs = test_vec[i].b;
    simde__m256 r = simde_mm256_mul_ps(lhs, rhs);

    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_mul_pd against a fixed table of eight cases.
 *
 * Each case holds two input vectors (a, b) of four doubles and the
 * expected element-wise product (r), written to two decimal places.
 * The computed product is compared with simde_assert_m256d_close using
 * a precision argument of 1 rather than for exact equality.
 *
 * Returns 0 when every case has been checked. */
static int
test_simde_mm256_mul_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d b;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   -216.07), SIMDE_FLOAT64_C(   -759.70),
                         SIMDE_FLOAT64_C(   -257.81), SIMDE_FLOAT64_C(    916.82)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -279.71), SIMDE_FLOAT64_C(    654.36),
                         SIMDE_FLOAT64_C(   -699.07), SIMDE_FLOAT64_C(    772.15)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  60436.94), SIMDE_FLOAT64_C(-497117.29),
                         SIMDE_FLOAT64_C( 180227.24), SIMDE_FLOAT64_C( 707922.56)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   -576.02), SIMDE_FLOAT64_C(   -915.91),
                         SIMDE_FLOAT64_C(    616.42), SIMDE_FLOAT64_C(    692.53)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    285.52), SIMDE_FLOAT64_C(   -538.61),
                         SIMDE_FLOAT64_C(    604.61), SIMDE_FLOAT64_C(   -220.38)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-164465.23), SIMDE_FLOAT64_C( 493318.29),
                         SIMDE_FLOAT64_C( 372693.70), SIMDE_FLOAT64_C(-152619.76)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   -378.41), SIMDE_FLOAT64_C(   -782.44),
                         SIMDE_FLOAT64_C(   -858.38), SIMDE_FLOAT64_C(   -932.65)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    782.91), SIMDE_FLOAT64_C(    352.73),
                         SIMDE_FLOAT64_C(    705.00), SIMDE_FLOAT64_C(    -78.46)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-296260.97), SIMDE_FLOAT64_C(-275990.06),
                         SIMDE_FLOAT64_C(-605157.90), SIMDE_FLOAT64_C(  73175.72)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   -936.48), SIMDE_FLOAT64_C(    274.43),
                         SIMDE_FLOAT64_C(    341.69), SIMDE_FLOAT64_C(    588.43)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -915.35), SIMDE_FLOAT64_C(    625.98),
                         SIMDE_FLOAT64_C(    -66.28), SIMDE_FLOAT64_C(   -474.34)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 857206.97), SIMDE_FLOAT64_C( 171787.69),
                         SIMDE_FLOAT64_C( -22647.21), SIMDE_FLOAT64_C(-279115.89)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(    371.75), SIMDE_FLOAT64_C(   -392.05),
                         SIMDE_FLOAT64_C(   -730.60), SIMDE_FLOAT64_C(    399.39)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -409.43), SIMDE_FLOAT64_C(    920.37),
                         SIMDE_FLOAT64_C(    -56.28), SIMDE_FLOAT64_C(   -779.31)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-152205.60), SIMDE_FLOAT64_C(-360831.06),
                         SIMDE_FLOAT64_C(  41118.17), SIMDE_FLOAT64_C(-311248.62)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(     -0.10), SIMDE_FLOAT64_C(    955.19),
                         SIMDE_FLOAT64_C(    162.40), SIMDE_FLOAT64_C(   -236.01)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -741.60), SIMDE_FLOAT64_C(    194.99),
                         SIMDE_FLOAT64_C(    845.16), SIMDE_FLOAT64_C(    363.39)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(     74.16), SIMDE_FLOAT64_C( 186252.50),
                         SIMDE_FLOAT64_C( 137253.98), SIMDE_FLOAT64_C( -85763.67)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(     18.23), SIMDE_FLOAT64_C(    296.33),
                         SIMDE_FLOAT64_C(    628.43), SIMDE_FLOAT64_C(   -660.29)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    -24.23), SIMDE_FLOAT64_C(    573.95),
                         SIMDE_FLOAT64_C(    350.37), SIMDE_FLOAT64_C(   -979.36)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   -441.71), SIMDE_FLOAT64_C( 170078.60),
                         SIMDE_FLOAT64_C( 220183.02), SIMDE_FLOAT64_C( 646661.61)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(     14.47), SIMDE_FLOAT64_C(    780.26),
                         SIMDE_FLOAT64_C(   -875.01), SIMDE_FLOAT64_C(    609.14)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    690.47), SIMDE_FLOAT64_C(   -614.79),
                         SIMDE_FLOAT64_C(    626.96), SIMDE_FLOAT64_C(   -369.27)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   9991.10), SIMDE_FLOAT64_C(-479696.05),
                         SIMDE_FLOAT64_C(-548596.27), SIMDE_FLOAT64_C(-224937.13)) }
  };
  /* Number of rows in the table, derived from the array itself. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m256d r;

    /* Multiply the two inputs lane by lane and check the product. */
    r = simde_mm256_mul_pd(test_vec[i].a, test_vec[i].b);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_or_ps against a fixed table of eight cases.
 *
 * Each case holds two input vectors (a, b) of eight floats and the
 * expected result (r).  The expected values are the floating-point
 * reading of the OR-ed bit patterns, written to two decimal places, so
 * the result is compared with simde_assert_m256_close (precision
 * argument 1) instead of bit-for-bit.
 *
 * Returns 0 when every case has been checked. */
static int
test_simde_mm256_or_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -856.34), SIMDE_FLOAT32_C(   -251.54),
                         SIMDE_FLOAT32_C(    873.84), SIMDE_FLOAT32_C(    282.56),
                         SIMDE_FLOAT32_C(   -701.43), SIMDE_FLOAT32_C(    881.08),
                         SIMDE_FLOAT32_C(    949.17), SIMDE_FLOAT32_C(    -70.20)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(     13.03), SIMDE_FLOAT32_C(   -213.71),
                         SIMDE_FLOAT32_C(   -960.05), SIMDE_FLOAT32_C(     76.97),
                         SIMDE_FLOAT32_C(    529.10), SIMDE_FLOAT32_C(   -768.23),
                         SIMDE_FLOAT32_C(    808.48), SIMDE_FLOAT32_C(   -237.02)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -3431.99), SIMDE_FLOAT32_C(   -255.75),
                         SIMDE_FLOAT32_C(  -1001.87), SIMDE_FLOAT32_C(    315.93),
                         SIMDE_FLOAT32_C(   -701.50), SIMDE_FLOAT32_C(   -881.25),
                         SIMDE_FLOAT32_C(    957.48), SIMDE_FLOAT32_C(   -474.81)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    222.49), SIMDE_FLOAT32_C(    898.11),
                         SIMDE_FLOAT32_C(   -986.95), SIMDE_FLOAT32_C(   -624.89),
                         SIMDE_FLOAT32_C(   -780.02), SIMDE_FLOAT32_C(    920.74),
                         SIMDE_FLOAT32_C(    815.92), SIMDE_FLOAT32_C(    550.68)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -274.50), SIMDE_FLOAT32_C(    377.51),
                         SIMDE_FLOAT32_C(    595.96), SIMDE_FLOAT32_C(    513.30),
                         SIMDE_FLOAT32_C(    235.21), SIMDE_FLOAT32_C(    -45.95),
                         SIMDE_FLOAT32_C(    722.85), SIMDE_FLOAT32_C(    781.79)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -446.98), SIMDE_FLOAT32_C( 129422.58),
                         SIMDE_FLOAT32_C(   -987.97), SIMDE_FLOAT32_C(   -625.94),
                         SIMDE_FLOAT32_C( -60213.79), SIMDE_FLOAT32_C( -15867.97),
                         SIMDE_FLOAT32_C(   1023.98), SIMDE_FLOAT32_C(    815.93)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -789.64), SIMDE_FLOAT32_C(    957.16),
                         SIMDE_FLOAT32_C(   -431.62), SIMDE_FLOAT32_C(    527.46),
                         SIMDE_FLOAT32_C(   -345.54), SIMDE_FLOAT32_C(   -528.90),
                         SIMDE_FLOAT32_C(   -257.76), SIMDE_FLOAT32_C(    194.98)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    966.68), SIMDE_FLOAT32_C(    513.70),
                         SIMDE_FLOAT32_C(    151.32), SIMDE_FLOAT32_C(   -424.41),
                         SIMDE_FLOAT32_C(   -411.73), SIMDE_FLOAT32_C(    846.41),
                         SIMDE_FLOAT32_C(    151.87), SIMDE_FLOAT32_C(   -542.65)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -983.69), SIMDE_FLOAT32_C(    957.73),
                         SIMDE_FLOAT32_C(   -431.75), SIMDE_FLOAT32_C(-110586.96),
                         SIMDE_FLOAT32_C(   -475.73), SIMDE_FLOAT32_C(   -862.93),
                         SIMDE_FLOAT32_C(   -304.00), SIMDE_FLOAT32_C( -51195.98)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -998.71), SIMDE_FLOAT32_C(   -667.81),
                         SIMDE_FLOAT32_C(     67.10), SIMDE_FLOAT32_C(   -760.87),
                         SIMDE_FLOAT32_C(   -217.54), SIMDE_FLOAT32_C(    503.65),
                         SIMDE_FLOAT32_C(    247.04), SIMDE_FLOAT32_C(    844.19)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    661.94), SIMDE_FLOAT32_C(   -209.04),
                         SIMDE_FLOAT32_C(    -75.64), SIMDE_FLOAT32_C(   -274.76),
                         SIMDE_FLOAT32_C(   -442.04), SIMDE_FLOAT32_C(   -562.29),
                         SIMDE_FLOAT32_C(   -151.20), SIMDE_FLOAT32_C(    897.08)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -1015.96), SIMDE_FLOAT32_C( -63484.00),
                         SIMDE_FLOAT32_C(    -75.73), SIMDE_FLOAT32_C( -98031.86),
                         SIMDE_FLOAT32_C(   -443.12), SIMDE_FLOAT32_C(-130983.49),
                         SIMDE_FLOAT32_C(   -247.23), SIMDE_FLOAT32_C(    973.21)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    413.34), SIMDE_FLOAT32_C(    969.02),
                         SIMDE_FLOAT32_C(   -337.47), SIMDE_FLOAT32_C(   -324.75),
                         SIMDE_FLOAT32_C(   -193.88), SIMDE_FLOAT32_C(   -534.97),
                         SIMDE_FLOAT32_C(    873.51), SIMDE_FLOAT32_C(   -141.88)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -783.17), SIMDE_FLOAT32_C(    718.95),
                         SIMDE_FLOAT32_C(    707.27), SIMDE_FLOAT32_C(   -573.61),
                         SIMDE_FLOAT32_C(   -268.73), SIMDE_FLOAT32_C(    245.35),
                         SIMDE_FLOAT32_C(    309.36), SIMDE_FLOAT32_C(   -248.32)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-106455.79), SIMDE_FLOAT32_C(    975.97),
                         SIMDE_FLOAT32_C( -94714.82), SIMDE_FLOAT32_C( -89806.08),
                         SIMDE_FLOAT32_C(   -399.98), SIMDE_FLOAT32_C( -62975.62),
                         SIMDE_FLOAT32_C( 112093.41), SIMDE_FLOAT32_C(   -253.95)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -788.80), SIMDE_FLOAT32_C(   -153.54),
                         SIMDE_FLOAT32_C(    281.95), SIMDE_FLOAT32_C(    256.34),
                         SIMDE_FLOAT32_C(   -635.81), SIMDE_FLOAT32_C(   -147.12),
                         SIMDE_FLOAT32_C(   -744.53), SIMDE_FLOAT32_C(    799.41)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(      2.25), SIMDE_FLOAT32_C(    -53.24),
                         SIMDE_FLOAT32_C(    690.38), SIMDE_FLOAT32_C(    776.59),
                         SIMDE_FLOAT32_C(   -713.17), SIMDE_FLOAT32_C(   -856.98),
                         SIMDE_FLOAT32_C(   -302.74), SIMDE_FLOAT32_C(    409.28)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -852.80), SIMDE_FLOAT32_C(   -222.00),
                         SIMDE_FLOAT32_C(  88563.70), SIMDE_FLOAT32_C(  99423.55),
                         SIMDE_FLOAT32_C(   -763.94), SIMDE_FLOAT32_C( -55102.72),
                         SIMDE_FLOAT32_C( -98047.97), SIMDE_FLOAT32_C( 106487.99)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   -404.76), SIMDE_FLOAT32_C(   -875.24),
                         SIMDE_FLOAT32_C(    822.92), SIMDE_FLOAT32_C(    409.42),
                         SIMDE_FLOAT32_C(    144.27), SIMDE_FLOAT32_C(    260.67),
                         SIMDE_FLOAT32_C(    383.19), SIMDE_FLOAT32_C(    173.56)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -913.39), SIMDE_FLOAT32_C(   -747.11),
                         SIMDE_FLOAT32_C(    262.13), SIMDE_FLOAT32_C(    535.89),
                         SIMDE_FLOAT32_C(   -513.17), SIMDE_FLOAT32_C(    945.43),
                         SIMDE_FLOAT32_C(    473.23), SIMDE_FLOAT32_C(    594.31)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(-122099.98), SIMDE_FLOAT32_C(  -1003.24),
                         SIMDE_FLOAT32_C( 106357.79), SIMDE_FLOAT32_C( 105467.93),
                         SIMDE_FLOAT32_C( -36944.00), SIMDE_FLOAT32_C( 122047.55),
                         SIMDE_FLOAT32_C(    511.23), SIMDE_FLOAT32_C(  48543.87)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(    255.64), SIMDE_FLOAT32_C(    352.54),
                         SIMDE_FLOAT32_C(    905.17), SIMDE_FLOAT32_C(    -97.04),
                         SIMDE_FLOAT32_C(    635.19), SIMDE_FLOAT32_C(   -653.37),
                         SIMDE_FLOAT32_C(    871.87), SIMDE_FLOAT32_C(   -471.42)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -534.86), SIMDE_FLOAT32_C(     20.68),
                         SIMDE_FLOAT32_C(    963.33), SIMDE_FLOAT32_C(   -431.12),
                         SIMDE_FLOAT32_C(    199.08), SIMDE_FLOAT32_C(    684.85),
                         SIMDE_FLOAT32_C(   -893.07), SIMDE_FLOAT32_C(   -817.23)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -65463.87), SIMDE_FLOAT32_C(    362.92),
                         SIMDE_FLOAT32_C(    971.50), SIMDE_FLOAT32_C(   -431.25),
                         SIMDE_FLOAT32_C(  57308.48), SIMDE_FLOAT32_C(   -685.87),
                         SIMDE_FLOAT32_C(   -895.87), SIMDE_FLOAT32_C(-122879.96)) }
  };
  /* Number of rows in the table, derived from the array itself. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m256 r;

    /* OR the two inputs and check the result against the table. */
    r = simde_mm256_or_ps(test_vec[i].a, test_vec[i].b);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int
test_simde_mm256_or_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i b;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C( -465239073), INT32_C( 1279184195), INT32_C( 2016764339), INT32_C(-2145324536),
                            INT32_C(-1764212445), INT32_C(  366604460), INT32_C( 2076865232), INT32_C( -193563958)),
      simde_mm256_set_epi32(INT32_C(  541400396), INT32_C( -972933189), INT32_C(  510962050), INT32_C( -823731197),
                            INT32_C(  -98096262), INT32_C( -873938367), INT32_C(  832486509), INT32_C(-1258790386)),
      simde_mm256_set_epi32(INT32_C( -465231905), INT32_C( -834732037), INT32_C( 2121656755), INT32_C( -823722485),
                            INT32_C(  -16815237), INT32_C( -537267475), INT32_C( 2078211837), INT32_C( -184651058)) },
    { simde_mm256_set_epi32(INT32_C( -357268810), INT32_C( -651759515), INT32_C(  235257662), INT32_C( -710907048),
                            INT32_C(-1810919865), INT32_C( 1157547055), INT32_C(-1536937137), INT32_C( 1736416320)),
      simde_mm256_set_epi32(INT32_C( -849859400), INT32_C(  -22261753), INT32_C(-1893529894), INT32_C( -986693653),
                            INT32_C( -263776240), INT32_C(  502458571), INT32_C( -753857515), INT32_C(-1100300353)),
      simde_mm256_set_epi32(INT32_C( -268652866), INT32_C(   -5312409), INT32_C(-1893220354), INT32_C( -709853189),
                            INT32_C( -196108713), INT32_C( 1576987887), INT32_C( -143311009), INT32_C(   -8404993)) },
    { simde_mm256_set_epi32(INT32_C(-1390025255), INT32_C(  226124884), INT32_C( 1790052194), INT32_C( -108567495),
                            INT32_C( 1678439305), INT32_C(  946188942), INT32_C(-2005442113), INT32_C(-1348041469)),
      simde_mm256_set_epi32(INT32_C(  575705169), INT32_C(-1737321258), INT32_C(-1640691781), INT32_C( -551468190),
                            INT32_C( 1952646913), INT32_C( 1393751647), INT32_C(-1388496514), INT32_C(  590324192)),
      simde_mm256_set_epi32(INT32_C(-1351223847), INT32_C(-1652886314), INT32_C(  -21559301), INT32_C(   -5806213),
                            INT32_C( 1953232777), INT32_C( 2071459551), INT32_C(-1384154625), INT32_C(-1347443229)) },
    { simde_mm256_set_epi32(INT32_C(-1745763744), INT32_C( -289111572), INT32_C( -806274679), INT32_C( 1716472169),
                            INT32_C( 1235124509), INT32_C(   -2416200), INT32_C( -718461715), INT32_C(-2068168100)),
      simde_mm256_set_epi32(INT32_C( -747913617), INT32_C( 1661223449), INT32_C(  458600484), INT32_C(-1412075388),
                            INT32_C( 1069408970), INT32_C( 1945090076), INT32_C( -553357992), INT32_C(-1440436522)),
      simde_mm256_set_epi32(INT32_C( -671351185), INT32_C( -272317443), INT32_C( -537544275), INT32_C( -270566931),
                            INT32_C( 2143282143), INT32_C(     -17988), INT32_C( -550670851), INT32_C(-1363218722)) },
    { simde_mm256_set_epi32(INT32_C(-1244409590), INT32_C( 1807868489), INT32_C(  -47548399), INT32_C( 1894098437),
                            INT32_C(-1418958797), INT32_C( -739449954), INT32_C(  531303833), INT32_C( -582313126)),
      simde_mm256_set_epi32(INT32_C(  357521808), INT32_C( 1489594358), INT32_C(-1693320213), INT32_C( 1373628776),
                            INT32_C( -817392977), INT32_C( -536101335), INT32_C( 1011260304), INT32_C(  796312463)),
      simde_mm256_set_epi32(INT32_C(-1243620966), INT32_C( 2076829695), INT32_C(  -12846085), INT32_C( 1912598381),
                            INT32_C( -277873985), INT32_C( -202375233), INT32_C( 1072668569), INT32_C(   -8463393)) },
    { simde_mm256_set_epi32(INT32_C(-1207931928), INT32_C( -834187886), INT32_C(  -41133883), INT32_C( 1826519423),
                            INT32_C(-1537225150), INT32_C(-1972742802), INT32_C(-1107144372), INT32_C( 2074601373)),
      simde_mm256_set_epi32(INT32_C( 1198219345), INT32_C(  680764868), INT32_C( 1929177864), INT32_C( 1458999019),
                            INT32_C(  847246989), INT32_C(  902092277), INT32_C(   17517725), INT32_C(  814431018)),
      simde_mm256_set_epi32(INT32_C(   -9737223), INT32_C( -287838762), INT32_C(    -197683), INT32_C( 2130640895),
                            INT32_C(-1233125681), INT32_C(-1074866689), INT32_C(-1089774115), INT32_C( 2075131839)) },
    { simde_mm256_set_epi32(INT32_C( -137211173), INT32_C( -663911731), INT32_C( 1607469024), INT32_C( -419979231),
                            INT32_C( 1165074553), INT32_C( 1528925488), INT32_C(-1523385372), INT32_C( -907053265)),
      simde_mm256_set_epi32(INT32_C(  994086113), INT32_C(  702992700), INT32_C( -270545927), INT32_C( -490915434),
                            INT32_C( -743254762), INT32_C(   48033191), INT32_C(-2091867903), INT32_C( -756059930)),
      simde_mm256_set_epi32(INT32_C(   -2957573), INT32_C( -101723139), INT32_C(   -2109447), INT32_C( -419448393),
                            INT32_C( -671877761), INT32_C( 1543368119), INT32_C(-1485636635), INT32_C( -605063185)) },
    { simde_mm256_set_epi32(INT32_C( 1358203904), INT32_C(  915012873), INT32_C( 1992547669), INT32_C(-1717736064),
                            INT32_C(-1421704847), INT32_C(-1530428724), INT32_C(  686978685), INT32_C(  376599363)),
      simde_mm256_set_epi32(INT32_C(  963177670), INT32_C( -310024670), INT32_C(-1429164258), INT32_C(  390666975),
                            INT32_C( -901190881), INT32_C(-1166820314), INT32_C( 2002591871), INT32_C( -569097185)),
      simde_mm256_set_epi32(INT32_C( 2046618822), INT32_C(   -7471317), INT32_C(  -19660961), INT32_C(-1612873761),
                            INT32_C( -347411585), INT32_C(-1091059986), INT32_C( 2147448447), INT32_C( -562661537)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_castpd_si256(simde_mm256_or_pd(simde_mm256_castsi256_pd(test_vec[i].a), simde_mm256_castsi256_pd(test_vec[i].b)));
    simde_assert_m256i_equal(r, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm256_permute_ps for the selector values 0 through 7.
 *
 * The table has a single row holding eight (input, expected) pairs;
 * pair p[k] is exercised with selector k.  The selector is written as
 * a literal in each call rather than driven by a loop variable, so the
 * eight checks are spelled out one by one.  Results are compared with
 * simde_assert_m256_close (precision argument 1).
 *
 * Returns 0 when every pair has been checked. */
static int
test_simde_mm256_permute_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    struct {
      simde__m256 a;
      simde__m256 r;
    } p[8];
  } test_vec[1] = {
    {
      {
        { simde_mm256_set_ps(SIMDE_FLOAT32_C( -139.04), SIMDE_FLOAT32_C( -911.51),
                             SIMDE_FLOAT32_C(  580.20), SIMDE_FLOAT32_C(  982.03),
                             SIMDE_FLOAT32_C( -878.62), SIMDE_FLOAT32_C( -797.57),
                             SIMDE_FLOAT32_C( -426.64), SIMDE_FLOAT32_C( -818.52)),
          simde_mm256_set_ps(SIMDE_FLOAT32_C(  982.03), SIMDE_FLOAT32_C(  982.03),
                             SIMDE_FLOAT32_C(  982.03), SIMDE_FLOAT32_C(  982.03),
                             SIMDE_FLOAT32_C( -818.52), SIMDE_FLOAT32_C( -818.52),
                             SIMDE_FLOAT32_C( -818.52), SIMDE_FLOAT32_C( -818.52)) },
        { simde_mm256_set_ps(SIMDE_FLOAT32_C( -946.18), SIMDE_FLOAT32_C(  580.43),
                             SIMDE_FLOAT32_C(  276.12), SIMDE_FLOAT32_C(  862.54),
                             SIMDE_FLOAT32_C(  664.35), SIMDE_FLOAT32_C(  162.22),
                             SIMDE_FLOAT32_C(  234.37), SIMDE_FLOAT32_C(  188.86)),
          simde_mm256_set_ps(SIMDE_FLOAT32_C(  862.54), SIMDE_FLOAT32_C(  862.54),
                             SIMDE_FLOAT32_C(  862.54), SIMDE_FLOAT32_C(  276.12),
                             SIMDE_FLOAT32_C(  188.86), SIMDE_FLOAT32_C(  188.86),
                             SIMDE_FLOAT32_C(  188.86), SIMDE_FLOAT32_C(  234.37)) },
        { simde_mm256_set_ps(SIMDE_FLOAT32_C( -799.19), SIMDE_FLOAT32_C( -835.77),
                             SIMDE_FLOAT32_C( -716.61), SIMDE_FLOAT32_C(  614.58),
                             SIMDE_FLOAT32_C(  366.48), SIMDE_FLOAT32_C(  294.53),
                             SIMDE_FLOAT32_C( -368.14), SIMDE_FLOAT32_C(  638.90)),
          simde_mm256_set_ps(SIMDE_FLOAT32_C(  614.58), SIMDE_FLOAT32_C(  614.58),
                             SIMDE_FLOAT32_C(  614.58), SIMDE_FLOAT32_C( -835.77),
                             SIMDE_FLOAT32_C(  638.90), SIMDE_FLOAT32_C(  638.90),
                             SIMDE_FLOAT32_C(  638.90), SIMDE_FLOAT32_C(  294.53)) },
        { simde_mm256_set_ps(SIMDE_FLOAT32_C(  141.00), SIMDE_FLOAT32_C( -110.45),
                             SIMDE_FLOAT32_C( -302.60), SIMDE_FLOAT32_C( -332.94),
                             SIMDE_FLOAT32_C(  188.73), SIMDE_FLOAT32_C(  551.54),
                             SIMDE_FLOAT32_C( -580.84), SIMDE_FLOAT32_C(  854.95)),
          simde_mm256_set_ps(SIMDE_FLOAT32_C( -332.94), SIMDE_FLOAT32_C( -332.94),
                             SIMDE_FLOAT32_C( -332.94), SIMDE_FLOAT32_C(  141.00),
                             SIMDE_FLOAT32_C(  854.95), SIMDE_FLOAT32_C(  854.95),
                             SIMDE_FLOAT32_C(  854.95), SIMDE_FLOAT32_C(  188.73)) },
        { simde_mm256_set_ps(SIMDE_FLOAT32_C(  655.95), SIMDE_FLOAT32_C( -740.30),
                             SIMDE_FLOAT32_C( -946.00), SIMDE_FLOAT32_C( -434.04),
                             SIMDE_FLOAT32_C(   67.66), SIMDE_FLOAT32_C(  990.02),
                             SIMDE_FLOAT32_C(  889.36), SIMDE_FLOAT32_C(  470.48)),
          simde_mm256_set_ps(SIMDE_FLOAT32_C( -434.04), SIMDE_FLOAT32_C( -434.04),
                             SIMDE_FLOAT32_C( -946.00), SIMDE_FLOAT32_C( -434.04),
                             SIMDE_FLOAT32_C(  470.48), SIMDE_FLOAT32_C(  470.48),
                             SIMDE_FLOAT32_C(  889.36), SIMDE_FLOAT32_C(  470.48)) },
        { simde_mm256_set_ps(SIMDE_FLOAT32_C(  184.64), SIMDE_FLOAT32_C(  689.89),
                             SIMDE_FLOAT32_C(   66.41), SIMDE_FLOAT32_C(  657.41),
                             SIMDE_FLOAT32_C( -642.74), SIMDE_FLOAT32_C(  674.83),
                             SIMDE_FLOAT32_C( -458.59), SIMDE_FLOAT32_C( -735.59)),
          simde_mm256_set_ps(SIMDE_FLOAT32_C(  657.41), SIMDE_FLOAT32_C(  657.41),
                             SIMDE_FLOAT32_C(   66.41), SIMDE_FLOAT32_C(   66.41),
                             SIMDE_FLOAT32_C( -735.59), SIMDE_FLOAT32_C( -735.59),
                             SIMDE_FLOAT32_C( -458.59), SIMDE_FLOAT32_C( -458.59)) },
        { simde_mm256_set_ps(SIMDE_FLOAT32_C( -810.48), SIMDE_FLOAT32_C( -796.84),
                             SIMDE_FLOAT32_C(  173.69), SIMDE_FLOAT32_C(  -58.65),
                             SIMDE_FLOAT32_C( -873.61), SIMDE_FLOAT32_C( -813.18),
                             SIMDE_FLOAT32_C( -876.21), SIMDE_FLOAT32_C( -105.85)),
          simde_mm256_set_ps(SIMDE_FLOAT32_C(  -58.65), SIMDE_FLOAT32_C(  -58.65),
                             SIMDE_FLOAT32_C(  173.69), SIMDE_FLOAT32_C( -796.84),
                             SIMDE_FLOAT32_C( -105.85), SIMDE_FLOAT32_C( -105.85),
                             SIMDE_FLOAT32_C( -876.21), SIMDE_FLOAT32_C( -813.18)) },
        { simde_mm256_set_ps(SIMDE_FLOAT32_C( -653.04), SIMDE_FLOAT32_C( -205.53),
                             SIMDE_FLOAT32_C(  -47.21), SIMDE_FLOAT32_C( -850.03),
                             SIMDE_FLOAT32_C( -120.73), SIMDE_FLOAT32_C( -663.03),
                             SIMDE_FLOAT32_C(  803.42), SIMDE_FLOAT32_C(  391.07)),
          simde_mm256_set_ps(SIMDE_FLOAT32_C( -850.03), SIMDE_FLOAT32_C( -850.03),
                             SIMDE_FLOAT32_C(  -47.21), SIMDE_FLOAT32_C( -653.04),
                             SIMDE_FLOAT32_C(  391.07), SIMDE_FLOAT32_C(  391.07),
                             SIMDE_FLOAT32_C(  803.42), SIMDE_FLOAT32_C( -120.73)) },
      },
    }
  };
  /* Number of rows in the table, derived from the array itself. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* In the expected data, elements 2 and 3 of each 128-bit half always
   * receive that half's element 0; only elements 0 and 1 vary with the
   * selector, as noted per call below. */
  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m256 r;

    /* Selector 0: every element of a half is that half's element 0. */
    r = simde_mm256_permute_ps(test_vec[i].p[0].a, 0);
    simde_assert_m256_close(r, test_vec[i].p[0].r, 1);

    /* Selector 1: element 0 <- source 1, element 1 <- source 0. */
    r = simde_mm256_permute_ps(test_vec[i].p[1].a, 1);
    simde_assert_m256_close(r, test_vec[i].p[1].r, 1);

    /* Selector 2: element 0 <- source 2, element 1 <- source 0. */
    r = simde_mm256_permute_ps(test_vec[i].p[2].a, 2);
    simde_assert_m256_close(r, test_vec[i].p[2].r, 1);

    /* Selector 3: element 0 <- source 3, element 1 <- source 0. */
    r = simde_mm256_permute_ps(test_vec[i].p[3].a, 3);
    simde_assert_m256_close(r, test_vec[i].p[3].r, 1);

    /* Selector 4: element 0 <- source 0, element 1 <- source 1. */
    r = simde_mm256_permute_ps(test_vec[i].p[4].a, 4);
    simde_assert_m256_close(r, test_vec[i].p[4].r, 1);

    /* Selector 5: element 0 <- source 1, element 1 <- source 1. */
    r = simde_mm256_permute_ps(test_vec[i].p[5].a, 5);
    simde_assert_m256_close(r, test_vec[i].p[5].r, 1);

    /* Selector 6: element 0 <- source 2, element 1 <- source 1. */
    r = simde_mm256_permute_ps(test_vec[i].p[6].a, 6);
    simde_assert_m256_close(r, test_vec[i].p[6].r, 1);

    /* Selector 7: element 0 <- source 3, element 1 <- source 1. */
    r = simde_mm256_permute_ps(test_vec[i].p[7].a, 7);
    simde_assert_m256_close(r, test_vec[i].p[7].r, 1);
  }

  return 0;
}

/* Checks simde_mm_permute_pd for the selector values 0 through 3.
 *
 * Each of the eight table rows holds four (input, expected) pairs;
 * pair p[k] is exercised with selector k.  The selector is written as
 * a literal in each call rather than driven by a loop variable, so the
 * four checks are spelled out one by one.  Results are compared with
 * simde_assert_m128d_close (precision argument 1).
 *
 * Returns 0 when every row has been checked. */
static int
test_simde_mm_permute_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    struct {
      simde__m128d a;
      simde__m128d r;
    } p[4];
  } test_vec[8] = {
    {
      {
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -586.66), SIMDE_FLOAT64_C(  759.07)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  759.07), SIMDE_FLOAT64_C(  759.07)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  312.86), SIMDE_FLOAT64_C(  489.25)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  489.25), SIMDE_FLOAT64_C(  312.86)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  510.93), SIMDE_FLOAT64_C( -731.36)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  510.93), SIMDE_FLOAT64_C( -731.36)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  211.69), SIMDE_FLOAT64_C(  302.00)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  211.69), SIMDE_FLOAT64_C(  211.69)) },
      },
    },
    {
      {
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  878.46), SIMDE_FLOAT64_C(  679.15)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  679.15), SIMDE_FLOAT64_C(  679.15)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -990.20), SIMDE_FLOAT64_C( -630.61)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -630.61), SIMDE_FLOAT64_C( -990.20)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  471.54), SIMDE_FLOAT64_C( -307.27)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  471.54), SIMDE_FLOAT64_C( -307.27)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  625.19), SIMDE_FLOAT64_C(  623.51)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  625.19), SIMDE_FLOAT64_C(  625.19)) },
      },
    },
    {
      {
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  637.56), SIMDE_FLOAT64_C(  480.86)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  480.86), SIMDE_FLOAT64_C(  480.86)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -358.97), SIMDE_FLOAT64_C( -958.25)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -958.25), SIMDE_FLOAT64_C( -358.97)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  867.67), SIMDE_FLOAT64_C(  -39.52)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  867.67), SIMDE_FLOAT64_C(  -39.52)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  596.21), SIMDE_FLOAT64_C(  558.65)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  596.21), SIMDE_FLOAT64_C(  596.21)) },
      },
    },
    {
      {
        { simde_mm_set_pd(SIMDE_FLOAT64_C(   32.94), SIMDE_FLOAT64_C( -306.71)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -306.71), SIMDE_FLOAT64_C( -306.71)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  881.79), SIMDE_FLOAT64_C(  737.35)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  737.35), SIMDE_FLOAT64_C(  881.79)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  857.28), SIMDE_FLOAT64_C(  640.79)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  857.28), SIMDE_FLOAT64_C(  640.79)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  529.31), SIMDE_FLOAT64_C(  660.42)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  529.31), SIMDE_FLOAT64_C(  529.31)) },
      },
    },
    {
      {
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -645.32), SIMDE_FLOAT64_C( -500.25)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -500.25), SIMDE_FLOAT64_C( -500.25)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -862.75), SIMDE_FLOAT64_C( -773.36)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -773.36), SIMDE_FLOAT64_C( -862.75)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  795.76), SIMDE_FLOAT64_C(  320.59)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  795.76), SIMDE_FLOAT64_C(  320.59)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -749.80), SIMDE_FLOAT64_C(  899.97)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -749.80), SIMDE_FLOAT64_C( -749.80)) },
      },
    },
    {
      {
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  711.35), SIMDE_FLOAT64_C(  304.45)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  304.45), SIMDE_FLOAT64_C(  304.45)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  -92.64), SIMDE_FLOAT64_C(   45.68)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(   45.68), SIMDE_FLOAT64_C(  -92.64)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  261.59), SIMDE_FLOAT64_C(  207.13)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  261.59), SIMDE_FLOAT64_C(  207.13)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -540.32), SIMDE_FLOAT64_C(  -11.20)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -540.32), SIMDE_FLOAT64_C( -540.32)) },
      },
    },
    {
      {
        { simde_mm_set_pd(SIMDE_FLOAT64_C(   11.20), SIMDE_FLOAT64_C(  175.87)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  175.87), SIMDE_FLOAT64_C(  175.87)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  274.15), SIMDE_FLOAT64_C(  152.30)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  152.30), SIMDE_FLOAT64_C(  274.15)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  673.19), SIMDE_FLOAT64_C(  979.94)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  673.19), SIMDE_FLOAT64_C(  979.94)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(   36.65), SIMDE_FLOAT64_C( -391.85)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(   36.65), SIMDE_FLOAT64_C(   36.65)) },
      },
    },
    {
      {
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -950.12), SIMDE_FLOAT64_C( -979.88)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -979.88), SIMDE_FLOAT64_C( -979.88)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C(  910.59), SIMDE_FLOAT64_C(  380.41)),
          simde_mm_set_pd(SIMDE_FLOAT64_C(  380.41), SIMDE_FLOAT64_C(  910.59)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -731.74), SIMDE_FLOAT64_C(  314.11)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -731.74), SIMDE_FLOAT64_C(  314.11)) },
        { simde_mm_set_pd(SIMDE_FLOAT64_C( -794.00), SIMDE_FLOAT64_C( -659.77)),
          simde_mm_set_pd(SIMDE_FLOAT64_C( -794.00), SIMDE_FLOAT64_C( -794.00)) },
      },
    }
  };
  /* Number of rows in the table, derived from the array itself. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m128d r;

    /* Selector 0: both result elements are the input's low element. */
    r = simde_mm_permute_pd(test_vec[i].p[0].a, 0);
    simde_assert_m128d_close(r, test_vec[i].p[0].r, 1);

    /* Selector 1: the two input elements are swapped. */
    r = simde_mm_permute_pd(test_vec[i].p[1].a, 1);
    simde_assert_m128d_close(r, test_vec[i].p[1].r, 1);

    /* Selector 2: the input is returned unchanged. */
    r = simde_mm_permute_pd(test_vec[i].p[2].a, 2);
    simde_assert_m128d_close(r, test_vec[i].p[2].r, 1);

    /* Selector 3: both result elements are the input's high element. */
    r = simde_mm_permute_pd(test_vec[i].p[3].a, 3);
    simde_assert_m128d_close(r, test_vec[i].p[3].r, 1);
  }

  return 0;
}

/* Exercises simde_mm_permute_ps with eight hand-written cases.
 *
 * Every table entry pairs an input vector with the vector expected for one
 * particular control byte (noted next to the entry).  In each case the
 * expected data matches this rule: the i-th 2-bit field of the control
 * byte, counted from the least significant bits, names the input element
 * that ends up in output element i.
 *
 * The control byte is written as a literal at each call site, so the
 * cases are spelled out one after another rather than driven by a loop.
 *
 * Returns 0 once every comparison has been made; mismatches are handled
 * by the assertion macro. */
static int
test_simde_mm_permute_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 input;     /* vector handed to the permute */
    simde__m128 expected;  /* vector the permute must produce */
  } cases[8] = {
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   -46.54), SIMDE_FLOAT32_C(  -884.02), SIMDE_FLOAT32_C(  -125.85), SIMDE_FLOAT32_C(  -211.36)),
      // imm8 = 104
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -125.85), SIMDE_FLOAT32_C(  -884.02), SIMDE_FLOAT32_C(  -884.02), SIMDE_FLOAT32_C(  -211.36)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -126.12), SIMDE_FLOAT32_C(   654.39), SIMDE_FLOAT32_C(   987.87), SIMDE_FLOAT32_C(   213.63)),
      // imm8 = 15
      simde_mm_set_ps(SIMDE_FLOAT32_C(   213.63), SIMDE_FLOAT32_C(   213.63), SIMDE_FLOAT32_C(  -126.12), SIMDE_FLOAT32_C(  -126.12)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   545.41), SIMDE_FLOAT32_C(   -55.61), SIMDE_FLOAT32_C(   390.65), SIMDE_FLOAT32_C(  -546.65)),
      // imm8 = 25
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -546.65), SIMDE_FLOAT32_C(   390.65), SIMDE_FLOAT32_C(   -55.61), SIMDE_FLOAT32_C(   390.65)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -570.93), SIMDE_FLOAT32_C(   337.51), SIMDE_FLOAT32_C(    48.49), SIMDE_FLOAT32_C(  -941.32)),
      // imm8 = 21
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -941.32), SIMDE_FLOAT32_C(    48.49), SIMDE_FLOAT32_C(    48.49), SIMDE_FLOAT32_C(    48.49)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(    35.91), SIMDE_FLOAT32_C(  -937.58), SIMDE_FLOAT32_C(   867.97), SIMDE_FLOAT32_C(   -64.33)),
      // imm8 = 105
      simde_mm_set_ps(SIMDE_FLOAT32_C(   867.97), SIMDE_FLOAT32_C(  -937.58), SIMDE_FLOAT32_C(  -937.58), SIMDE_FLOAT32_C(   867.97)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   592.52), SIMDE_FLOAT32_C(   823.80), SIMDE_FLOAT32_C(   377.28), SIMDE_FLOAT32_C(   174.06)),
      // imm8 = 246
      simde_mm_set_ps(SIMDE_FLOAT32_C(   592.52), SIMDE_FLOAT32_C(   592.52), SIMDE_FLOAT32_C(   377.28), SIMDE_FLOAT32_C(   823.80)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -853.11), SIMDE_FLOAT32_C(   886.60), SIMDE_FLOAT32_C(  -771.84), SIMDE_FLOAT32_C(  -900.32)),
      // imm8 = 183
      simde_mm_set_ps(SIMDE_FLOAT32_C(   886.60), SIMDE_FLOAT32_C(  -853.11), SIMDE_FLOAT32_C(  -771.84), SIMDE_FLOAT32_C(  -853.11)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(    59.21), SIMDE_FLOAT32_C(  -625.11), SIMDE_FLOAT32_C(  -350.66), SIMDE_FLOAT32_C(   873.23)),
      // imm8 = 169
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -625.11), SIMDE_FLOAT32_C(  -625.11), SIMDE_FLOAT32_C(  -625.11), SIMDE_FLOAT32_C(  -350.66)) }
  };

  { /* case 0 */
    simde__m128 actual = simde_mm_permute_ps(cases[0].input, 104);
    simde_assert_m128_equal(actual, cases[0].expected);
  }

  { /* case 1 */
    simde__m128 actual = simde_mm_permute_ps(cases[1].input, 15);
    simde_assert_m128_equal(actual, cases[1].expected);
  }

  { /* case 2 */
    simde__m128 actual = simde_mm_permute_ps(cases[2].input, 25);
    simde_assert_m128_equal(actual, cases[2].expected);
  }

  { /* case 3 */
    simde__m128 actual = simde_mm_permute_ps(cases[3].input, 21);
    simde_assert_m128_equal(actual, cases[3].expected);
  }

  { /* case 4 */
    simde__m128 actual = simde_mm_permute_ps(cases[4].input, 105);
    simde_assert_m128_equal(actual, cases[4].expected);
  }

  { /* case 5 */
    simde__m128 actual = simde_mm_permute_ps(cases[5].input, 246);
    simde_assert_m128_equal(actual, cases[5].expected);
  }

  { /* case 6 */
    simde__m128 actual = simde_mm_permute_ps(cases[6].input, 183);
    simde_assert_m128_equal(actual, cases[6].expected);
  }

  { /* case 7 */
    simde__m128 actual = simde_mm_permute_ps(cases[7].input, 169);
    simde_assert_m128_equal(actual, cases[7].expected);
  }

  return 0;
}

/* Exercises simde_mm256_permute_pd with control values 0, 1, 2 and 3.
 *
 * Each table entry carries four input vectors and four expected vectors;
 * slot k of an entry is the input/expected pair for control value k.
 * Only these four control values are covered, so in every expected vector
 * the upper two elements both repeat the input's third element (the
 * second simde_mm256_set_pd argument), while the lower two elements vary
 * with the control value.
 *
 * The control value is written as a literal at each call site, which is
 * why the four checks are spelled out rather than looped over.  The
 * trailing 1 handed to the assertion macro is its closeness parameter
 * (exact meaning is defined by the macro, not visible here).
 *
 * Returns 0 once every comparison has been made. */
static int
test_simde_mm256_permute_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d input[4];     /* input[k] is permuted with control k */
    simde__m256d expected[4];  /* expected[k] is the matching result */
  } cases[8] = {
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -307.33), SIMDE_FLOAT64_C( -277.83),
                           SIMDE_FLOAT64_C( -811.26), SIMDE_FLOAT64_C( -340.98)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  520.01), SIMDE_FLOAT64_C(   20.96),
                           SIMDE_FLOAT64_C( -217.27), SIMDE_FLOAT64_C(  475.98)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( -515.97),
                           SIMDE_FLOAT64_C( -252.10), SIMDE_FLOAT64_C( -367.50)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -740.61), SIMDE_FLOAT64_C(  459.66),
                           SIMDE_FLOAT64_C(  780.67), SIMDE_FLOAT64_C( -928.66)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -277.83), SIMDE_FLOAT64_C( -277.83),
                           SIMDE_FLOAT64_C( -340.98), SIMDE_FLOAT64_C( -340.98)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(   20.96), SIMDE_FLOAT64_C(   20.96),
                           SIMDE_FLOAT64_C(  475.98), SIMDE_FLOAT64_C( -217.27)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -515.97), SIMDE_FLOAT64_C( -515.97),
                           SIMDE_FLOAT64_C( -252.10), SIMDE_FLOAT64_C( -367.50)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  459.66), SIMDE_FLOAT64_C(  459.66),
                           SIMDE_FLOAT64_C(  780.67), SIMDE_FLOAT64_C(  780.67)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C(  718.00), SIMDE_FLOAT64_C( -514.42),
                           SIMDE_FLOAT64_C( -222.91), SIMDE_FLOAT64_C( -665.22)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  431.31), SIMDE_FLOAT64_C( -787.13),
                           SIMDE_FLOAT64_C( -902.93), SIMDE_FLOAT64_C( -601.27)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -645.66), SIMDE_FLOAT64_C(  168.66),
                           SIMDE_FLOAT64_C(  823.10), SIMDE_FLOAT64_C(  348.53)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -855.11), SIMDE_FLOAT64_C(  343.83),
                           SIMDE_FLOAT64_C(  888.93), SIMDE_FLOAT64_C(   81.36)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -514.42), SIMDE_FLOAT64_C( -514.42),
                           SIMDE_FLOAT64_C( -665.22), SIMDE_FLOAT64_C( -665.22)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -787.13), SIMDE_FLOAT64_C( -787.13),
                           SIMDE_FLOAT64_C( -601.27), SIMDE_FLOAT64_C( -902.93)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  168.66), SIMDE_FLOAT64_C(  168.66),
                           SIMDE_FLOAT64_C(  823.10), SIMDE_FLOAT64_C(  348.53)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  343.83), SIMDE_FLOAT64_C(  343.83),
                           SIMDE_FLOAT64_C(  888.93), SIMDE_FLOAT64_C(  888.93)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C(  482.80), SIMDE_FLOAT64_C(  651.20),
                           SIMDE_FLOAT64_C( -299.11), SIMDE_FLOAT64_C(  660.92)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  562.16), SIMDE_FLOAT64_C( -407.46),
                           SIMDE_FLOAT64_C(  470.74), SIMDE_FLOAT64_C(  663.86)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -609.81), SIMDE_FLOAT64_C(  224.41),
                           SIMDE_FLOAT64_C(  638.76), SIMDE_FLOAT64_C(  609.66)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  950.61), SIMDE_FLOAT64_C( -221.62),
                           SIMDE_FLOAT64_C(  198.64), SIMDE_FLOAT64_C(  472.05)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C(  651.20), SIMDE_FLOAT64_C(  651.20),
                           SIMDE_FLOAT64_C(  660.92), SIMDE_FLOAT64_C(  660.92)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -407.46), SIMDE_FLOAT64_C( -407.46),
                           SIMDE_FLOAT64_C(  663.86), SIMDE_FLOAT64_C(  470.74)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  224.41), SIMDE_FLOAT64_C(  224.41),
                           SIMDE_FLOAT64_C(  638.76), SIMDE_FLOAT64_C(  609.66)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -221.62), SIMDE_FLOAT64_C( -221.62),
                           SIMDE_FLOAT64_C(  198.64), SIMDE_FLOAT64_C(  198.64)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -304.11), SIMDE_FLOAT64_C(  887.38),
                           SIMDE_FLOAT64_C(  -49.50), SIMDE_FLOAT64_C( -449.56)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -667.31), SIMDE_FLOAT64_C(  293.87),
                           SIMDE_FLOAT64_C( -667.79), SIMDE_FLOAT64_C(  371.99)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  895.41), SIMDE_FLOAT64_C(  116.14),
                           SIMDE_FLOAT64_C(   65.95), SIMDE_FLOAT64_C( -990.78)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -751.37), SIMDE_FLOAT64_C( -570.35),
                           SIMDE_FLOAT64_C(  -32.79), SIMDE_FLOAT64_C(  337.40)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C(  887.38), SIMDE_FLOAT64_C(  887.38),
                           SIMDE_FLOAT64_C( -449.56), SIMDE_FLOAT64_C( -449.56)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  293.87), SIMDE_FLOAT64_C(  293.87),
                           SIMDE_FLOAT64_C(  371.99), SIMDE_FLOAT64_C( -667.79)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  116.14), SIMDE_FLOAT64_C(  116.14),
                           SIMDE_FLOAT64_C(   65.95), SIMDE_FLOAT64_C( -990.78)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -570.35), SIMDE_FLOAT64_C( -570.35),
                           SIMDE_FLOAT64_C(  -32.79), SIMDE_FLOAT64_C(  -32.79)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -78.53), SIMDE_FLOAT64_C( -723.45),
                           SIMDE_FLOAT64_C( -594.84), SIMDE_FLOAT64_C( -504.83)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  599.38), SIMDE_FLOAT64_C( -102.58),
                           SIMDE_FLOAT64_C(  369.99), SIMDE_FLOAT64_C(  -58.86)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -705.85), SIMDE_FLOAT64_C( -561.88),
                           SIMDE_FLOAT64_C( -855.33), SIMDE_FLOAT64_C( -876.41)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  200.30), SIMDE_FLOAT64_C( -816.59),
                           SIMDE_FLOAT64_C(  495.88), SIMDE_FLOAT64_C(  -20.39)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -723.45), SIMDE_FLOAT64_C( -723.45),
                           SIMDE_FLOAT64_C( -504.83), SIMDE_FLOAT64_C( -504.83)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -102.58), SIMDE_FLOAT64_C( -102.58),
                           SIMDE_FLOAT64_C(  -58.86), SIMDE_FLOAT64_C(  369.99)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -561.88), SIMDE_FLOAT64_C( -561.88),
                           SIMDE_FLOAT64_C( -855.33), SIMDE_FLOAT64_C( -876.41)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -816.59), SIMDE_FLOAT64_C( -816.59),
                           SIMDE_FLOAT64_C(  495.88), SIMDE_FLOAT64_C(  495.88)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -459.43), SIMDE_FLOAT64_C(   35.05),
                           SIMDE_FLOAT64_C( -647.26), SIMDE_FLOAT64_C( -116.28)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  861.84), SIMDE_FLOAT64_C(   79.42),
                           SIMDE_FLOAT64_C(  -61.14), SIMDE_FLOAT64_C( -959.28)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -190.88), SIMDE_FLOAT64_C(   91.78),
                           SIMDE_FLOAT64_C(  624.59), SIMDE_FLOAT64_C( -875.05)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -757.98), SIMDE_FLOAT64_C( -777.95),
                           SIMDE_FLOAT64_C( -309.55), SIMDE_FLOAT64_C(  387.53)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C(   35.05), SIMDE_FLOAT64_C(   35.05),
                           SIMDE_FLOAT64_C( -116.28), SIMDE_FLOAT64_C( -116.28)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(   79.42), SIMDE_FLOAT64_C(   79.42),
                           SIMDE_FLOAT64_C( -959.28), SIMDE_FLOAT64_C(  -61.14)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(   91.78), SIMDE_FLOAT64_C(   91.78),
                           SIMDE_FLOAT64_C(  624.59), SIMDE_FLOAT64_C( -875.05)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -777.95), SIMDE_FLOAT64_C( -777.95),
                           SIMDE_FLOAT64_C( -309.55), SIMDE_FLOAT64_C( -309.55)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -420.28),
                           SIMDE_FLOAT64_C( -324.78), SIMDE_FLOAT64_C( -643.43)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  271.76), SIMDE_FLOAT64_C( -727.19),
                           SIMDE_FLOAT64_C(  659.23), SIMDE_FLOAT64_C(   91.29)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  751.73), SIMDE_FLOAT64_C(  366.97),
                           SIMDE_FLOAT64_C(  178.00), SIMDE_FLOAT64_C( -562.69)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  355.89), SIMDE_FLOAT64_C(  861.10),
                           SIMDE_FLOAT64_C(  814.16), SIMDE_FLOAT64_C(  218.35)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -420.28), SIMDE_FLOAT64_C( -420.28),
                           SIMDE_FLOAT64_C( -643.43), SIMDE_FLOAT64_C( -643.43)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -727.19), SIMDE_FLOAT64_C( -727.19),
                           SIMDE_FLOAT64_C(   91.29), SIMDE_FLOAT64_C(  659.23)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  366.97), SIMDE_FLOAT64_C(  366.97),
                           SIMDE_FLOAT64_C(  178.00), SIMDE_FLOAT64_C( -562.69)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  861.10), SIMDE_FLOAT64_C(  861.10),
                           SIMDE_FLOAT64_C(  814.16), SIMDE_FLOAT64_C(  814.16)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -344.58), SIMDE_FLOAT64_C( -961.29),
                           SIMDE_FLOAT64_C(  602.43), SIMDE_FLOAT64_C(  -99.06)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  915.17), SIMDE_FLOAT64_C(  886.67),
                           SIMDE_FLOAT64_C(  631.07), SIMDE_FLOAT64_C( -393.04)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  815.36), SIMDE_FLOAT64_C( -920.33),
                           SIMDE_FLOAT64_C( -701.98), SIMDE_FLOAT64_C(  230.05)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  408.01), SIMDE_FLOAT64_C( -369.61),
                           SIMDE_FLOAT64_C( -195.80), SIMDE_FLOAT64_C( -161.14)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -961.29), SIMDE_FLOAT64_C( -961.29),
                           SIMDE_FLOAT64_C(  -99.06), SIMDE_FLOAT64_C(  -99.06)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C(  886.67), SIMDE_FLOAT64_C(  886.67),
                           SIMDE_FLOAT64_C( -393.04), SIMDE_FLOAT64_C(  631.07)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -920.33), SIMDE_FLOAT64_C( -920.33),
                           SIMDE_FLOAT64_C( -701.98), SIMDE_FLOAT64_C(  230.05)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -369.61), SIMDE_FLOAT64_C( -369.61),
                           SIMDE_FLOAT64_C( -195.80), SIMDE_FLOAT64_C( -195.80)) } }
  };
  const size_t case_count = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < case_count ; idx++) {
    simde__m256d actual;

    /* control 0 */
    actual = simde_mm256_permute_pd(cases[idx].input[0], 0);
    simde_assert_m256d_close(actual, cases[idx].expected[0], 1);

    /* control 1 */
    actual = simde_mm256_permute_pd(cases[idx].input[1], 1);
    simde_assert_m256d_close(actual, cases[idx].expected[1], 1);

    /* control 2 */
    actual = simde_mm256_permute_pd(cases[idx].input[2], 2);
    simde_assert_m256d_close(actual, cases[idx].expected[2], 1);

    /* control 3 */
    actual = simde_mm256_permute_pd(cases[idx].input[3], 3);
    simde_assert_m256d_close(actual, cases[idx].expected[3], 1);
  }

  return 0;
}

/* Exercises simde_mm_permutevar_ps with eight fixed cases.
 *
 * Each entry holds an input vector, an integer selector vector and the
 * expected output.  The selector values are arbitrary 32-bit integers;
 * the expected data is consistent with only the low two bits of selector
 * element i choosing which input element lands in output element i.
 *
 * The trailing 1 handed to the assertion macro is its closeness
 * parameter (exact meaning is defined by the macro, not visible here).
 *
 * Returns 0 once every comparison has been made. */
static int
test_simde_mm_permutevar_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 input;      /* source elements */
    simde__m128i selector;  /* per-element selection control */
    simde__m128 expected;   /* required result */
  } cases[8] = {
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -220.03), SIMDE_FLOAT32_C( -102.32), SIMDE_FLOAT32_C( -878.65), SIMDE_FLOAT32_C(  736.04)),
      simde_mm_set_epi32(INT32_C( 1978615509), INT32_C( -298382064), INT32_C(  844888802), INT32_C(-1984258319)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -878.65), SIMDE_FLOAT32_C(  736.04), SIMDE_FLOAT32_C( -102.32), SIMDE_FLOAT32_C( -878.65)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -74.21), SIMDE_FLOAT32_C(   20.33), SIMDE_FLOAT32_C(  -47.37), SIMDE_FLOAT32_C( -145.03)),
      simde_mm_set_epi32(INT32_C(-1739872531), INT32_C( 1398798289), INT32_C(  139283762), INT32_C( -468646578)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  -47.37), SIMDE_FLOAT32_C(  -47.37), SIMDE_FLOAT32_C(   20.33), SIMDE_FLOAT32_C(   20.33)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  388.02), SIMDE_FLOAT32_C(  910.17), SIMDE_FLOAT32_C( -160.23), SIMDE_FLOAT32_C( -802.99)),
      simde_mm_set_epi32(INT32_C(  836573493), INT32_C(-1468644888), INT32_C( -506758879), INT32_C( -861763047)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -160.23), SIMDE_FLOAT32_C( -802.99), SIMDE_FLOAT32_C( -160.23), SIMDE_FLOAT32_C( -160.23)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  909.90), SIMDE_FLOAT32_C(  126.61), SIMDE_FLOAT32_C(  817.61), SIMDE_FLOAT32_C(  504.25)),
      simde_mm_set_epi32(INT32_C(  584238895), INT32_C(  928311120), INT32_C( -480157729), INT32_C(  870102815)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  909.90), SIMDE_FLOAT32_C(  504.25), SIMDE_FLOAT32_C(  909.90), SIMDE_FLOAT32_C(  909.90)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -446.77), SIMDE_FLOAT32_C(  -55.86), SIMDE_FLOAT32_C(  873.78), SIMDE_FLOAT32_C( -757.74)),
      simde_mm_set_epi32(INT32_C(-1116848756), INT32_C(   67501238), INT32_C( 1151634701), INT32_C(-1833672337)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -757.74), SIMDE_FLOAT32_C(  -55.86), SIMDE_FLOAT32_C(  873.78), SIMDE_FLOAT32_C( -446.77)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   17.75), SIMDE_FLOAT32_C(  909.79), SIMDE_FLOAT32_C( -854.76), SIMDE_FLOAT32_C(  845.21)),
      simde_mm_set_epi32(INT32_C( -650701315), INT32_C( -514022340), INT32_C( -456325153), INT32_C(  575846112)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -854.76), SIMDE_FLOAT32_C(  845.21), SIMDE_FLOAT32_C(   17.75), SIMDE_FLOAT32_C(  845.21)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -703.39), SIMDE_FLOAT32_C(  212.03), SIMDE_FLOAT32_C(  766.48), SIMDE_FLOAT32_C(   58.02)),
      simde_mm_set_epi32(INT32_C(  751532596), INT32_C( 2017282760), INT32_C( 1270374455), INT32_C( -795583425)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(   58.02), SIMDE_FLOAT32_C(   58.02), SIMDE_FLOAT32_C( -703.39), SIMDE_FLOAT32_C( -703.39)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  101.14), SIMDE_FLOAT32_C(  114.68), SIMDE_FLOAT32_C(  986.99), SIMDE_FLOAT32_C( -651.72)),
      simde_mm_set_epi32(INT32_C( 1630473427), INT32_C( 1562779502), INT32_C( 1531074799), INT32_C(-1809767434)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  101.14), SIMDE_FLOAT32_C(  114.68), SIMDE_FLOAT32_C(  101.14), SIMDE_FLOAT32_C(  114.68)) }
  };
  const size_t case_count = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < case_count ; idx++) {
    simde__m128 actual;

    actual = simde_mm_permutevar_ps(cases[idx].input, cases[idx].selector);
    simde_assert_m128_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Exercises simde_mm_permutevar_pd with eight fixed cases.
 *
 * Each entry holds an input vector, a 64-bit integer selector vector and
 * the expected output.  The selector values are arbitrary 64-bit
 * integers; the expected data is consistent with bit 1 of selector
 * element i deciding whether output element i takes the low (bit clear)
 * or the high (bit set) input element.
 *
 * Results are compared for exact equality.
 *
 * Returns 0 once every comparison has been made. */
static int
test_simde_mm_permutevar_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d input;     /* source elements */
    simde__m128i selector;  /* per-element selection control */
    simde__m128d expected;  /* required result */
  } cases[8] = {
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -190.00), SIMDE_FLOAT64_C( -363.64)),
      simde_mm_set_epi64x(INT64_C(-3679719263685326635), INT64_C(-5014117432834044471)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -363.64), SIMDE_FLOAT64_C( -363.64)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  107.93), SIMDE_FLOAT64_C(  497.72)),
      simde_mm_set_epi64x(INT64_C(1238725724228652833), INT64_C(2295575207610739945)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  497.72), SIMDE_FLOAT64_C(  497.72)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  -71.89), SIMDE_FLOAT64_C( -692.84)),
      simde_mm_set_epi64x(INT64_C(3235819707285929243), INT64_C(-60491261046190647)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  -71.89), SIMDE_FLOAT64_C( -692.84)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -729.73), SIMDE_FLOAT64_C(  585.33)),
      simde_mm_set_epi64x(INT64_C(2481877852619205882), INT64_C(3545311239979806958)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -729.73), SIMDE_FLOAT64_C( -729.73)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -735.74), SIMDE_FLOAT64_C(  646.64)),
      simde_mm_set_epi64x(INT64_C(-8424077459294111103), INT64_C(-6727548776374001581)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  646.64), SIMDE_FLOAT64_C( -735.74)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  403.13), SIMDE_FLOAT64_C( -882.62)),
      simde_mm_set_epi64x(INT64_C(-731202554371506341), INT64_C(7811413526677278696)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  403.13), SIMDE_FLOAT64_C( -882.62)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  910.78), SIMDE_FLOAT64_C(  162.04)),
      simde_mm_set_epi64x(INT64_C(-7914651116933831795), INT64_C(-7291667702753737699)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  162.04), SIMDE_FLOAT64_C(  162.04)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -194.64), SIMDE_FLOAT64_C(   21.61)),
      simde_mm_set_epi64x(INT64_C(-8162260555449998511), INT64_C(-7079262047989370805)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(   21.61), SIMDE_FLOAT64_C( -194.64)) }
  };
  const size_t case_count = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < case_count ; idx++) {
    simde__m128d actual;

    actual = simde_mm_permutevar_pd(cases[idx].input, cases[idx].selector);
    simde_assert_m128d_equal(actual, cases[idx].expected);
  }

  return 0;
}

/* Exercises simde_mm256_permutevar_ps with eight fixed cases.
 *
 * Each entry holds an eight-element input vector, an eight-element
 * integer selector vector and the expected output.  The selector values
 * are arbitrary 32-bit integers; the expected data is consistent with the
 * low two bits of selector element i choosing an input element from the
 * same four-element half that output element i belongs to.
 *
 * The trailing 1 handed to the assertion macro is its closeness
 * parameter (exact meaning is defined by the macro, not visible here).
 *
 * Returns 0 once every comparison has been made. */
static int
test_simde_mm256_permutevar_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 input;      /* source elements */
    simde__m256i selector;  /* per-element selection control */
    simde__m256 expected;   /* required result */
  } cases[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -581.11), SIMDE_FLOAT32_C(  662.67),
                         SIMDE_FLOAT32_C(  749.10), SIMDE_FLOAT32_C(  794.46),
                         SIMDE_FLOAT32_C(  351.98), SIMDE_FLOAT32_C(   95.47),
                         SIMDE_FLOAT32_C( -323.47), SIMDE_FLOAT32_C(  766.08)),
      simde_mm256_set_epi32(INT32_C(-1995089848), INT32_C(-1205354020), INT32_C(  942463332), INT32_C( 2042714882),
                            INT32_C(   -3085894), INT32_C( 1293825925), INT32_C( 1293195492), INT32_C( -942672880)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  794.46), SIMDE_FLOAT32_C(  794.46),
                         SIMDE_FLOAT32_C(  794.46), SIMDE_FLOAT32_C(  662.67),
                         SIMDE_FLOAT32_C(   95.47), SIMDE_FLOAT32_C( -323.47),
                         SIMDE_FLOAT32_C(  766.08), SIMDE_FLOAT32_C(  766.08)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  553.57), SIMDE_FLOAT32_C( -638.78),
                         SIMDE_FLOAT32_C( -812.35), SIMDE_FLOAT32_C( -453.08),
                         SIMDE_FLOAT32_C(  690.64), SIMDE_FLOAT32_C( -358.14),
                         SIMDE_FLOAT32_C(  491.09), SIMDE_FLOAT32_C(  642.81)),
      simde_mm256_set_epi32(INT32_C(   74819979), INT32_C(-1724654262), INT32_C( 1654056695), INT32_C(  767929859),
                            INT32_C(-1773750147), INT32_C( 1504941571), INT32_C(-1277766239), INT32_C( -474163433)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  553.57), SIMDE_FLOAT32_C( -638.78),
                         SIMDE_FLOAT32_C(  553.57), SIMDE_FLOAT32_C(  553.57),
                         SIMDE_FLOAT32_C(  491.09), SIMDE_FLOAT32_C(  690.64),
                         SIMDE_FLOAT32_C(  491.09), SIMDE_FLOAT32_C(  690.64)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -359.39), SIMDE_FLOAT32_C(  -64.37),
                         SIMDE_FLOAT32_C( -720.24), SIMDE_FLOAT32_C( -724.37),
                         SIMDE_FLOAT32_C( -455.62), SIMDE_FLOAT32_C(  674.74),
                         SIMDE_FLOAT32_C( -589.17), SIMDE_FLOAT32_C( -867.21)),
      simde_mm256_set_epi32(INT32_C(-1090291188), INT32_C(-2009955584), INT32_C(-1244380880), INT32_C( 2087210230),
                            INT32_C( 1556610240), INT32_C(  347022662), INT32_C( -341820489), INT32_C(  209748637)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -724.37), SIMDE_FLOAT32_C( -724.37),
                         SIMDE_FLOAT32_C( -724.37), SIMDE_FLOAT32_C(  -64.37),
                         SIMDE_FLOAT32_C( -867.21), SIMDE_FLOAT32_C(  674.74),
                         SIMDE_FLOAT32_C( -455.62), SIMDE_FLOAT32_C( -589.17)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -688.17), SIMDE_FLOAT32_C(  265.16),
                         SIMDE_FLOAT32_C(  223.77), SIMDE_FLOAT32_C(  457.91),
                         SIMDE_FLOAT32_C( -449.18), SIMDE_FLOAT32_C(  418.80),
                         SIMDE_FLOAT32_C( -219.32), SIMDE_FLOAT32_C( -798.45)),
      simde_mm256_set_epi32(INT32_C(-2022840556), INT32_C(  147366607), INT32_C( 1340044144), INT32_C(-1339633728),
                            INT32_C(-2141656242), INT32_C(  179357343), INT32_C( -169847688), INT32_C(  944580448)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  457.91), SIMDE_FLOAT32_C( -688.17),
                         SIMDE_FLOAT32_C(  457.91), SIMDE_FLOAT32_C(  457.91),
                         SIMDE_FLOAT32_C(  418.80), SIMDE_FLOAT32_C( -449.18),
                         SIMDE_FLOAT32_C( -798.45), SIMDE_FLOAT32_C( -798.45)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -894.16), SIMDE_FLOAT32_C( -261.98),
                         SIMDE_FLOAT32_C( -668.79), SIMDE_FLOAT32_C( -535.27),
                         SIMDE_FLOAT32_C(  295.60), SIMDE_FLOAT32_C( -624.10),
                         SIMDE_FLOAT32_C( -218.41), SIMDE_FLOAT32_C( -239.98)),
      simde_mm256_set_epi32(INT32_C(-1987732124), INT32_C(-1043251572), INT32_C(-1915492365), INT32_C( 1239473734),
                            INT32_C( 1924578330), INT32_C(  667857703), INT32_C( 1334096582), INT32_C(-1561092382)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -535.27), SIMDE_FLOAT32_C( -535.27),
                         SIMDE_FLOAT32_C( -894.16), SIMDE_FLOAT32_C( -261.98),
                         SIMDE_FLOAT32_C( -624.10), SIMDE_FLOAT32_C(  295.60),
                         SIMDE_FLOAT32_C( -624.10), SIMDE_FLOAT32_C( -624.10)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  282.55), SIMDE_FLOAT32_C(  313.26),
                         SIMDE_FLOAT32_C( -349.67), SIMDE_FLOAT32_C( -128.29),
                         SIMDE_FLOAT32_C(  298.40), SIMDE_FLOAT32_C( -200.08),
                         SIMDE_FLOAT32_C( -322.88), SIMDE_FLOAT32_C(  643.97)),
      simde_mm256_set_epi32(INT32_C(-1231918378), INT32_C(-1967971864), INT32_C( 1721865701), INT32_C( -553468547),
                            INT32_C( -723509981), INT32_C( 1588622188), INT32_C( 1625856378), INT32_C(-1426622327)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  313.26), SIMDE_FLOAT32_C( -128.29),
                         SIMDE_FLOAT32_C( -349.67), SIMDE_FLOAT32_C( -349.67),
                         SIMDE_FLOAT32_C(  298.40), SIMDE_FLOAT32_C(  643.97),
                         SIMDE_FLOAT32_C( -200.08), SIMDE_FLOAT32_C( -322.88)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -773.19), SIMDE_FLOAT32_C(  149.49),
                         SIMDE_FLOAT32_C( -417.32), SIMDE_FLOAT32_C( -747.01),
                         SIMDE_FLOAT32_C(  553.89), SIMDE_FLOAT32_C( -499.06),
                         SIMDE_FLOAT32_C( -480.71), SIMDE_FLOAT32_C( -871.55)),
      simde_mm256_set_epi32(INT32_C(-1619504079), INT32_C(  234838625), INT32_C( 1611169016), INT32_C(  708864983),
                            INT32_C(  595455017), INT32_C( -338961641), INT32_C( 1283075935), INT32_C(  -90174648)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -417.32), SIMDE_FLOAT32_C( -417.32),
                         SIMDE_FLOAT32_C( -747.01), SIMDE_FLOAT32_C( -773.19),
                         SIMDE_FLOAT32_C( -480.71), SIMDE_FLOAT32_C(  553.89),
                         SIMDE_FLOAT32_C(  553.89), SIMDE_FLOAT32_C( -871.55)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  770.41), SIMDE_FLOAT32_C( -575.75),
                         SIMDE_FLOAT32_C( -694.46), SIMDE_FLOAT32_C(  878.16),
                         SIMDE_FLOAT32_C(  230.89), SIMDE_FLOAT32_C( -700.74),
                         SIMDE_FLOAT32_C( -243.26), SIMDE_FLOAT32_C(  192.97)),
      simde_mm256_set_epi32(INT32_C(-1612783450), INT32_C( 2104159364), INT32_C( -271090577), INT32_C(  962282198),
                            INT32_C(-1614359330), INT32_C( -824400343), INT32_C( -259439032), INT32_C( -336808887)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -575.75), SIMDE_FLOAT32_C(  878.16),
                         SIMDE_FLOAT32_C(  770.41), SIMDE_FLOAT32_C( -575.75),
                         SIMDE_FLOAT32_C( -700.74), SIMDE_FLOAT32_C( -243.26),
                         SIMDE_FLOAT32_C(  192.97), SIMDE_FLOAT32_C( -243.26)) }
  };
  const size_t case_count = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < case_count ; idx++) {
    simde__m256 actual;

    actual = simde_mm256_permutevar_ps(cases[idx].input, cases[idx].selector);
    simde_assert_m256_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Exercises simde_mm256_permutevar_pd with eight fixed cases.
 *
 * Each entry holds a four-element input vector, a four-element 64-bit
 * integer selector vector and the expected output.  The selector values
 * are arbitrary 64-bit integers; the expected data is consistent with
 * bit 1 of selector element i choosing the low or high input element of
 * the same two-element half that output element i belongs to.
 *
 * The trailing 1 handed to the assertion macro is its closeness
 * parameter (exact meaning is defined by the macro, not visible here).
 *
 * Returns 0 once every comparison has been made. */
static int
test_simde_mm256_permutevar_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d input;     /* source elements */
    simde__m256i selector;  /* per-element selection control */
    simde__m256d expected;  /* required result */
  } cases[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  191.45), SIMDE_FLOAT64_C(  955.97),
                         SIMDE_FLOAT64_C( -381.93), SIMDE_FLOAT64_C( -276.35)),
      simde_mm256_set_epi64x(INT64_C( 7847047898918917938), INT64_C(-2237739371695600451),
                             INT64_C(-5921100696665465273), INT64_C(-3629132568613815239)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  191.45), SIMDE_FLOAT64_C(  955.97),
                         SIMDE_FLOAT64_C( -381.93), SIMDE_FLOAT64_C( -276.35)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -864.55), SIMDE_FLOAT64_C(  105.60),
                         SIMDE_FLOAT64_C(  308.22), SIMDE_FLOAT64_C( -262.99)),
      simde_mm256_set_epi64x(INT64_C( 1954446392539316319), INT64_C(-5867362525432575314),
                             INT64_C(-5609592881024898283), INT64_C(-2732169121859970729)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -864.55), SIMDE_FLOAT64_C( -864.55),
                         SIMDE_FLOAT64_C( -262.99), SIMDE_FLOAT64_C(  308.22)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C(  408.71),
                         SIMDE_FLOAT64_C(  -24.71), SIMDE_FLOAT64_C(  850.59)),
      simde_mm256_set_epi64x(INT64_C( 8155867202589355926), INT64_C(-4551757813155184517),
                             INT64_C( 4070473136336150836), INT64_C( 8294293362513343506)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C( -415.61),
                         SIMDE_FLOAT64_C(  850.59), SIMDE_FLOAT64_C(  -24.71)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -858.92), SIMDE_FLOAT64_C( -409.27),
                         SIMDE_FLOAT64_C(  940.24), SIMDE_FLOAT64_C(  118.21)),
      simde_mm256_set_epi64x(INT64_C(-6819188498234901479), INT64_C( 2067633441850695354),
                             INT64_C(  371129412881073798), INT64_C(-4745838610152722297)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -409.27), SIMDE_FLOAT64_C( -858.92),
                         SIMDE_FLOAT64_C(  940.24), SIMDE_FLOAT64_C(  940.24)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -35.59), SIMDE_FLOAT64_C(  620.23),
                         SIMDE_FLOAT64_C(  173.49), SIMDE_FLOAT64_C( -242.33)),
      simde_mm256_set_epi64x(INT64_C( 6267827345436252242), INT64_C( 7757337633506703794),
                             INT64_C( 8397042844771135785), INT64_C( 4768191744605903319)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -35.59), SIMDE_FLOAT64_C(  -35.59),
                         SIMDE_FLOAT64_C( -242.33), SIMDE_FLOAT64_C(  173.49)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -615.22), SIMDE_FLOAT64_C( -205.25),
                         SIMDE_FLOAT64_C(  427.82), SIMDE_FLOAT64_C( -695.42)),
      simde_mm256_set_epi64x(INT64_C(-4627283775150795805), INT64_C( 5796432689531982886),
                             INT64_C(-5333403376253040789), INT64_C(-1223877538147285054)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -615.22), SIMDE_FLOAT64_C( -615.22),
                         SIMDE_FLOAT64_C(  427.82), SIMDE_FLOAT64_C(  427.82)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  461.58), SIMDE_FLOAT64_C( -322.38),
                         SIMDE_FLOAT64_C( -747.07), SIMDE_FLOAT64_C( -350.25)),
      simde_mm256_set_epi64x(INT64_C(-4907910955860203917), INT64_C(-1172835446387939434),
                             INT64_C( 6316430026104479052), INT64_C( 8850000004913574542)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  461.58), SIMDE_FLOAT64_C(  461.58),
                         SIMDE_FLOAT64_C( -350.25), SIMDE_FLOAT64_C( -747.07)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  705.05), SIMDE_FLOAT64_C(   49.33),
                         SIMDE_FLOAT64_C(  -85.30), SIMDE_FLOAT64_C(  936.63)),
      simde_mm256_set_epi64x(INT64_C( 8506270823776015936), INT64_C(-7945266156798964263),
                             INT64_C( 7749717350625346930), INT64_C( -406852585870799824)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   49.33), SIMDE_FLOAT64_C(   49.33),
                         SIMDE_FLOAT64_C(  -85.30), SIMDE_FLOAT64_C(  936.63)) }
  };
  const size_t case_count = sizeof(cases) / sizeof(cases[0]);

  for (size_t idx = 0 ; idx < case_count ; idx++) {
    simde__m256d actual;

    actual = simde_mm256_permutevar_pd(cases[idx].input, cases[idx].selector);
    simde_assert_m256d_close(actual, cases[idx].expected, 1);
  }

  return 0;
}

/* Test for simde_mm256_permute2f128_pd.
 *
 * With the `#if 1` branch active, eight fixed cases are checked.  Each
 * test_vec entry carries the two inputs (`a`, `b`), the immediate the
 * entry was generated with (`imm8`) and the expected result (`r`).
 * The immediate is spelled again as a literal at every call site; the
 * `imm8` field is kept next to the data but is not read by the checks.
 * Returns 0 once every case has been checked.
 *
 * The `#else` branch is the generator for the table: it emits eight
 * random cases through the simde_test_*_write_* helpers and returns 1. */
static int
test_simde_mm256_permute2f128_pd(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  const struct {
    const simde_float64 a[4];
    const simde_float64 b[4];
    const int imm8;
    const simde_float64 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(    15.55), SIMDE_FLOAT64_C(  -461.51), SIMDE_FLOAT64_C(   514.66), SIMDE_FLOAT64_C(  -513.58) },
      { SIMDE_FLOAT64_C(    95.37), SIMDE_FLOAT64_C(  -239.15), SIMDE_FLOAT64_C(  -315.05), SIMDE_FLOAT64_C(   844.04) },
       INT32_C(         108),
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(    95.37), SIMDE_FLOAT64_C(  -239.15) } },
    { { SIMDE_FLOAT64_C(   410.02), SIMDE_FLOAT64_C(  -232.21), SIMDE_FLOAT64_C(   585.19), SIMDE_FLOAT64_C(   989.33) },
      { SIMDE_FLOAT64_C(   192.64), SIMDE_FLOAT64_C(  -316.14), SIMDE_FLOAT64_C(  -537.95), SIMDE_FLOAT64_C(  -941.52) },
       INT32_C(          27),
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   585.19), SIMDE_FLOAT64_C(   989.33) } },
    { { SIMDE_FLOAT64_C(   573.89), SIMDE_FLOAT64_C(    20.06), SIMDE_FLOAT64_C(   715.65), SIMDE_FLOAT64_C(   911.84) },
      { SIMDE_FLOAT64_C(  -412.11), SIMDE_FLOAT64_C(   406.05), SIMDE_FLOAT64_C(   758.60), SIMDE_FLOAT64_C(   603.44) },
       INT32_C(          70),
      { SIMDE_FLOAT64_C(  -412.11), SIMDE_FLOAT64_C(   406.05), SIMDE_FLOAT64_C(   573.89), SIMDE_FLOAT64_C(    20.06) } },
    { { SIMDE_FLOAT64_C(  -965.88), SIMDE_FLOAT64_C(  -225.19), SIMDE_FLOAT64_C(  -116.04), SIMDE_FLOAT64_C(  -623.14) },
      { SIMDE_FLOAT64_C(  -940.20), SIMDE_FLOAT64_C(  -698.12), SIMDE_FLOAT64_C(  -941.06), SIMDE_FLOAT64_C(   469.82) },
       INT32_C(         185),
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00) } },
    { { SIMDE_FLOAT64_C(  -672.02), SIMDE_FLOAT64_C(   921.19), SIMDE_FLOAT64_C(  -679.20), SIMDE_FLOAT64_C(   392.39) },
      { SIMDE_FLOAT64_C(  -774.03), SIMDE_FLOAT64_C(  -268.72), SIMDE_FLOAT64_C(   639.82), SIMDE_FLOAT64_C(   799.86) },
       INT32_C(          61),
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   639.82), SIMDE_FLOAT64_C(   799.86) } },
    { { SIMDE_FLOAT64_C(  -238.48), SIMDE_FLOAT64_C(   470.30), SIMDE_FLOAT64_C(   942.67), SIMDE_FLOAT64_C(  -293.93) },
      { SIMDE_FLOAT64_C(  -256.44), SIMDE_FLOAT64_C(  -967.47), SIMDE_FLOAT64_C(   745.99), SIMDE_FLOAT64_C(  -222.32) },
       INT32_C(         123),
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(   745.99), SIMDE_FLOAT64_C(  -222.32) } },
    { { SIMDE_FLOAT64_C(   -68.16), SIMDE_FLOAT64_C(   213.48), SIMDE_FLOAT64_C(  -663.05), SIMDE_FLOAT64_C(  -998.49) },
      { SIMDE_FLOAT64_C(  -142.40), SIMDE_FLOAT64_C(   796.10), SIMDE_FLOAT64_C(  -736.18), SIMDE_FLOAT64_C(   185.58) },
       INT32_C(         170),
      { SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00) } },
    { { SIMDE_FLOAT64_C(   315.90), SIMDE_FLOAT64_C(  -782.21), SIMDE_FLOAT64_C(   743.12), SIMDE_FLOAT64_C(    67.24) },
      { SIMDE_FLOAT64_C(   573.26), SIMDE_FLOAT64_C(   454.82), SIMDE_FLOAT64_C(   406.47), SIMDE_FLOAT64_C(  -665.22) },
       INT32_C(         241),
      { SIMDE_FLOAT64_C(   743.12), SIMDE_FLOAT64_C(    67.24), SIMDE_FLOAT64_C(     0.00), SIMDE_FLOAT64_C(     0.00) } },
  };

  simde__m256d a;
  simde__m256d b;
  simde__m256d r;

  /* Runs one table entry: loads case `idx`, permutes with the literal
   * immediate `imm` and compares against the stored expectation.
   *
   * NOTE(review): the immediate is presumably required to be a
   * compile-time constant (the generator branch below goes through
   * SIMDE_CONSTIFY_256_ for its runtime value), which would explain
   * why the cases are not driven by a loop over test_vec[i].imm8.
   * `imm` must match the `imm8` stored in entry `idx`. */
  #define SIMDE_TEST_PERMUTE2F128_PD_CASE_(idx, imm) \
    do { \
      a = simde_mm256_loadu_pd(test_vec[idx].a); \
      b = simde_mm256_loadu_pd(test_vec[idx].b); \
      r = simde_mm256_permute2f128_pd(a, b, imm); \
      simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[idx].r), 1); \
    } while (0)

  SIMDE_TEST_PERMUTE2F128_PD_CASE_(0, 108);
  SIMDE_TEST_PERMUTE2F128_PD_CASE_(1, 27);
  SIMDE_TEST_PERMUTE2F128_PD_CASE_(2, 70);
  SIMDE_TEST_PERMUTE2F128_PD_CASE_(3, 185);
  SIMDE_TEST_PERMUTE2F128_PD_CASE_(4, 61);
  SIMDE_TEST_PERMUTE2F128_PD_CASE_(5, 123);
  SIMDE_TEST_PERMUTE2F128_PD_CASE_(6, 170);
  SIMDE_TEST_PERMUTE2F128_PD_CASE_(7, 241);

  #undef SIMDE_TEST_PERMUTE2F128_PD_CASE_

  return 0;
#else
  /* Generator: emit eight random (a, b, imm8, r) cases in the layout
   * of the table above. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    int imm8 = simde_test_codegen_random_i32() & 255;
    simde__m256d r;
    SIMDE_CONSTIFY_256_(simde_mm256_permute2f128_pd, r, (HEDLEY_UNREACHABLE(), simde_mm256_setzero_pd()), imm8, a, b);

    simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_codegen_write_i32(2, imm8, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Test for simde_mm256_permute2f128_ps.
 *
 * Sixteen fixed cases, one per immediate 0x0..0xf: case N is run with
 * immediate N, so the table index and the immediate always agree.
 * Cases 0x1..0xf share the same `a`/`b` inputs and differ only in the
 * expected `r`; for cases 0x8..0xf the last four arguments of the
 * expected simde_mm256_set_ps are all 0.00.  Results are compared with
 * simde_assert_m256_close using a precision argument of 1.  Returns 0
 * once every case has been checked. */
static int
test_simde_mm256_permute2f128_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m256 r;
  } test_vec[] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -376.93), SIMDE_FLOAT32_C( -598.80),
                         SIMDE_FLOAT32_C(  335.44), SIMDE_FLOAT32_C( -614.52),
                         SIMDE_FLOAT32_C(  219.29), SIMDE_FLOAT32_C( -425.58),
                         SIMDE_FLOAT32_C(  790.46), SIMDE_FLOAT32_C(  701.47)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -146.60), SIMDE_FLOAT32_C(  813.49),
                         SIMDE_FLOAT32_C( -148.37), SIMDE_FLOAT32_C( -614.66),
                         SIMDE_FLOAT32_C(  951.32), SIMDE_FLOAT32_C(  -49.79),
                         SIMDE_FLOAT32_C(  618.54), SIMDE_FLOAT32_C(  -94.32)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  219.29), SIMDE_FLOAT32_C( -425.58),
                         SIMDE_FLOAT32_C(  790.46), SIMDE_FLOAT32_C(  701.47),
                         SIMDE_FLOAT32_C(  219.29), SIMDE_FLOAT32_C( -425.58),
                         SIMDE_FLOAT32_C(  790.46), SIMDE_FLOAT32_C(  701.47)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C(  -24.42),
                         SIMDE_FLOAT32_C(   78.54), SIMDE_FLOAT32_C(  -19.08),
                         SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  204.91), SIMDE_FLOAT32_C(  161.00),
                         SIMDE_FLOAT32_C(  230.93), SIMDE_FLOAT32_C(  108.17),
                         SIMDE_FLOAT32_C(  327.81), SIMDE_FLOAT32_C( -178.38),
                         SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  935.39), SIMDE_FLOAT32_C(  982.16),
                         SIMDE_FLOAT32_C(  380.38), SIMDE_FLOAT32_C( -917.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00)) }
  };

  simde__m256 r;

  /* Runs table entry `imm` with the literal immediate `imm` and compares
   * the result against that entry's expected vector.  The argument is
   * written as a literal at each use below, as it was in the unrolled
   * form of this test. */
  #define SIMDE_TEST_PERMUTE2F128_PS_CASE_(imm) \
    do { \
      r = simde_mm256_permute2f128_ps(test_vec[imm].a, test_vec[imm].b, imm); \
      simde_assert_m256_close(r, test_vec[imm].r, 1); \
    } while (0)

  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x0);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x1);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x2);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x3);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x4);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x5);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x6);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x7);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x8);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0x9);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0xa);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0xb);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0xc);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0xd);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0xe);
  SIMDE_TEST_PERMUTE2F128_PS_CASE_(0xf);

  #undef SIMDE_TEST_PERMUTE2F128_PS_CASE_

  return 0;
}

/* Test for simde_mm256_permute2f128_si256.
 *
 * With the `#if 1` branch active, eight fixed cases are checked.  Each
 * test_vec entry carries the two inputs (`a`, `b`) and the expected
 * result (`r`) as sixteen int16_t lanes; the immediate for each case
 * is not stored in the table and appears only as a literal at the call
 * site.  Returns 0 once every case has been checked.
 *
 * The `#else` branch is the generator for the table: it emits eight
 * random cases through the simde_test_*_write_* helpers and returns 1. */
static int
test_simde_mm256_permute2f128_si256(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  const struct {
    const int16_t a[16];
    const int16_t b[16];
    const int16_t r[16];
  } test_vec[] = {
    { {  INT16_C(  8272), -INT16_C( 27274),  INT16_C( 13557), -INT16_C( 17507), -INT16_C( 29588), -INT16_C( 29778),  INT16_C(  1080),  INT16_C( 31493),
         INT16_C( 20143), -INT16_C( 24612), -INT16_C( 24508), -INT16_C( 26033), -INT16_C( 18817),  INT16_C( 13712), -INT16_C( 15934),  INT16_C(  4959) },
      { -INT16_C( 10526), -INT16_C( 10328),  INT16_C( 17674),  INT16_C( 30354),  INT16_C( 16594),  INT16_C(  2562),  INT16_C(  1860), -INT16_C(  2939),
         INT16_C( 24918), -INT16_C( 25965), -INT16_C(  7679), -INT16_C( 32716), -INT16_C( 15207),  INT16_C( 23477),  INT16_C(  5510),  INT16_C( 26734) },
      {  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),
         INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
    { { -INT16_C( 11684),  INT16_C( 11884),  INT16_C( 28178),  INT16_C( 22328), -INT16_C( 16779), -INT16_C( 13493), -INT16_C(  8673),  INT16_C(  8549),
        -INT16_C( 26176),  INT16_C( 22945),  INT16_C( 22366), -INT16_C(  6987),  INT16_C(  9068),  INT16_C( 22348), -INT16_C( 29894), -INT16_C( 27060) },
      { -INT16_C( 18339),  INT16_C( 28868), -INT16_C(   986), -INT16_C( 25401),  INT16_C(  4794), -INT16_C(  9625), -INT16_C( 12816), -INT16_C( 20229),
        -INT16_C( 25498), -INT16_C( 15350), -INT16_C( 16397),  INT16_C( 24488), -INT16_C(  2846),  INT16_C(  7350),  INT16_C(   896), -INT16_C(  8782) },
      {  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),
         INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
    { {  INT16_C(  5235),  INT16_C( 11646), -INT16_C(  6874),  INT16_C(  5639),  INT16_C(   690),  INT16_C(  6599), -INT16_C( 11873), -INT16_C( 27939),
        -INT16_C( 31088),  INT16_C( 29426), -INT16_C( 22406), -INT16_C(  1393),  INT16_C( 16811),  INT16_C( 26584),  INT16_C(  9656),  INT16_C( 11081) },
      { -INT16_C( 14534),  INT16_C( 24664),  INT16_C( 24748),  INT16_C( 24439),  INT16_C( 15970),  INT16_C(   376),  INT16_C( 21775), -INT16_C( 24684),
        -INT16_C( 31013),  INT16_C( 22033), -INT16_C( 24530), -INT16_C(  9648),  INT16_C( 10466), -INT16_C( 26047), -INT16_C( 30130), -INT16_C( 30523) },
      { -INT16_C( 31088),  INT16_C( 29426), -INT16_C( 22406), -INT16_C(  1393),  INT16_C( 16811),  INT16_C( 26584),  INT16_C(  9656),  INT16_C( 11081),
        -INT16_C( 31088),  INT16_C( 29426), -INT16_C( 22406), -INT16_C(  1393),  INT16_C( 16811),  INT16_C( 26584),  INT16_C(  9656),  INT16_C( 11081) } },
    { {  INT16_C( 24445), -INT16_C(  8100), -INT16_C( 11107), -INT16_C( 21279),  INT16_C( 29994),  INT16_C(  1355),  INT16_C( 24059),  INT16_C( 10843),
        -INT16_C( 21251), -INT16_C(  8444),  INT16_C( 17876),  INT16_C(  8825),  INT16_C( 16079),  INT16_C(  8362), -INT16_C( 27812), -INT16_C(  9955) },
      {  INT16_C( 31474), -INT16_C( 28487), -INT16_C( 25778),  INT16_C( 30780), -INT16_C( 30704),  INT16_C(  3198), -INT16_C(  9755), -INT16_C(  7626),
         INT16_C( 14981),  INT16_C( 23234),  INT16_C( 15231),  INT16_C( 20092),  INT16_C( 10106), -INT16_C( 10642), -INT16_C( 29766), -INT16_C( 21329) },
      { -INT16_C( 21251), -INT16_C(  8444),  INT16_C( 17876),  INT16_C(  8825),  INT16_C( 16079),  INT16_C(  8362), -INT16_C( 27812), -INT16_C(  9955),
         INT16_C( 24445), -INT16_C(  8100), -INT16_C( 11107), -INT16_C( 21279),  INT16_C( 29994),  INT16_C(  1355),  INT16_C( 24059),  INT16_C( 10843) } },
    { {  INT16_C( 30980),  INT16_C(  5324),  INT16_C( 18945), -INT16_C(  6624),  INT16_C( 22052), -INT16_C( 22072), -INT16_C( 30064),  INT16_C(  3843),
        -INT16_C( 32570),  INT16_C( 16477), -INT16_C( 13401),  INT16_C( 24854), -INT16_C( 15017),  INT16_C( 23565),  INT16_C( 18990),  INT16_C( 12976) },
      {  INT16_C( 32195), -INT16_C( 15289),  INT16_C( 26567), -INT16_C(  5206),  INT16_C( 29374),  INT16_C( 20117), -INT16_C( 26371), -INT16_C( 15522),
        -INT16_C( 17640), -INT16_C( 16637),  INT16_C(  6535), -INT16_C(  8672),  INT16_C( 11998),  INT16_C(  3386), -INT16_C(  5256),  INT16_C( 15167) },
      {  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),
         INT16_C( 32195), -INT16_C( 15289),  INT16_C( 26567), -INT16_C(  5206),  INT16_C( 29374),  INT16_C( 20117), -INT16_C( 26371), -INT16_C( 15522) } },
    { { -INT16_C( 22034), -INT16_C( 21477), -INT16_C( 20453),  INT16_C(  6394),  INT16_C( 22600),  INT16_C( 25051), -INT16_C(  8684), -INT16_C( 25824),
         INT16_C( 16887), -INT16_C( 10631), -INT16_C( 19601), -INT16_C(  6173),  INT16_C(  8862),  INT16_C(  1570),  INT16_C(  8617), -INT16_C( 26826) },
      {  INT16_C( 20938), -INT16_C(  6845),  INT16_C( 15617),  INT16_C( 18942), -INT16_C(  9834), -INT16_C( 21846), -INT16_C( 13384), -INT16_C( 20667),
        -INT16_C( 16884),  INT16_C( 31621),  INT16_C( 26737),  INT16_C(  4194), -INT16_C( 31605),  INT16_C( 13334),  INT16_C( 19621),  INT16_C( 28619) },
      {  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),
         INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
    { {  INT16_C( 21067), -INT16_C(  7704), -INT16_C( 28116), -INT16_C(  7029), -INT16_C( 12195),  INT16_C( 27027),  INT16_C(  6542),  INT16_C(   228),
         INT16_C( 18049),  INT16_C(  3088),  INT16_C(  9930),  INT16_C( 28480),  INT16_C(  2931),  INT16_C(  4318),  INT16_C( 13081),  INT16_C( 26031) },
      { -INT16_C( 26747), -INT16_C( 20154), -INT16_C( 11735), -INT16_C( 30827),  INT16_C( 10658),  INT16_C( 12784), -INT16_C( 10942), -INT16_C( 15567),
         INT16_C( 16667), -INT16_C(  6448),  INT16_C(  4199), -INT16_C(  9643),  INT16_C( 13340),  INT16_C( 13803), -INT16_C( 26009), -INT16_C(  4966) },
      {  INT16_C( 18049),  INT16_C(  3088),  INT16_C(  9930),  INT16_C( 28480),  INT16_C(  2931),  INT16_C(  4318),  INT16_C( 13081),  INT16_C( 26031),
         INT16_C( 16667), -INT16_C(  6448),  INT16_C(  4199), -INT16_C(  9643),  INT16_C( 13340),  INT16_C( 13803), -INT16_C( 26009), -INT16_C(  4966) } },
    { {  INT16_C( 13235),  INT16_C( 21985), -INT16_C( 11684), -INT16_C( 24954), -INT16_C( 18521), -INT16_C( 15774),  INT16_C( 13048),  INT16_C( 24744),
        -INT16_C(   446),  INT16_C( 24122),  INT16_C(  9522), -INT16_C( 26220),  INT16_C( 11967), -INT16_C(  3963),  INT16_C(  8975), -INT16_C( 15797) },
      {  INT16_C( 11351), -INT16_C( 19688), -INT16_C( 24834), -INT16_C( 23214), -INT16_C( 19370),  INT16_C( 20072),  INT16_C(  4326),  INT16_C( 10414),
        -INT16_C(  5874),  INT16_C( 16519),  INT16_C(  6926), -INT16_C( 12583),  INT16_C( 24393),  INT16_C( 22974),  INT16_C(  2434), -INT16_C(  9957) },
      {  INT16_C( 11351), -INT16_C( 19688), -INT16_C( 24834), -INT16_C( 23214), -INT16_C( 19370),  INT16_C( 20072),  INT16_C(  4326),  INT16_C( 10414),
        -INT16_C(  5874),  INT16_C( 16519),  INT16_C(  6926), -INT16_C( 12583),  INT16_C( 24393),  INT16_C( 22974),  INT16_C(  2434), -INT16_C(  9957) } },
  };

  simde__m256i a;
  simde__m256i b;
  simde__m256i r;

  /* Runs one table entry: loads case `idx`, permutes with the literal
   * immediate `imm` and compares all sixteen lanes against the stored
   * expectation.
   *
   * NOTE(review): the immediate is presumably required to be a
   * compile-time constant (the generator branch below goes through
   * SIMDE_CONSTIFY_256_ for its runtime value), which would explain
   * why the cases are spelled out rather than looped. */
  #define SIMDE_TEST_PERMUTE2F128_SI256_CASE_(idx, imm) \
    do { \
      a = simde_mm256_loadu_epi16(test_vec[idx].a); \
      b = simde_mm256_loadu_epi16(test_vec[idx].b); \
      r = simde_mm256_permute2f128_si256(a, b, imm); \
      simde_test_x86_assert_equal_i16x16(r, simde_mm256_loadu_epi16(test_vec[idx].r)); \
    } while (0)

  SIMDE_TEST_PERMUTE2F128_SI256_CASE_(0, INT32_C(235));
  SIMDE_TEST_PERMUTE2F128_SI256_CASE_(1, INT32_C(187));
  SIMDE_TEST_PERMUTE2F128_SI256_CASE_(2, INT32_C(81));
  SIMDE_TEST_PERMUTE2F128_SI256_CASE_(3, INT32_C(5));
  SIMDE_TEST_PERMUTE2F128_SI256_CASE_(4, INT32_C(104));
  SIMDE_TEST_PERMUTE2F128_SI256_CASE_(5, INT32_C(157));
  SIMDE_TEST_PERMUTE2F128_SI256_CASE_(6, INT32_C(49));
  SIMDE_TEST_PERMUTE2F128_SI256_CASE_(7, INT32_C(54));

  #undef SIMDE_TEST_PERMUTE2F128_SI256_CASE_

  return 0;
#else
  /* Generator: emit eight random (a, b, imm8, r) cases.  Note that it
   * also writes imm8 between b and r, a column the table above does
   * not have. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m256i a = simde_test_x86_random_i16x16();
    simde__m256i b = simde_test_x86_random_i16x16();
    int imm8 = simde_test_codegen_random_i32() & 255;
    simde__m256i r;
    SIMDE_CONSTIFY_256_(simde_mm256_permute2f128_si256, r, (HEDLEY_UNREACHABLE(), simde_mm256_setzero_si256()), imm8, a, b);

    simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_codegen_write_i32(2, imm8, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven check of simde_mm256_rcp_ps.
 *
 * Every entry pairs an input vector with the result the test expects;
 * the computed vector is compared against it with
 * simde_assert_m256_close using a precision argument of 1.
 *
 * Returns 0 once every entry has been processed. */
static int
test_simde_mm256_rcp_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a; /* input passed to simde_mm256_rcp_ps */
    simde__m256 r; /* expected result */
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -908.92), SIMDE_FLOAT32_C( -201.59),
                         SIMDE_FLOAT32_C(    3.47), SIMDE_FLOAT32_C(  829.08),
                         SIMDE_FLOAT32_C(  -86.36), SIMDE_FLOAT32_C(  780.02),
                         SIMDE_FLOAT32_C(   13.29), SIMDE_FLOAT32_C(  492.53)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(   -0.00),
                         SIMDE_FLOAT32_C(    0.29), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(   -0.01), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.08), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  745.72), SIMDE_FLOAT32_C( -860.90),
                         SIMDE_FLOAT32_C(  647.35), SIMDE_FLOAT32_C( -932.06),
                         SIMDE_FLOAT32_C(  782.22), SIMDE_FLOAT32_C(  232.69),
                         SIMDE_FLOAT32_C(   88.27), SIMDE_FLOAT32_C( -882.29)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(   -0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(   -0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.01), SIMDE_FLOAT32_C(   -0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  750.74), SIMDE_FLOAT32_C(  -90.83),
                         SIMDE_FLOAT32_C(  949.51), SIMDE_FLOAT32_C(  177.31),
                         SIMDE_FLOAT32_C( -204.98), SIMDE_FLOAT32_C(  340.91),
                         SIMDE_FLOAT32_C(  -39.69), SIMDE_FLOAT32_C( -715.33)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(   -0.01),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.01),
                         SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(   -0.03), SIMDE_FLOAT32_C(   -0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -147.11), SIMDE_FLOAT32_C(  588.21),
                         SIMDE_FLOAT32_C(  521.36), SIMDE_FLOAT32_C( -659.55),
                         SIMDE_FLOAT32_C(  932.00), SIMDE_FLOAT32_C(  548.33),
                         SIMDE_FLOAT32_C(  639.13), SIMDE_FLOAT32_C( -316.06)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.01), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(   -0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(   -0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -794.81), SIMDE_FLOAT32_C(   88.08),
                         SIMDE_FLOAT32_C( -540.52), SIMDE_FLOAT32_C(   32.82),
                         SIMDE_FLOAT32_C( -921.78), SIMDE_FLOAT32_C( -970.13),
                         SIMDE_FLOAT32_C(  659.29), SIMDE_FLOAT32_C( -464.98)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.01),
                         SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.03),
                         SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(   -0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(   -0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -853.91), SIMDE_FLOAT32_C(  837.38),
                         SIMDE_FLOAT32_C( -478.03), SIMDE_FLOAT32_C(  330.06),
                         SIMDE_FLOAT32_C(  627.16), SIMDE_FLOAT32_C(  535.10),
                         SIMDE_FLOAT32_C( -787.00), SIMDE_FLOAT32_C(  376.04)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -244.65), SIMDE_FLOAT32_C(  415.43),
                         SIMDE_FLOAT32_C(  415.27), SIMDE_FLOAT32_C(  243.86),
                         SIMDE_FLOAT32_C(  475.16), SIMDE_FLOAT32_C(    0.53),
                         SIMDE_FLOAT32_C( -509.99), SIMDE_FLOAT32_C( -861.80)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    1.89),
                         SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(   -0.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -819.61), SIMDE_FLOAT32_C( -441.38),
                         SIMDE_FLOAT32_C( -736.01), SIMDE_FLOAT32_C(  681.16),
                         SIMDE_FLOAT32_C( -798.05), SIMDE_FLOAT32_C(  561.39),
                         SIMDE_FLOAT32_C(  116.98), SIMDE_FLOAT32_C( -372.62)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(   -0.00),
                         SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(   -0.00), SIMDE_FLOAT32_C(    0.00),
                         SIMDE_FLOAT32_C(    0.01), SIMDE_FLOAT32_C(   -0.00)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    simde__m256 r;

    r = simde_mm256_rcp_ps(test_vec[i].a);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_round_ps.
 *
 * Every entry holds one input vector plus the expected result for each
 * of the four rounding constants exercised below.  Each computed vector
 * is compared against its expected counterpart with
 * simde_assert_m256_close using a precision argument of 1.
 *
 * Returns 0 once every entry has been processed. */
static int
test_simde_mm256_round_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;        /* input vector */
    simde__m256 nearest;  /* expected for SIMDE_MM_FROUND_TO_NEAREST_INT */
    simde__m256 neg_inf;  /* expected for SIMDE_MM_FROUND_TO_NEG_INF */
    simde__m256 pos_inf;  /* expected for SIMDE_MM_FROUND_TO_POS_INF */
    simde__m256 truncate; /* expected for SIMDE_MM_FROUND_TO_ZERO */
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -437.99), SIMDE_FLOAT32_C(  332.86),
                         SIMDE_FLOAT32_C(  531.55), SIMDE_FLOAT32_C(  188.24),
                         SIMDE_FLOAT32_C(  135.31), SIMDE_FLOAT32_C( -341.69),
                         SIMDE_FLOAT32_C( -995.08), SIMDE_FLOAT32_C(  -84.86)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -438.00), SIMDE_FLOAT32_C(  333.00),
                         SIMDE_FLOAT32_C(  532.00), SIMDE_FLOAT32_C(  188.00),
                         SIMDE_FLOAT32_C(  135.00), SIMDE_FLOAT32_C( -342.00),
                         SIMDE_FLOAT32_C( -995.00), SIMDE_FLOAT32_C(  -85.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -438.00), SIMDE_FLOAT32_C(  332.00),
                         SIMDE_FLOAT32_C(  531.00), SIMDE_FLOAT32_C(  188.00),
                         SIMDE_FLOAT32_C(  135.00), SIMDE_FLOAT32_C( -342.00),
                         SIMDE_FLOAT32_C( -996.00), SIMDE_FLOAT32_C(  -85.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -437.00), SIMDE_FLOAT32_C(  333.00),
                         SIMDE_FLOAT32_C(  532.00), SIMDE_FLOAT32_C(  189.00),
                         SIMDE_FLOAT32_C(  136.00), SIMDE_FLOAT32_C( -341.00),
                         SIMDE_FLOAT32_C( -995.00), SIMDE_FLOAT32_C(  -84.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -437.00), SIMDE_FLOAT32_C(  332.00),
                         SIMDE_FLOAT32_C(  531.00), SIMDE_FLOAT32_C(  188.00),
                         SIMDE_FLOAT32_C(  135.00), SIMDE_FLOAT32_C( -341.00),
                         SIMDE_FLOAT32_C( -995.00), SIMDE_FLOAT32_C(  -84.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.77), SIMDE_FLOAT32_C( -902.26),
                         SIMDE_FLOAT32_C(  960.96), SIMDE_FLOAT32_C(  885.00),
                         SIMDE_FLOAT32_C(  184.99), SIMDE_FLOAT32_C(  273.55),
                         SIMDE_FLOAT32_C( -508.74), SIMDE_FLOAT32_C(  304.51)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -272.00), SIMDE_FLOAT32_C( -902.00),
                         SIMDE_FLOAT32_C(  961.00), SIMDE_FLOAT32_C(  885.00),
                         SIMDE_FLOAT32_C(  185.00), SIMDE_FLOAT32_C(  274.00),
                         SIMDE_FLOAT32_C( -509.00), SIMDE_FLOAT32_C(  305.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -272.00), SIMDE_FLOAT32_C( -903.00),
                         SIMDE_FLOAT32_C(  960.00), SIMDE_FLOAT32_C(  885.00),
                         SIMDE_FLOAT32_C(  184.00), SIMDE_FLOAT32_C(  273.00),
                         SIMDE_FLOAT32_C( -509.00), SIMDE_FLOAT32_C(  304.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.00), SIMDE_FLOAT32_C( -902.00),
                         SIMDE_FLOAT32_C(  961.00), SIMDE_FLOAT32_C(  885.00),
                         SIMDE_FLOAT32_C(  185.00), SIMDE_FLOAT32_C(  274.00),
                         SIMDE_FLOAT32_C( -508.00), SIMDE_FLOAT32_C(  305.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.00), SIMDE_FLOAT32_C( -902.00),
                         SIMDE_FLOAT32_C(  960.00), SIMDE_FLOAT32_C(  885.00),
                         SIMDE_FLOAT32_C(  184.00), SIMDE_FLOAT32_C(  273.00),
                         SIMDE_FLOAT32_C( -508.00), SIMDE_FLOAT32_C(  304.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  970.76), SIMDE_FLOAT32_C( -833.91),
                         SIMDE_FLOAT32_C( -187.58), SIMDE_FLOAT32_C(   27.59),
                         SIMDE_FLOAT32_C(  181.38), SIMDE_FLOAT32_C( -399.46),
                         SIMDE_FLOAT32_C( -127.86), SIMDE_FLOAT32_C( -393.23)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  971.00), SIMDE_FLOAT32_C( -834.00),
                         SIMDE_FLOAT32_C( -188.00), SIMDE_FLOAT32_C(   28.00),
                         SIMDE_FLOAT32_C(  181.00), SIMDE_FLOAT32_C( -399.00),
                         SIMDE_FLOAT32_C( -128.00), SIMDE_FLOAT32_C( -393.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  970.00), SIMDE_FLOAT32_C( -834.00),
                         SIMDE_FLOAT32_C( -188.00), SIMDE_FLOAT32_C(   27.00),
                         SIMDE_FLOAT32_C(  181.00), SIMDE_FLOAT32_C( -400.00),
                         SIMDE_FLOAT32_C( -128.00), SIMDE_FLOAT32_C( -394.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  971.00), SIMDE_FLOAT32_C( -833.00),
                         SIMDE_FLOAT32_C( -187.00), SIMDE_FLOAT32_C(   28.00),
                         SIMDE_FLOAT32_C(  182.00), SIMDE_FLOAT32_C( -399.00),
                         SIMDE_FLOAT32_C( -127.00), SIMDE_FLOAT32_C( -393.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  970.00), SIMDE_FLOAT32_C( -833.00),
                         SIMDE_FLOAT32_C( -187.00), SIMDE_FLOAT32_C(   27.00),
                         SIMDE_FLOAT32_C(  181.00), SIMDE_FLOAT32_C( -399.00),
                         SIMDE_FLOAT32_C( -127.00), SIMDE_FLOAT32_C( -393.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  259.90), SIMDE_FLOAT32_C( -282.33),
                         SIMDE_FLOAT32_C(  766.19), SIMDE_FLOAT32_C(  948.74),
                         SIMDE_FLOAT32_C( -533.05), SIMDE_FLOAT32_C(  397.75),
                         SIMDE_FLOAT32_C(  998.83), SIMDE_FLOAT32_C( -841.13)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  260.00), SIMDE_FLOAT32_C( -282.00),
                         SIMDE_FLOAT32_C(  766.00), SIMDE_FLOAT32_C(  949.00),
                         SIMDE_FLOAT32_C( -533.00), SIMDE_FLOAT32_C(  398.00),
                         SIMDE_FLOAT32_C(  999.00), SIMDE_FLOAT32_C( -841.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  259.00), SIMDE_FLOAT32_C( -283.00),
                         SIMDE_FLOAT32_C(  766.00), SIMDE_FLOAT32_C(  948.00),
                         SIMDE_FLOAT32_C( -534.00), SIMDE_FLOAT32_C(  397.00),
                         SIMDE_FLOAT32_C(  998.00), SIMDE_FLOAT32_C( -842.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  260.00), SIMDE_FLOAT32_C( -282.00),
                         SIMDE_FLOAT32_C(  767.00), SIMDE_FLOAT32_C(  949.00),
                         SIMDE_FLOAT32_C( -533.00), SIMDE_FLOAT32_C(  398.00),
                         SIMDE_FLOAT32_C(  999.00), SIMDE_FLOAT32_C( -841.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  259.00), SIMDE_FLOAT32_C( -282.00),
                         SIMDE_FLOAT32_C(  766.00), SIMDE_FLOAT32_C(  948.00),
                         SIMDE_FLOAT32_C( -533.00), SIMDE_FLOAT32_C(  397.00),
                         SIMDE_FLOAT32_C(  998.00), SIMDE_FLOAT32_C( -841.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -178.69), SIMDE_FLOAT32_C(  880.71),
                         SIMDE_FLOAT32_C( -928.72), SIMDE_FLOAT32_C( -201.24),
                         SIMDE_FLOAT32_C(  -99.45), SIMDE_FLOAT32_C(  785.84),
                         SIMDE_FLOAT32_C(  542.02), SIMDE_FLOAT32_C(  -81.93)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -179.00), SIMDE_FLOAT32_C(  881.00),
                         SIMDE_FLOAT32_C( -929.00), SIMDE_FLOAT32_C( -201.00),
                         SIMDE_FLOAT32_C(  -99.00), SIMDE_FLOAT32_C(  786.00),
                         SIMDE_FLOAT32_C(  542.00), SIMDE_FLOAT32_C(  -82.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -179.00), SIMDE_FLOAT32_C(  880.00),
                         SIMDE_FLOAT32_C( -929.00), SIMDE_FLOAT32_C( -202.00),
                         SIMDE_FLOAT32_C( -100.00), SIMDE_FLOAT32_C(  785.00),
                         SIMDE_FLOAT32_C(  542.00), SIMDE_FLOAT32_C(  -82.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -178.00), SIMDE_FLOAT32_C(  881.00),
                         SIMDE_FLOAT32_C( -928.00), SIMDE_FLOAT32_C( -201.00),
                         SIMDE_FLOAT32_C(  -99.00), SIMDE_FLOAT32_C(  786.00),
                         SIMDE_FLOAT32_C(  543.00), SIMDE_FLOAT32_C(  -81.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -178.00), SIMDE_FLOAT32_C(  880.00),
                         SIMDE_FLOAT32_C( -928.00), SIMDE_FLOAT32_C( -201.00),
                         SIMDE_FLOAT32_C(  -99.00), SIMDE_FLOAT32_C(  785.00),
                         SIMDE_FLOAT32_C(  542.00), SIMDE_FLOAT32_C(  -81.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -937.16), SIMDE_FLOAT32_C(  854.52),
                         SIMDE_FLOAT32_C(  980.48), SIMDE_FLOAT32_C(  -86.24),
                         SIMDE_FLOAT32_C(  473.38), SIMDE_FLOAT32_C(  104.75),
                         SIMDE_FLOAT32_C(   14.33), SIMDE_FLOAT32_C(   91.35)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -937.00), SIMDE_FLOAT32_C(  855.00),
                         SIMDE_FLOAT32_C(  980.00), SIMDE_FLOAT32_C(  -86.00),
                         SIMDE_FLOAT32_C(  473.00), SIMDE_FLOAT32_C(  105.00),
                         SIMDE_FLOAT32_C(   14.00), SIMDE_FLOAT32_C(   91.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C(  854.00),
                         SIMDE_FLOAT32_C(  980.00), SIMDE_FLOAT32_C(  -87.00),
                         SIMDE_FLOAT32_C(  473.00), SIMDE_FLOAT32_C(  104.00),
                         SIMDE_FLOAT32_C(   14.00), SIMDE_FLOAT32_C(   91.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -937.00), SIMDE_FLOAT32_C(  855.00),
                         SIMDE_FLOAT32_C(  981.00), SIMDE_FLOAT32_C(  -86.00),
                         SIMDE_FLOAT32_C(  474.00), SIMDE_FLOAT32_C(  105.00),
                         SIMDE_FLOAT32_C(   15.00), SIMDE_FLOAT32_C(   92.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -937.00), SIMDE_FLOAT32_C(  854.00),
                         SIMDE_FLOAT32_C(  980.00), SIMDE_FLOAT32_C(  -86.00),
                         SIMDE_FLOAT32_C(  473.00), SIMDE_FLOAT32_C(  104.00),
                         SIMDE_FLOAT32_C(   14.00), SIMDE_FLOAT32_C(   91.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  529.30), SIMDE_FLOAT32_C(  148.64),
                         SIMDE_FLOAT32_C(  820.35), SIMDE_FLOAT32_C(  265.99),
                         SIMDE_FLOAT32_C(  701.82), SIMDE_FLOAT32_C(  479.73),
                         SIMDE_FLOAT32_C(  432.96), SIMDE_FLOAT32_C(  276.42)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  529.00), SIMDE_FLOAT32_C(  149.00),
                         SIMDE_FLOAT32_C(  820.00), SIMDE_FLOAT32_C(  266.00),
                         SIMDE_FLOAT32_C(  702.00), SIMDE_FLOAT32_C(  480.00),
                         SIMDE_FLOAT32_C(  433.00), SIMDE_FLOAT32_C(  276.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  529.00), SIMDE_FLOAT32_C(  148.00),
                         SIMDE_FLOAT32_C(  820.00), SIMDE_FLOAT32_C(  265.00),
                         SIMDE_FLOAT32_C(  701.00), SIMDE_FLOAT32_C(  479.00),
                         SIMDE_FLOAT32_C(  432.00), SIMDE_FLOAT32_C(  276.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  530.00), SIMDE_FLOAT32_C(  149.00),
                         SIMDE_FLOAT32_C(  821.00), SIMDE_FLOAT32_C(  266.00),
                         SIMDE_FLOAT32_C(  702.00), SIMDE_FLOAT32_C(  480.00),
                         SIMDE_FLOAT32_C(  433.00), SIMDE_FLOAT32_C(  277.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  529.00), SIMDE_FLOAT32_C(  148.00),
                         SIMDE_FLOAT32_C(  820.00), SIMDE_FLOAT32_C(  265.00),
                         SIMDE_FLOAT32_C(  701.00), SIMDE_FLOAT32_C(  479.00),
                         SIMDE_FLOAT32_C(  432.00), SIMDE_FLOAT32_C(  276.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -33.90), SIMDE_FLOAT32_C( -334.90),
                         SIMDE_FLOAT32_C( -399.58), SIMDE_FLOAT32_C(  824.28),
                         SIMDE_FLOAT32_C(  442.40), SIMDE_FLOAT32_C(  699.22),
                         SIMDE_FLOAT32_C( -143.02), SIMDE_FLOAT32_C( -465.79)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -34.00), SIMDE_FLOAT32_C( -335.00),
                         SIMDE_FLOAT32_C( -400.00), SIMDE_FLOAT32_C(  824.00),
                         SIMDE_FLOAT32_C(  442.00), SIMDE_FLOAT32_C(  699.00),
                         SIMDE_FLOAT32_C( -143.00), SIMDE_FLOAT32_C( -466.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -34.00), SIMDE_FLOAT32_C( -335.00),
                         SIMDE_FLOAT32_C( -400.00), SIMDE_FLOAT32_C(  824.00),
                         SIMDE_FLOAT32_C(  442.00), SIMDE_FLOAT32_C(  699.00),
                         SIMDE_FLOAT32_C( -144.00), SIMDE_FLOAT32_C( -466.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -33.00), SIMDE_FLOAT32_C( -334.00),
                         SIMDE_FLOAT32_C( -399.00), SIMDE_FLOAT32_C(  825.00),
                         SIMDE_FLOAT32_C(  443.00), SIMDE_FLOAT32_C(  700.00),
                         SIMDE_FLOAT32_C( -143.00), SIMDE_FLOAT32_C( -465.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -33.00), SIMDE_FLOAT32_C( -334.00),
                         SIMDE_FLOAT32_C( -399.00), SIMDE_FLOAT32_C(  824.00),
                         SIMDE_FLOAT32_C(  442.00), SIMDE_FLOAT32_C(  699.00),
                         SIMDE_FLOAT32_C( -143.00), SIMDE_FLOAT32_C( -465.00)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    simde__m256 r;

    /* One call per rounding constant, each checked against its matching field. */
    r = simde_mm256_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_NEAREST_INT);
    simde_assert_m256_close(r, test_vec[i].nearest, 1);

    r = simde_mm256_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_NEG_INF);
    simde_assert_m256_close(r, test_vec[i].neg_inf, 1);

    r = simde_mm256_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_POS_INF);
    simde_assert_m256_close(r, test_vec[i].pos_inf, 1);

    r = simde_mm256_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_ZERO);
    simde_assert_m256_close(r, test_vec[i].truncate, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_round_pd.
 *
 * Every entry holds one input vector plus the expected result for each
 * of the four rounding constants exercised below.  Each computed vector
 * is compared against its expected counterpart with
 * simde_assert_m256d_close using a precision argument of 1.
 *
 * Returns 0 once every entry has been processed. */
static int
test_simde_mm256_round_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;        /* input vector */
    simde__m256d nearest;  /* expected for SIMDE_MM_FROUND_TO_NEAREST_INT */
    simde__m256d neg_inf;  /* expected for SIMDE_MM_FROUND_TO_NEG_INF */
    simde__m256d pos_inf;  /* expected for SIMDE_MM_FROUND_TO_POS_INF */
    simde__m256d truncate; /* expected for SIMDE_MM_FROUND_TO_ZERO */
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  312.12), SIMDE_FLOAT64_C(  818.22),
                         SIMDE_FLOAT64_C(   62.47), SIMDE_FLOAT64_C(  918.37)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  312.00), SIMDE_FLOAT64_C(  818.00),
                         SIMDE_FLOAT64_C(   62.00), SIMDE_FLOAT64_C(  918.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  312.00), SIMDE_FLOAT64_C(  818.00),
                         SIMDE_FLOAT64_C(   62.00), SIMDE_FLOAT64_C(  918.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  313.00), SIMDE_FLOAT64_C(  819.00),
                         SIMDE_FLOAT64_C(   63.00), SIMDE_FLOAT64_C(  919.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  312.00), SIMDE_FLOAT64_C(  818.00),
                         SIMDE_FLOAT64_C(   62.00), SIMDE_FLOAT64_C(  918.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  651.78), SIMDE_FLOAT64_C( -771.04),
                         SIMDE_FLOAT64_C(  544.48), SIMDE_FLOAT64_C(  333.27)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  652.00), SIMDE_FLOAT64_C( -771.00),
                         SIMDE_FLOAT64_C(  544.00), SIMDE_FLOAT64_C(  333.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  651.00), SIMDE_FLOAT64_C( -772.00),
                         SIMDE_FLOAT64_C(  544.00), SIMDE_FLOAT64_C(  333.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  652.00), SIMDE_FLOAT64_C( -771.00),
                         SIMDE_FLOAT64_C(  545.00), SIMDE_FLOAT64_C(  334.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  651.00), SIMDE_FLOAT64_C( -771.00),
                         SIMDE_FLOAT64_C(  544.00), SIMDE_FLOAT64_C(  333.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -661.62), SIMDE_FLOAT64_C(  921.42),
                         SIMDE_FLOAT64_C(   23.03), SIMDE_FLOAT64_C(  143.14)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -662.00), SIMDE_FLOAT64_C(  921.00),
                         SIMDE_FLOAT64_C(   23.00), SIMDE_FLOAT64_C(  143.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -662.00), SIMDE_FLOAT64_C(  921.00),
                         SIMDE_FLOAT64_C(   23.00), SIMDE_FLOAT64_C(  143.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -661.00), SIMDE_FLOAT64_C(  922.00),
                         SIMDE_FLOAT64_C(   24.00), SIMDE_FLOAT64_C(  144.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -661.00), SIMDE_FLOAT64_C(  921.00),
                         SIMDE_FLOAT64_C(   23.00), SIMDE_FLOAT64_C(  143.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  -70.00), SIMDE_FLOAT64_C( -189.29),
                         SIMDE_FLOAT64_C( -644.20), SIMDE_FLOAT64_C( -788.03)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -70.00), SIMDE_FLOAT64_C( -189.00),
                         SIMDE_FLOAT64_C( -644.00), SIMDE_FLOAT64_C( -788.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -70.00), SIMDE_FLOAT64_C( -190.00),
                         SIMDE_FLOAT64_C( -645.00), SIMDE_FLOAT64_C( -789.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -70.00), SIMDE_FLOAT64_C( -189.00),
                         SIMDE_FLOAT64_C( -644.00), SIMDE_FLOAT64_C( -788.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  -70.00), SIMDE_FLOAT64_C( -189.00),
                         SIMDE_FLOAT64_C( -644.00), SIMDE_FLOAT64_C( -788.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -999.31), SIMDE_FLOAT64_C(  917.83),
                         SIMDE_FLOAT64_C( -173.85), SIMDE_FLOAT64_C( -622.25)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -999.00), SIMDE_FLOAT64_C(  918.00),
                         SIMDE_FLOAT64_C( -174.00), SIMDE_FLOAT64_C( -622.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(-1000.00), SIMDE_FLOAT64_C(  917.00),
                         SIMDE_FLOAT64_C( -174.00), SIMDE_FLOAT64_C( -623.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -999.00), SIMDE_FLOAT64_C(  918.00),
                         SIMDE_FLOAT64_C( -173.00), SIMDE_FLOAT64_C( -622.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -999.00), SIMDE_FLOAT64_C(  917.00),
                         SIMDE_FLOAT64_C( -173.00), SIMDE_FLOAT64_C( -622.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -905.08), SIMDE_FLOAT64_C(   96.40),
                         SIMDE_FLOAT64_C(  481.12), SIMDE_FLOAT64_C(  989.53)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -905.00), SIMDE_FLOAT64_C(   96.00),
                         SIMDE_FLOAT64_C(  481.00), SIMDE_FLOAT64_C(  990.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -906.00), SIMDE_FLOAT64_C(   96.00),
                         SIMDE_FLOAT64_C(  481.00), SIMDE_FLOAT64_C(  989.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -905.00), SIMDE_FLOAT64_C(   97.00),
                         SIMDE_FLOAT64_C(  482.00), SIMDE_FLOAT64_C(  990.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -905.00), SIMDE_FLOAT64_C(   96.00),
                         SIMDE_FLOAT64_C(  481.00), SIMDE_FLOAT64_C(  989.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  380.53), SIMDE_FLOAT64_C(  251.75),
                         SIMDE_FLOAT64_C( -843.75), SIMDE_FLOAT64_C( -890.74)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  381.00), SIMDE_FLOAT64_C(  252.00),
                         SIMDE_FLOAT64_C( -844.00), SIMDE_FLOAT64_C( -891.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  380.00), SIMDE_FLOAT64_C(  251.00),
                         SIMDE_FLOAT64_C( -844.00), SIMDE_FLOAT64_C( -891.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  381.00), SIMDE_FLOAT64_C(  252.00),
                         SIMDE_FLOAT64_C( -843.00), SIMDE_FLOAT64_C( -890.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  380.00), SIMDE_FLOAT64_C(  251.00),
                         SIMDE_FLOAT64_C( -843.00), SIMDE_FLOAT64_C( -890.00)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -382.52), SIMDE_FLOAT64_C( -590.14),
                         SIMDE_FLOAT64_C(    3.25), SIMDE_FLOAT64_C(  599.23)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -383.00), SIMDE_FLOAT64_C( -590.00),
                         SIMDE_FLOAT64_C(    3.00), SIMDE_FLOAT64_C(  599.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -383.00), SIMDE_FLOAT64_C( -591.00),
                         SIMDE_FLOAT64_C(    3.00), SIMDE_FLOAT64_C(  599.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -382.00), SIMDE_FLOAT64_C( -590.00),
                         SIMDE_FLOAT64_C(    4.00), SIMDE_FLOAT64_C(  600.00)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -382.00), SIMDE_FLOAT64_C( -590.00),
                         SIMDE_FLOAT64_C(    3.00), SIMDE_FLOAT64_C(  599.00)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    simde__m256d r;

    /* One call per rounding constant, each checked against its matching field. */
    r = simde_mm256_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_NEAREST_INT);
    simde_assert_m256d_close(r, test_vec[i].nearest, 1);

    r = simde_mm256_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_NEG_INF);
    simde_assert_m256d_close(r, test_vec[i].neg_inf, 1);

    r = simde_mm256_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_POS_INF);
    simde_assert_m256d_close(r, test_vec[i].pos_inf, 1);

    r = simde_mm256_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_ZERO);
    simde_assert_m256d_close(r, test_vec[i].truncate, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_rsqrt_ps.
 *
 * Every entry pairs an input vector with the result the test expects;
 * the computed vector is compared against it with
 * simde_assert_m256_close using a precision argument of 1.
 *
 * Returns 0 once every entry has been processed. */
static int
test_simde_mm256_rsqrt_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a; /* input passed to simde_mm256_rsqrt_ps */
    simde__m256 r; /* expected result */
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  376.34), SIMDE_FLOAT32_C(  781.09),
                         SIMDE_FLOAT32_C(  426.92), SIMDE_FLOAT32_C(  127.71),
                         SIMDE_FLOAT32_C(  308.06), SIMDE_FLOAT32_C(  169.26),
                         SIMDE_FLOAT32_C(  264.24), SIMDE_FLOAT32_C(   87.72)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.05), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.05), SIMDE_FLOAT32_C(    0.09),
                         SIMDE_FLOAT32_C(    0.06), SIMDE_FLOAT32_C(    0.08),
                         SIMDE_FLOAT32_C(    0.06), SIMDE_FLOAT32_C(    0.11)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  840.99), SIMDE_FLOAT32_C(  641.73),
                         SIMDE_FLOAT32_C(  425.88), SIMDE_FLOAT32_C(  794.85),
                         SIMDE_FLOAT32_C(  374.41), SIMDE_FLOAT32_C(  576.54),
                         SIMDE_FLOAT32_C(  840.83), SIMDE_FLOAT32_C(  886.63)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.03), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.05), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.05), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.03), SIMDE_FLOAT32_C(    0.03)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  314.85), SIMDE_FLOAT32_C(  671.43),
                         SIMDE_FLOAT32_C(  540.12), SIMDE_FLOAT32_C(  529.67),
                         SIMDE_FLOAT32_C(  498.35), SIMDE_FLOAT32_C(  224.61),
                         SIMDE_FLOAT32_C(  518.07), SIMDE_FLOAT32_C(  759.15)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.06), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.07),
                         SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.04)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  990.90), SIMDE_FLOAT32_C(  800.35),
                         SIMDE_FLOAT32_C(   95.53), SIMDE_FLOAT32_C(  852.74),
                         SIMDE_FLOAT32_C(  140.49), SIMDE_FLOAT32_C(  379.21),
                         SIMDE_FLOAT32_C(  930.80), SIMDE_FLOAT32_C(   70.01)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.03), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.10), SIMDE_FLOAT32_C(    0.03),
                         SIMDE_FLOAT32_C(    0.08), SIMDE_FLOAT32_C(    0.05),
                         SIMDE_FLOAT32_C(    0.03), SIMDE_FLOAT32_C(    0.12)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  251.14), SIMDE_FLOAT32_C(  612.05),
                         SIMDE_FLOAT32_C(   55.90), SIMDE_FLOAT32_C(  550.93),
                         SIMDE_FLOAT32_C(   71.18), SIMDE_FLOAT32_C(  968.41),
                         SIMDE_FLOAT32_C(   36.22), SIMDE_FLOAT32_C(  986.88)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.06), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.13), SIMDE_FLOAT32_C(    0.04),
                         SIMDE_FLOAT32_C(    0.12), SIMDE_FLOAT32_C(    0.03),
                         SIMDE_FLOAT32_C(    0.17), SIMDE_FLOAT32_C(    0.03)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  657.13), SIMDE_FLOAT32_C(  431.08),
                         SIMDE_FLOAT32_C(  717.98), SIMDE_FLOAT32_C(   27.05),
                         SIMDE_FLOAT32_C(  195.42), SIMDE_FLOAT32_C(  859.20),
                         SIMDE_FLOAT32_C(  157.91), SIMDE_FLOAT32_C(  578.79)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.05),
                         SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.19),
                         SIMDE_FLOAT32_C(    0.07), SIMDE_FLOAT32_C(    0.03),
                         SIMDE_FLOAT32_C(    0.08), SIMDE_FLOAT32_C(    0.04)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  738.73), SIMDE_FLOAT32_C(  198.62),
                         SIMDE_FLOAT32_C(  544.16), SIMDE_FLOAT32_C(  379.62),
                         SIMDE_FLOAT32_C(  782.12), SIMDE_FLOAT32_C(   91.05),
                         SIMDE_FLOAT32_C(  650.65), SIMDE_FLOAT32_C(  315.52)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.07),
                         SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.05),
                         SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.10),
                         SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.06)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  788.79), SIMDE_FLOAT32_C(  929.94),
                         SIMDE_FLOAT32_C(   55.55), SIMDE_FLOAT32_C(  137.30),
                         SIMDE_FLOAT32_C(  612.48), SIMDE_FLOAT32_C(    6.47),
                         SIMDE_FLOAT32_C(  828.22), SIMDE_FLOAT32_C(  971.40)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.03),
                         SIMDE_FLOAT32_C(    0.13), SIMDE_FLOAT32_C(    0.09),
                         SIMDE_FLOAT32_C(    0.04), SIMDE_FLOAT32_C(    0.39),
                         SIMDE_FLOAT32_C(    0.03), SIMDE_FLOAT32_C(    0.03)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    simde__m256 r;

    r = simde_mm256_rsqrt_ps(test_vec[i].a);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_setr_epi8: each case passes the 32 values of `a` in
 * array order and compares the result, as 8-bit lanes, with `r`, which is
 * built by simde_mm256_set_epi8 from the same values in reverse argument
 * order.  Returns 0 once every case has been checked. */
static int
test_simde_mm256_setr_epi8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    int8_t a[32];
    simde__m256i r;
  } test_vec[8] = {
    { { INT8_C(  -4), INT8_C(  97), INT8_C( -85), INT8_C( -82),
        INT8_C(  42), INT8_C(  35), INT8_C(  11), INT8_C(  62),
        INT8_C( -47), INT8_C(  10), INT8_C(-127), INT8_C(  56),
        INT8_C(  44), INT8_C(  59), INT8_C( -13), INT8_C(  22),
        INT8_C( -14), INT8_C(  77), INT8_C( -49), INT8_C( -46),
        INT8_C(  53), INT8_C(-109), INT8_C(  30), INT8_C(  70),
        INT8_C(-110), INT8_C(  70), INT8_C(  95), INT8_C( -22),
        INT8_C(  89), INT8_C( -50), INT8_C( -28), INT8_C(-122) },
      simde_mm256_set_epi8(INT8_C(-122), INT8_C( -28), INT8_C( -50), INT8_C(  89),
                           INT8_C( -22), INT8_C(  95), INT8_C(  70), INT8_C(-110),
                           INT8_C(  70), INT8_C(  30), INT8_C(-109), INT8_C(  53),
                           INT8_C( -46), INT8_C( -49), INT8_C(  77), INT8_C( -14),
                           INT8_C(  22), INT8_C( -13), INT8_C(  59), INT8_C(  44),
                           INT8_C(  56), INT8_C(-127), INT8_C(  10), INT8_C( -47),
                           INT8_C(  62), INT8_C(  11), INT8_C(  35), INT8_C(  42),
                           INT8_C( -82), INT8_C( -85), INT8_C(  97), INT8_C(  -4)) },
    { { INT8_C( 127), INT8_C(  42), INT8_C( -70), INT8_C( -73),
        INT8_C( -56), INT8_C(-116), INT8_C( -43), INT8_C(  20),
        INT8_C(  -7), INT8_C(-120), INT8_C( -33), INT8_C(   3),
        INT8_C(  87), INT8_C(  58), INT8_C(  61), INT8_C( -32),
        INT8_C(  37), INT8_C(-114), INT8_C(  14), INT8_C(  80),
        INT8_C( -87), INT8_C(  43), INT8_C( -56), INT8_C(  51),
        INT8_C(  64), INT8_C(-107), INT8_C(  80), INT8_C(  59),
        INT8_C( -99), INT8_C( -49), INT8_C(  22), INT8_C( 109) },
      simde_mm256_set_epi8(INT8_C( 109), INT8_C(  22), INT8_C( -49), INT8_C( -99),
                           INT8_C(  59), INT8_C(  80), INT8_C(-107), INT8_C(  64),
                           INT8_C(  51), INT8_C( -56), INT8_C(  43), INT8_C( -87),
                           INT8_C(  80), INT8_C(  14), INT8_C(-114), INT8_C(  37),
                           INT8_C( -32), INT8_C(  61), INT8_C(  58), INT8_C(  87),
                           INT8_C(   3), INT8_C( -33), INT8_C(-120), INT8_C(  -7),
                           INT8_C(  20), INT8_C( -43), INT8_C(-116), INT8_C( -56),
                           INT8_C( -73), INT8_C( -70), INT8_C(  42), INT8_C( 127)) },
    { { INT8_C(  18), INT8_C( 106), INT8_C( -14), INT8_C( -63),
        INT8_C(  49), INT8_C(  26), INT8_C( 111), INT8_C( 121),
        INT8_C(-109), INT8_C(  19), INT8_C(  59), INT8_C( -42),
        INT8_C( -61), INT8_C(  44), INT8_C(  95), INT8_C(   0),
        INT8_C(  14), INT8_C(  62), INT8_C(  88), INT8_C(  98),
        INT8_C(  26), INT8_C(  58), INT8_C(  76), INT8_C( -18),
        INT8_C(-104), INT8_C(  -9), INT8_C( -67), INT8_C(  20),
        INT8_C( -43), INT8_C( -63), INT8_C(  30), INT8_C( 123) },
      simde_mm256_set_epi8(INT8_C( 123), INT8_C(  30), INT8_C( -63), INT8_C( -43),
                           INT8_C(  20), INT8_C( -67), INT8_C(  -9), INT8_C(-104),
                           INT8_C( -18), INT8_C(  76), INT8_C(  58), INT8_C(  26),
                           INT8_C(  98), INT8_C(  88), INT8_C(  62), INT8_C(  14),
                           INT8_C(   0), INT8_C(  95), INT8_C(  44), INT8_C( -61),
                           INT8_C( -42), INT8_C(  59), INT8_C(  19), INT8_C(-109),
                           INT8_C( 121), INT8_C( 111), INT8_C(  26), INT8_C(  49),
                           INT8_C( -63), INT8_C( -14), INT8_C( 106), INT8_C(  18)) },
    { { INT8_C(-122), INT8_C( 106), INT8_C( -25), INT8_C(  57),
        INT8_C(-110), INT8_C(  80), INT8_C(  35), INT8_C( -81),
        INT8_C(-111), INT8_C(   7), INT8_C(  90), INT8_C( -14),
        INT8_C(  64), INT8_C(  90), INT8_C( -51), INT8_C(  87),
        INT8_C(  49), INT8_C(  77), INT8_C( 127), INT8_C( -93),
        INT8_C( -57), INT8_C( 112), INT8_C( -74), INT8_C(  26),
        INT8_C(-113), INT8_C( -77), INT8_C( -29), INT8_C( -29),
        INT8_C( 117), INT8_C(  31), INT8_C(-100), INT8_C( -86) },
      simde_mm256_set_epi8(INT8_C( -86), INT8_C(-100), INT8_C(  31), INT8_C( 117),
                           INT8_C( -29), INT8_C( -29), INT8_C( -77), INT8_C(-113),
                           INT8_C(  26), INT8_C( -74), INT8_C( 112), INT8_C( -57),
                           INT8_C( -93), INT8_C( 127), INT8_C(  77), INT8_C(  49),
                           INT8_C(  87), INT8_C( -51), INT8_C(  90), INT8_C(  64),
                           INT8_C( -14), INT8_C(  90), INT8_C(   7), INT8_C(-111),
                           INT8_C( -81), INT8_C(  35), INT8_C(  80), INT8_C(-110),
                           INT8_C(  57), INT8_C( -25), INT8_C( 106), INT8_C(-122)) },
    { { INT8_C(  58), INT8_C(   6), INT8_C( 118), INT8_C(  36),
        INT8_C( 115), INT8_C( -15), INT8_C(  37), INT8_C( 119),
        INT8_C( -89), INT8_C( -10), INT8_C( -50), INT8_C(-119),
        INT8_C(   6), INT8_C( -77), INT8_C(  70), INT8_C( 117),
        INT8_C( -12), INT8_C( -22), INT8_C( 114), INT8_C( -39),
        INT8_C( 100), INT8_C( 122), INT8_C(-102), INT8_C( -55),
        INT8_C( 116), INT8_C( -23), INT8_C( -29), INT8_C( -57),
        INT8_C( -40), INT8_C(  41), INT8_C( 119), INT8_C( 121) },
      simde_mm256_set_epi8(INT8_C( 121), INT8_C( 119), INT8_C(  41), INT8_C( -40),
                           INT8_C( -57), INT8_C( -29), INT8_C( -23), INT8_C( 116),
                           INT8_C( -55), INT8_C(-102), INT8_C( 122), INT8_C( 100),
                           INT8_C( -39), INT8_C( 114), INT8_C( -22), INT8_C( -12),
                           INT8_C( 117), INT8_C(  70), INT8_C( -77), INT8_C(   6),
                           INT8_C(-119), INT8_C( -50), INT8_C( -10), INT8_C( -89),
                           INT8_C( 119), INT8_C(  37), INT8_C( -15), INT8_C( 115),
                           INT8_C(  36), INT8_C( 118), INT8_C(   6), INT8_C(  58)) },
    { { INT8_C(  47), INT8_C(  33), INT8_C( -28), INT8_C(-105),
        INT8_C(  -6), INT8_C( -69), INT8_C( 111), INT8_C( -17),
        INT8_C(  43), INT8_C(-123), INT8_C(  56), INT8_C( 119),
        INT8_C(  18), INT8_C(  -6), INT8_C( -96), INT8_C(-126),
        INT8_C( 113), INT8_C(-107), INT8_C(  83), INT8_C(  24),
        INT8_C( -84), INT8_C(-124), INT8_C( -72), INT8_C( -86),
        INT8_C(  80), INT8_C(  33), INT8_C(  -6), INT8_C( -30),
        INT8_C(  85), INT8_C( -74), INT8_C(  58), INT8_C( -88) },
      simde_mm256_set_epi8(INT8_C( -88), INT8_C(  58), INT8_C( -74), INT8_C(  85),
                           INT8_C( -30), INT8_C(  -6), INT8_C(  33), INT8_C(  80),
                           INT8_C( -86), INT8_C( -72), INT8_C(-124), INT8_C( -84),
                           INT8_C(  24), INT8_C(  83), INT8_C(-107), INT8_C( 113),
                           INT8_C(-126), INT8_C( -96), INT8_C(  -6), INT8_C(  18),
                           INT8_C( 119), INT8_C(  56), INT8_C(-123), INT8_C(  43),
                           INT8_C( -17), INT8_C( 111), INT8_C( -69), INT8_C(  -6),
                           INT8_C(-105), INT8_C( -28), INT8_C(  33), INT8_C(  47)) },
    { { INT8_C(  12), INT8_C(  93), INT8_C( -74), INT8_C( 117),
        INT8_C( -55), INT8_C( -56), INT8_C(   9), INT8_C( -48),
        INT8_C( 100), INT8_C(  -4), INT8_C( 101), INT8_C(  -1),
        INT8_C( -41), INT8_C( -98), INT8_C(-128), INT8_C( -73),
        INT8_C( -47), INT8_C(  35), INT8_C( -89), INT8_C( -36),
        INT8_C(-117), INT8_C( -95), INT8_C( -70), INT8_C( -94),
        INT8_C( -61), INT8_C( -88), INT8_C( -41), INT8_C( -56),
        INT8_C(  -5), INT8_C( -90), INT8_C( -61), INT8_C(  58) },
      simde_mm256_set_epi8(INT8_C(  58), INT8_C( -61), INT8_C( -90), INT8_C(  -5),
                           INT8_C( -56), INT8_C( -41), INT8_C( -88), INT8_C( -61),
                           INT8_C( -94), INT8_C( -70), INT8_C( -95), INT8_C(-117),
                           INT8_C( -36), INT8_C( -89), INT8_C(  35), INT8_C( -47),
                           INT8_C( -73), INT8_C(-128), INT8_C( -98), INT8_C( -41),
                           INT8_C(  -1), INT8_C( 101), INT8_C(  -4), INT8_C( 100),
                           INT8_C( -48), INT8_C(   9), INT8_C( -56), INT8_C( -55),
                           INT8_C( 117), INT8_C( -74), INT8_C(  93), INT8_C(  12)) },
    { { INT8_C( -62), INT8_C( 106), INT8_C(  33), INT8_C( -86),
        INT8_C(  50), INT8_C(  51), INT8_C(   1), INT8_C(  83),
        INT8_C( -24), INT8_C(  24), INT8_C( 119), INT8_C( -35),
        INT8_C(  55), INT8_C( 109), INT8_C( -56), INT8_C( -46),
        INT8_C( -64), INT8_C( 124), INT8_C(  -1), INT8_C( -71),
        INT8_C(  27), INT8_C( 108), INT8_C(-113), INT8_C( -86),
        INT8_C(  94), INT8_C(  46), INT8_C( -59), INT8_C( -32),
        INT8_C( -63), INT8_C( -39), INT8_C( -52), INT8_C( 101) },
      simde_mm256_set_epi8(INT8_C( 101), INT8_C( -52), INT8_C( -39), INT8_C( -63),
                           INT8_C( -32), INT8_C( -59), INT8_C(  46), INT8_C(  94),
                           INT8_C( -86), INT8_C(-113), INT8_C( 108), INT8_C(  27),
                           INT8_C( -71), INT8_C(  -1), INT8_C( 124), INT8_C( -64),
                           INT8_C( -46), INT8_C( -56), INT8_C( 109), INT8_C(  55),
                           INT8_C( -35), INT8_C( 119), INT8_C(  24), INT8_C( -24),
                           INT8_C(  83), INT8_C(   1), INT8_C(  51), INT8_C(  50),
                           INT8_C( -86), INT8_C(  33), INT8_C( 106), INT8_C( -62)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Input row for this case, fed to the intrinsic in array order. */
    const int8_t *in = test_vec[idx].a;
    simde__m256i actual = simde_mm256_setr_epi8(
        in[ 0], in[ 1], in[ 2], in[ 3], in[ 4], in[ 5], in[ 6], in[ 7],
        in[ 8], in[ 9], in[10], in[11], in[12], in[13], in[14], in[15],
        in[16], in[17], in[18], in[19], in[20], in[21], in[22], in[23],
        in[24], in[25], in[26], in[27], in[28], in[29], in[30], in[31]);
    simde_assert_m256i_i8(actual, ==, test_vec[idx].r);
  }

  return 0;
}

/* Checks simde_mm256_setr_epi16: each case passes the 16 values of `a` in
 * array order and compares the result, as 16-bit lanes, with `r`, which is
 * built by simde_mm256_set_epi16 from the same values in reverse argument
 * order.  Returns 0 once every case has been checked. */
static int
test_simde_mm256_setr_epi16(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    int16_t a[16];
    simde__m256i r;
  } test_vec[8] = {
    { { INT16_C(-20822), INT16_C(  4719), INT16_C( 13700), INT16_C( 26280),
        INT16_C( -8393), INT16_C( 13684), INT16_C(-27950), INT16_C(-18508),
        INT16_C( 32037), INT16_C(-24299), INT16_C(-21546), INT16_C(  1669),
        INT16_C(   957), INT16_C( 10001), INT16_C(-15549), INT16_C(-30917) },
      simde_mm256_set_epi16(INT16_C(-30917), INT16_C(-15549), INT16_C( 10001), INT16_C(   957),
                            INT16_C(  1669), INT16_C(-21546), INT16_C(-24299), INT16_C( 32037),
                            INT16_C(-18508), INT16_C(-27950), INT16_C( 13684), INT16_C( -8393),
                            INT16_C( 26280), INT16_C( 13700), INT16_C(  4719), INT16_C(-20822)) },
    { { INT16_C(-21993), INT16_C(-20656), INT16_C( 29326), INT16_C( 28940),
        INT16_C(-31152), INT16_C(-19248), INT16_C(-26052), INT16_C(-19065),
        INT16_C(-11006), INT16_C( -7550), INT16_C( 14017), INT16_C( 19342),
        INT16_C(-32339), INT16_C(  3580), INT16_C( 28313), INT16_C(-13882) },
      simde_mm256_set_epi16(INT16_C(-13882), INT16_C( 28313), INT16_C(  3580), INT16_C(-32339),
                            INT16_C( 19342), INT16_C( 14017), INT16_C( -7550), INT16_C(-11006),
                            INT16_C(-19065), INT16_C(-26052), INT16_C(-19248), INT16_C(-31152),
                            INT16_C( 28940), INT16_C( 29326), INT16_C(-20656), INT16_C(-21993)) },
    { { INT16_C( 10866), INT16_C(  9786), INT16_C(  3944), INT16_C(-19272),
        INT16_C(-30670), INT16_C( 14551), INT16_C(  8410), INT16_C( -6777),
        INT16_C(-16568), INT16_C(-18107), INT16_C(-20605), INT16_C( 22309),
        INT16_C(-22975), INT16_C( 30575), INT16_C( -4285), INT16_C( 10884) },
      simde_mm256_set_epi16(INT16_C( 10884), INT16_C( -4285), INT16_C( 30575), INT16_C(-22975),
                            INT16_C( 22309), INT16_C(-20605), INT16_C(-18107), INT16_C(-16568),
                            INT16_C( -6777), INT16_C(  8410), INT16_C( 14551), INT16_C(-30670),
                            INT16_C(-19272), INT16_C(  3944), INT16_C(  9786), INT16_C( 10866)) },
    { { INT16_C(  3382), INT16_C(-18461), INT16_C( 23033), INT16_C(-12757),
        INT16_C(  -812), INT16_C( 15509), INT16_C(-23059), INT16_C(  2475),
        INT16_C(-26254), INT16_C(-14528), INT16_C(-12769), INT16_C( -6867),
        INT16_C(-17924), INT16_C( 22705), INT16_C(-26548), INT16_C(-32025) },
      simde_mm256_set_epi16(INT16_C(-32025), INT16_C(-26548), INT16_C( 22705), INT16_C(-17924),
                            INT16_C( -6867), INT16_C(-12769), INT16_C(-14528), INT16_C(-26254),
                            INT16_C(  2475), INT16_C(-23059), INT16_C( 15509), INT16_C(  -812),
                            INT16_C(-12757), INT16_C( 23033), INT16_C(-18461), INT16_C(  3382)) },
    { { INT16_C(-16227), INT16_C( 12780), INT16_C( 24958), INT16_C( 10168),
        INT16_C(-24922), INT16_C(-26733), INT16_C(  3884), INT16_C(  8130),
        INT16_C(  -363), INT16_C( -2828), INT16_C( -7524), INT16_C( 28685),
        INT16_C( -7215), INT16_C(  7765), INT16_C( 25104), INT16_C(-23004) },
      simde_mm256_set_epi16(INT16_C(-23004), INT16_C( 25104), INT16_C(  7765), INT16_C( -7215),
                            INT16_C( 28685), INT16_C( -7524), INT16_C( -2828), INT16_C(  -363),
                            INT16_C(  8130), INT16_C(  3884), INT16_C(-26733), INT16_C(-24922),
                            INT16_C( 10168), INT16_C( 24958), INT16_C( 12780), INT16_C(-16227)) },
    { { INT16_C(-23604), INT16_C(-17002), INT16_C( -3804), INT16_C(-31486),
        INT16_C(-31316), INT16_C(  8603), INT16_C( 13936), INT16_C(-30323),
        INT16_C( 17911), INT16_C(-25284), INT16_C( 11323), INT16_C( -3450),
        INT16_C( -2379), INT16_C( -4748), INT16_C( -1701), INT16_C(-14575) },
      simde_mm256_set_epi16(INT16_C(-14575), INT16_C( -1701), INT16_C( -4748), INT16_C( -2379),
                            INT16_C( -3450), INT16_C( 11323), INT16_C(-25284), INT16_C( 17911),
                            INT16_C(-30323), INT16_C( 13936), INT16_C(  8603), INT16_C(-31316),
                            INT16_C(-31486), INT16_C( -3804), INT16_C(-17002), INT16_C(-23604)) },
    { { INT16_C(  2504), INT16_C(  3886), INT16_C( -8527), INT16_C( 15137),
        INT16_C( -2956), INT16_C(  3741), INT16_C(-30624), INT16_C(-26724),
        INT16_C(  6830), INT16_C( 31838), INT16_C( 31654), INT16_C(-13744),
        INT16_C( -1202), INT16_C( 10750), INT16_C(  5862), INT16_C(-29772) },
      simde_mm256_set_epi16(INT16_C(-29772), INT16_C(  5862), INT16_C( 10750), INT16_C( -1202),
                            INT16_C(-13744), INT16_C( 31654), INT16_C( 31838), INT16_C(  6830),
                            INT16_C(-26724), INT16_C(-30624), INT16_C(  3741), INT16_C( -2956),
                            INT16_C( 15137), INT16_C( -8527), INT16_C(  3886), INT16_C(  2504)) },
    { { INT16_C(  9166), INT16_C( 24566), INT16_C(-20956), INT16_C( 25846),
        INT16_C( -9797), INT16_C(-30693), INT16_C( 17134), INT16_C(-20898),
        INT16_C(-11673), INT16_C( -1066), INT16_C( 24186), INT16_C( 15486),
        INT16_C( -6894), INT16_C( -1878), INT16_C(-19558), INT16_C( 25792) },
      simde_mm256_set_epi16(INT16_C( 25792), INT16_C(-19558), INT16_C( -1878), INT16_C( -6894),
                            INT16_C( 15486), INT16_C( 24186), INT16_C( -1066), INT16_C(-11673),
                            INT16_C(-20898), INT16_C( 17134), INT16_C(-30693), INT16_C( -9797),
                            INT16_C( 25846), INT16_C(-20956), INT16_C( 24566), INT16_C(  9166)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Input row for this case, fed to the intrinsic in array order. */
    const int16_t *in = test_vec[idx].a;
    simde__m256i actual = simde_mm256_setr_epi16(
        in[ 0], in[ 1], in[ 2], in[ 3], in[ 4], in[ 5], in[ 6], in[ 7],
        in[ 8], in[ 9], in[10], in[11], in[12], in[13], in[14], in[15]);
    simde_assert_m256i_i16(actual, ==, test_vec[idx].r);
  }

  return 0;
}

/* Checks simde_mm256_setr_epi32: each case passes the 8 values of `a` in
 * array order and compares the result, as 32-bit lanes, with `r`, which is
 * built by simde_mm256_set_epi32 from the same values in reverse argument
 * order.  Returns 0 once every case has been checked. */
static int
test_simde_mm256_setr_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    int32_t a[8];
    simde__m256i r;
  } test_vec[8] = {
    { { INT32_C(  932849909), INT32_C( -456580424), INT32_C(-1072840342), INT32_C(  187025165),
        INT32_C(  -54386372), INT32_C(-1527557226), INT32_C(  842765893), INT32_C(-1371730077) },
      simde_mm256_set_epi32(INT32_C(-1371730077), INT32_C(  842765893), INT32_C(-1527557226), INT32_C(  -54386372),
                            INT32_C(  187025165), INT32_C(-1072840342), INT32_C( -456580424), INT32_C(  932849909)) },
    { { INT32_C( 1893614455), INT32_C( 1294871072), INT32_C( 1552259151), INT32_C(  946045936),
        INT32_C(   -7047247), INT32_C(  177282155), INT32_C( -581856304), INT32_C(  673832922) },
      simde_mm256_set_epi32(INT32_C(  673832922), INT32_C( -581856304), INT32_C(  177282155), INT32_C(   -7047247),
                            INT32_C(  946045936), INT32_C( 1552259151), INT32_C( 1294871072), INT32_C( 1893614455)) },
    { { INT32_C(-1610219922), INT32_C(  -94583836), INT32_C( -424768577), INT32_C( -880788885),
        INT32_C(  602433069), INT32_C( -274391227), INT32_C( -328110003), INT32_C(  499660384) },
      simde_mm256_set_epi32(INT32_C(  499660384), INT32_C( -328110003), INT32_C( -274391227), INT32_C(  602433069),
                            INT32_C( -880788885), INT32_C( -424768577), INT32_C(  -94583836), INT32_C(-1610219922)) },
    { { INT32_C( 1302188877), INT32_C( -801832432), INT32_C( 1655080701), INT32_C(-1605614771),
        INT32_C( 1846614190), INT32_C( 1570676076), INT32_C(  -68393412), INT32_C( 1031272058) },
      simde_mm256_set_epi32(INT32_C( 1031272058), INT32_C(  -68393412), INT32_C( 1570676076), INT32_C( 1846614190),
                            INT32_C(-1605614771), INT32_C( 1655080701), INT32_C( -801832432), INT32_C( 1302188877)) },
    { { INT32_C(   25897078), INT32_C(-1241591361), INT32_C( -592602700), INT32_C( -348865550),
        INT32_C( 1694164628), INT32_C( -856795223), INT32_C( -997978026), INT32_C( 1280081679) },
      simde_mm256_set_epi32(INT32_C( 1280081679), INT32_C( -997978026), INT32_C( -856795223), INT32_C( 1694164628),
                            INT32_C( -348865550), INT32_C( -592602700), INT32_C(-1241591361), INT32_C(   25897078)) },
    { { INT32_C(  -87546396), INT32_C( 1852814507), INT32_C( -373825552), INT32_C( 1866208106),
        INT32_C(  910270627), INT32_C( 1550266609), INT32_C( 1485123950), INT32_C( -498285483) },
      simde_mm256_set_epi32(INT32_C( -498285483), INT32_C( 1485123950), INT32_C( 1550266609), INT32_C(  910270627),
                            INT32_C( 1866208106), INT32_C( -373825552), INT32_C( 1852814507), INT32_C(  -87546396)) },
    { { INT32_C( -786490570), INT32_C( -486650057), INT32_C(-1901610760), INT32_C(-1385527729),
        INT32_C( 1837621475), INT32_C(  362332872), INT32_C( 1409187239), INT32_C( -294514311) },
      simde_mm256_set_epi32(INT32_C( -294514311), INT32_C( 1409187239), INT32_C(  362332872), INT32_C( 1837621475),
                            INT32_C(-1385527729), INT32_C(-1901610760), INT32_C( -486650057), INT32_C( -786490570)) },
    { { INT32_C(-2037006285), INT32_C(-1237137601), INT32_C(-1490902854), INT32_C(-1337182966),
        INT32_C( -732587886), INT32_C(-1907285545), INT32_C(  165118547), INT32_C(-1097315632) },
      simde_mm256_set_epi32(INT32_C(-1097315632), INT32_C(  165118547), INT32_C(-1907285545), INT32_C( -732587886),
                            INT32_C(-1337182966), INT32_C(-1490902854), INT32_C(-1237137601), INT32_C(-2037006285)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Input row for this case, fed to the intrinsic in array order. */
    const int32_t *in = test_vec[idx].a;
    simde__m256i actual = simde_mm256_setr_epi32(
        in[0], in[1], in[2], in[3],
        in[4], in[5], in[6], in[7]);
    simde_assert_m256i_i32(actual, ==, test_vec[idx].r);
  }

  return 0;
}

/* Checks simde_mm256_setr_epi64x: each case passes the 4 values of `a` in
 * array order and compares the result, as 64-bit lanes, with `r`, which is
 * built by simde_mm256_set_epi64x from the same values in reverse argument
 * order.  Returns 0 once every case has been checked. */
static int
test_simde_mm256_setr_epi64x(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    int64_t a[4];
    simde__m256i r;
  } test_vec[8] = {
    { { INT64_C( 3013620110861784505), INT64_C(-9156069624919168580),
        INT64_C( 1343723656449999612), INT64_C(-3830101585267880776) },
      simde_mm256_set_epi64x(INT64_C(-3830101585267880776), INT64_C( 1343723656449999612),
                             INT64_C(-9156069624919168580), INT64_C( 3013620110861784505)) },
    { { INT64_C( -470898397325052178), INT64_C(-1684256644586675245),
        INT64_C(-8451403171467723697), INT64_C( 5467852576317781229) },
      simde_mm256_set_epi64x(INT64_C( 5467852576317781229), INT64_C(-8451403171467723697),
                             INT64_C(-1684256644586675245), INT64_C( -470898397325052178)) },
    { { INT64_C(-8481091302015892038), INT64_C(-5840489628108018840),
        INT64_C( 8103807582752765524), INT64_C( 9048592296921391543) },
      simde_mm256_set_epi64x(INT64_C( 9048592296921391543), INT64_C( 8103807582752765524),
                             INT64_C(-5840489628108018840), INT64_C(-8481091302015892038)) },
    { { INT64_C( 1422449841795305675), INT64_C( 2887994309822364165),
        INT64_C( 7807032920035636816), INT64_C(-3076736950419346689) },
      simde_mm256_set_epi64x(INT64_C(-3076736950419346689), INT64_C( 7807032920035636816),
                             INT64_C( 2887994309822364165), INT64_C( 1422449841795305675)) },
    { { INT64_C( 3070310353568185156), INT64_C(-8852504885484410210),
        INT64_C( 8605078790751557478), INT64_C(-3993303917440615301) },
      simde_mm256_set_epi64x(INT64_C(-3993303917440615301), INT64_C( 8605078790751557478),
                             INT64_C(-8852504885484410210), INT64_C( 3070310353568185156)) },
    { { INT64_C( 8628903781070638905), INT64_C( 7741876512722404057),
        INT64_C(-7211506260596057593), INT64_C( 4414889885954661792) },
      simde_mm256_set_epi64x(INT64_C( 4414889885954661792), INT64_C(-7211506260596057593),
                             INT64_C( 7741876512722404057), INT64_C( 8628903781070638905)) },
    { { INT64_C( 5522184073273144975), INT64_C(-7081867462548166489),
        INT64_C( 9175542926859973104), INT64_C( 1769179143810464101) },
      simde_mm256_set_epi64x(INT64_C( 1769179143810464101), INT64_C( 9175542926859973104),
                             INT64_C(-7081867462548166489), INT64_C( 5522184073273144975)) },
    { { INT64_C(-8500631716292798858), INT64_C( 4882720816332117442),
        INT64_C(  328133580565148934), INT64_C( 3537144852497440140) },
      simde_mm256_set_epi64x(INT64_C( 3537144852497440140), INT64_C(  328133580565148934),
                             INT64_C( 4882720816332117442), INT64_C(-8500631716292798858)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Input row for this case, fed to the intrinsic in array order. */
    const int64_t *in = test_vec[idx].a;
    simde__m256i actual = simde_mm256_setr_epi64x(in[0], in[1], in[2], in[3]);
    simde_assert_m256i_i64(actual, ==, test_vec[idx].r);
  }

  return 0;
}

/* Checks simde_mm256_setr_ps: each case passes the 8 floats of `a` in array
 * order and compares the result, via simde_assert_m256_close with a
 * precision argument of 1, with `r`, which is built by simde_mm256_set_ps
 * from the same values in reverse argument order.  Returns 0 once every
 * case has been checked. */
static int
test_simde_mm256_setr_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float32 a[8];
    simde__m256 r;
  } test_vec[8] = {
    { { SIMDE_FLOAT32_C(  -98.84), SIMDE_FLOAT32_C(  882.16),
        SIMDE_FLOAT32_C(  306.69), SIMDE_FLOAT32_C( -539.67),
        SIMDE_FLOAT32_C( -947.14), SIMDE_FLOAT32_C( -871.17),
        SIMDE_FLOAT32_C(  -26.40), SIMDE_FLOAT32_C( -202.75) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -202.75), SIMDE_FLOAT32_C(  -26.40),
                         SIMDE_FLOAT32_C( -871.17), SIMDE_FLOAT32_C( -947.14),
                         SIMDE_FLOAT32_C( -539.67), SIMDE_FLOAT32_C(  306.69),
                         SIMDE_FLOAT32_C(  882.16), SIMDE_FLOAT32_C(  -98.84)) },
    { { SIMDE_FLOAT32_C(  499.74), SIMDE_FLOAT32_C( -810.04),
        SIMDE_FLOAT32_C(  499.26), SIMDE_FLOAT32_C( -519.32),
        SIMDE_FLOAT32_C(  852.97), SIMDE_FLOAT32_C(  119.58),
        SIMDE_FLOAT32_C(   88.58), SIMDE_FLOAT32_C(  364.48) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  364.48), SIMDE_FLOAT32_C(   88.58),
                         SIMDE_FLOAT32_C(  119.58), SIMDE_FLOAT32_C(  852.97),
                         SIMDE_FLOAT32_C( -519.32), SIMDE_FLOAT32_C(  499.26),
                         SIMDE_FLOAT32_C( -810.04), SIMDE_FLOAT32_C(  499.74)) },
    { { SIMDE_FLOAT32_C(  127.60), SIMDE_FLOAT32_C(  904.28),
        SIMDE_FLOAT32_C(  -45.75), SIMDE_FLOAT32_C( -900.72),
        SIMDE_FLOAT32_C(  277.91), SIMDE_FLOAT32_C( -221.10),
        SIMDE_FLOAT32_C(  935.26), SIMDE_FLOAT32_C( -125.20) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C(  935.26),
                         SIMDE_FLOAT32_C( -221.10), SIMDE_FLOAT32_C(  277.91),
                         SIMDE_FLOAT32_C( -900.72), SIMDE_FLOAT32_C(  -45.75),
                         SIMDE_FLOAT32_C(  904.28), SIMDE_FLOAT32_C(  127.60)) },
    { { SIMDE_FLOAT32_C( -252.48), SIMDE_FLOAT32_C( -889.53),
        SIMDE_FLOAT32_C(  628.46), SIMDE_FLOAT32_C(  326.01),
        SIMDE_FLOAT32_C(  211.05), SIMDE_FLOAT32_C( -703.39),
        SIMDE_FLOAT32_C( -581.63), SIMDE_FLOAT32_C( -367.12) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -367.12), SIMDE_FLOAT32_C( -581.63),
                         SIMDE_FLOAT32_C( -703.39), SIMDE_FLOAT32_C(  211.05),
                         SIMDE_FLOAT32_C(  326.01), SIMDE_FLOAT32_C(  628.46),
                         SIMDE_FLOAT32_C( -889.53), SIMDE_FLOAT32_C( -252.48)) },
    { { SIMDE_FLOAT32_C( -852.61), SIMDE_FLOAT32_C(  168.93),
        SIMDE_FLOAT32_C(  -51.67), SIMDE_FLOAT32_C( -699.78),
        SIMDE_FLOAT32_C( -215.36), SIMDE_FLOAT32_C(  505.82),
        SIMDE_FLOAT32_C(  -83.94), SIMDE_FLOAT32_C( -117.98) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -117.98), SIMDE_FLOAT32_C(  -83.94),
                         SIMDE_FLOAT32_C(  505.82), SIMDE_FLOAT32_C( -215.36),
                         SIMDE_FLOAT32_C( -699.78), SIMDE_FLOAT32_C(  -51.67),
                         SIMDE_FLOAT32_C(  168.93), SIMDE_FLOAT32_C( -852.61)) },
    { { SIMDE_FLOAT32_C(  422.80), SIMDE_FLOAT32_C(  684.40),
        SIMDE_FLOAT32_C(  497.91), SIMDE_FLOAT32_C( -511.24),
        SIMDE_FLOAT32_C(  504.14), SIMDE_FLOAT32_C(  871.91),
        SIMDE_FLOAT32_C(  175.65), SIMDE_FLOAT32_C( -754.38) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C(  175.65),
                         SIMDE_FLOAT32_C(  871.91), SIMDE_FLOAT32_C(  504.14),
                         SIMDE_FLOAT32_C( -511.24), SIMDE_FLOAT32_C(  497.91),
                         SIMDE_FLOAT32_C(  684.40), SIMDE_FLOAT32_C(  422.80)) },
    { { SIMDE_FLOAT32_C( -712.98), SIMDE_FLOAT32_C(   92.05),
        SIMDE_FLOAT32_C( -155.74), SIMDE_FLOAT32_C(  933.89),
        SIMDE_FLOAT32_C(  385.65), SIMDE_FLOAT32_C( -406.91),
        SIMDE_FLOAT32_C( -999.59), SIMDE_FLOAT32_C( -851.48) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -851.48), SIMDE_FLOAT32_C( -999.59),
                         SIMDE_FLOAT32_C( -406.91), SIMDE_FLOAT32_C(  385.65),
                         SIMDE_FLOAT32_C(  933.89), SIMDE_FLOAT32_C( -155.74),
                         SIMDE_FLOAT32_C(   92.05), SIMDE_FLOAT32_C( -712.98)) },
    { { SIMDE_FLOAT32_C( -182.06), SIMDE_FLOAT32_C( -447.19),
        SIMDE_FLOAT32_C( -170.21), SIMDE_FLOAT32_C( -504.91),
        SIMDE_FLOAT32_C(  448.84), SIMDE_FLOAT32_C( -232.24),
        SIMDE_FLOAT32_C( -688.18), SIMDE_FLOAT32_C( -405.72) },
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -405.72), SIMDE_FLOAT32_C( -688.18),
                         SIMDE_FLOAT32_C( -232.24), SIMDE_FLOAT32_C(  448.84),
                         SIMDE_FLOAT32_C( -504.91), SIMDE_FLOAT32_C( -170.21),
                         SIMDE_FLOAT32_C( -447.19), SIMDE_FLOAT32_C( -182.06)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Input row for this case, fed to the intrinsic in array order. */
    const simde_float32 *in = test_vec[idx].a;
    simde__m256 actual = simde_mm256_setr_ps(
        in[0], in[1], in[2], in[3],
        in[4], in[5], in[6], in[7]);
    simde_assert_m256_close(actual, test_vec[idx].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_setr_pd: each case passes the 4 doubles of `a` in array
 * order and compares the result, via simde_assert_m256d_close with a
 * precision argument of 1, with `r`, which is built by simde_mm256_set_pd
 * from the same values in reverse argument order.  Returns 0 once every
 * case has been checked. */
static int
test_simde_mm256_setr_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float64 a[4];
    simde__m256d r;
  } test_vec[8] = {
    { { SIMDE_FLOAT64_C(  648.06), SIMDE_FLOAT64_C( -427.64),
        SIMDE_FLOAT64_C(  870.51), SIMDE_FLOAT64_C( -400.08) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -400.08), SIMDE_FLOAT64_C(  870.51),
                         SIMDE_FLOAT64_C( -427.64), SIMDE_FLOAT64_C(  648.06)) },
    { { SIMDE_FLOAT64_C(  631.12), SIMDE_FLOAT64_C(  452.84),
        SIMDE_FLOAT64_C(  521.67), SIMDE_FLOAT64_C(  516.74) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  516.74), SIMDE_FLOAT64_C(  521.67),
                         SIMDE_FLOAT64_C(  452.84), SIMDE_FLOAT64_C(  631.12)) },
    { { SIMDE_FLOAT64_C( -967.92), SIMDE_FLOAT64_C(   20.70),
        SIMDE_FLOAT64_C(  301.61), SIMDE_FLOAT64_C( -721.26) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -721.26), SIMDE_FLOAT64_C(  301.61),
                         SIMDE_FLOAT64_C(   20.70), SIMDE_FLOAT64_C( -967.92)) },
    { { SIMDE_FLOAT64_C(  324.87), SIMDE_FLOAT64_C( -688.66),
        SIMDE_FLOAT64_C( -942.28), SIMDE_FLOAT64_C( -476.77) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -476.77), SIMDE_FLOAT64_C( -942.28),
                         SIMDE_FLOAT64_C( -688.66), SIMDE_FLOAT64_C(  324.87)) },
    { { SIMDE_FLOAT64_C( -951.83), SIMDE_FLOAT64_C(   77.38),
        SIMDE_FLOAT64_C(   95.18), SIMDE_FLOAT64_C( -682.02) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -682.02), SIMDE_FLOAT64_C(   95.18),
                         SIMDE_FLOAT64_C(   77.38), SIMDE_FLOAT64_C( -951.83)) },
    { { SIMDE_FLOAT64_C( -650.77), SIMDE_FLOAT64_C( -285.31),
        SIMDE_FLOAT64_C(  662.58), SIMDE_FLOAT64_C(  693.61) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  693.61), SIMDE_FLOAT64_C(  662.58),
                         SIMDE_FLOAT64_C( -285.31), SIMDE_FLOAT64_C( -650.77)) },
    { { SIMDE_FLOAT64_C(  209.43), SIMDE_FLOAT64_C(  188.93),
        SIMDE_FLOAT64_C( -264.78), SIMDE_FLOAT64_C(  938.62) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  938.62), SIMDE_FLOAT64_C( -264.78),
                         SIMDE_FLOAT64_C(  188.93), SIMDE_FLOAT64_C(  209.43)) },
    { { SIMDE_FLOAT64_C(  887.57), SIMDE_FLOAT64_C(  787.01),
        SIMDE_FLOAT64_C( -658.13), SIMDE_FLOAT64_C(  241.09) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  241.09), SIMDE_FLOAT64_C( -658.13),
                         SIMDE_FLOAT64_C(  787.01), SIMDE_FLOAT64_C(  887.57)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < n_cases ; idx++) {
    /* Input row for this case, fed to the intrinsic in array order. */
    const simde_float64 *in = test_vec[idx].a;
    simde__m256d actual = simde_mm256_setr_pd(in[0], in[1], in[2], in[3]);
    simde_assert_m256d_close(actual, test_vec[idx].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_setr_m128() against eight fixed test vectors.
 *
 * Each table entry holds the two 128-bit inputs `a` and `b` plus the expected
 * 256-bit result `r`.  The expected value is written with simde_mm256_set_ps()
 * as the four values of `b` followed by the four values of `a`.
 *
 * Returns 0 once every vector has been checked. */
static int
test_simde_mm256_setr_m128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 a;
    simde__m128 b;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -682.25), SIMDE_FLOAT32_C( -899.79), SIMDE_FLOAT32_C( -478.94), SIMDE_FLOAT32_C(  364.00)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -650.11), SIMDE_FLOAT32_C( -192.16), SIMDE_FLOAT32_C(  808.30), SIMDE_FLOAT32_C(  519.14)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -650.11), SIMDE_FLOAT32_C( -192.16),
                         SIMDE_FLOAT32_C(  808.30), SIMDE_FLOAT32_C(  519.14),
                         SIMDE_FLOAT32_C( -682.25), SIMDE_FLOAT32_C( -899.79),
                         SIMDE_FLOAT32_C( -478.94), SIMDE_FLOAT32_C(  364.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(   16.48), SIMDE_FLOAT32_C(  517.23), SIMDE_FLOAT32_C( -546.20), SIMDE_FLOAT32_C(  -61.05)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  715.06), SIMDE_FLOAT32_C( -476.50), SIMDE_FLOAT32_C( -479.17), SIMDE_FLOAT32_C( -869.09)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  715.06), SIMDE_FLOAT32_C( -476.50),
                         SIMDE_FLOAT32_C( -479.17), SIMDE_FLOAT32_C( -869.09),
                         SIMDE_FLOAT32_C(   16.48), SIMDE_FLOAT32_C(  517.23),
                         SIMDE_FLOAT32_C( -546.20), SIMDE_FLOAT32_C(  -61.05)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -779.37), SIMDE_FLOAT32_C(   30.06), SIMDE_FLOAT32_C( -690.77), SIMDE_FLOAT32_C(  921.96)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -173.53), SIMDE_FLOAT32_C(  887.42), SIMDE_FLOAT32_C(  309.36), SIMDE_FLOAT32_C(  929.48)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -173.53), SIMDE_FLOAT32_C(  887.42),
                         SIMDE_FLOAT32_C(  309.36), SIMDE_FLOAT32_C(  929.48),
                         SIMDE_FLOAT32_C( -779.37), SIMDE_FLOAT32_C(   30.06),
                         SIMDE_FLOAT32_C( -690.77), SIMDE_FLOAT32_C(  921.96)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  766.53), SIMDE_FLOAT32_C( -675.92), SIMDE_FLOAT32_C( -948.96), SIMDE_FLOAT32_C(  521.94)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  725.37), SIMDE_FLOAT32_C( -802.67), SIMDE_FLOAT32_C( -800.62), SIMDE_FLOAT32_C(  419.68)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  725.37), SIMDE_FLOAT32_C( -802.67),
                         SIMDE_FLOAT32_C( -800.62), SIMDE_FLOAT32_C(  419.68),
                         SIMDE_FLOAT32_C(  766.53), SIMDE_FLOAT32_C( -675.92),
                         SIMDE_FLOAT32_C( -948.96), SIMDE_FLOAT32_C(  521.94)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  133.15), SIMDE_FLOAT32_C(  853.30), SIMDE_FLOAT32_C(  295.19), SIMDE_FLOAT32_C( -233.49)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(  973.48), SIMDE_FLOAT32_C(  235.18), SIMDE_FLOAT32_C(  111.09), SIMDE_FLOAT32_C( -515.37)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  973.48), SIMDE_FLOAT32_C(  235.18),
                         SIMDE_FLOAT32_C(  111.09), SIMDE_FLOAT32_C( -515.37),
                         SIMDE_FLOAT32_C(  133.15), SIMDE_FLOAT32_C(  853.30),
                         SIMDE_FLOAT32_C(  295.19), SIMDE_FLOAT32_C( -233.49)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -131.35), SIMDE_FLOAT32_C(  737.21), SIMDE_FLOAT32_C(  816.16), SIMDE_FLOAT32_C(  442.16)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -614.82), SIMDE_FLOAT32_C( -170.44), SIMDE_FLOAT32_C(  851.94), SIMDE_FLOAT32_C(  235.41)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -614.82), SIMDE_FLOAT32_C( -170.44),
                         SIMDE_FLOAT32_C(  851.94), SIMDE_FLOAT32_C(  235.41),
                         SIMDE_FLOAT32_C( -131.35), SIMDE_FLOAT32_C(  737.21),
                         SIMDE_FLOAT32_C(  816.16), SIMDE_FLOAT32_C(  442.16)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  291.38), SIMDE_FLOAT32_C( -442.22), SIMDE_FLOAT32_C(  756.36), SIMDE_FLOAT32_C( -768.65)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -266.91), SIMDE_FLOAT32_C( -275.67), SIMDE_FLOAT32_C( -687.10), SIMDE_FLOAT32_C(  236.32)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -266.91), SIMDE_FLOAT32_C( -275.67),
                         SIMDE_FLOAT32_C( -687.10), SIMDE_FLOAT32_C(  236.32),
                         SIMDE_FLOAT32_C(  291.38), SIMDE_FLOAT32_C( -442.22),
                         SIMDE_FLOAT32_C(  756.36), SIMDE_FLOAT32_C( -768.65)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C(  561.83), SIMDE_FLOAT32_C(  979.61), SIMDE_FLOAT32_C(   43.21), SIMDE_FLOAT32_C( -386.38)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -879.64), SIMDE_FLOAT32_C( -192.97), SIMDE_FLOAT32_C( -876.27), SIMDE_FLOAT32_C(  -36.00)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -879.64), SIMDE_FLOAT32_C( -192.97),
                         SIMDE_FLOAT32_C( -876.27), SIMDE_FLOAT32_C(  -36.00),
                         SIMDE_FLOAT32_C(  561.83), SIMDE_FLOAT32_C(  979.61),
                         SIMDE_FLOAT32_C(   43.21), SIMDE_FLOAT32_C( -386.38)) }
  };
  /* Number of entries in the table, derived from the array itself. */
  const size_t num_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < num_vectors ; i++) {
    simde__m256 r;

    r = simde_mm256_setr_m128(test_vec[i].a, test_vec[i].b);
    /* Approximate floating-point comparison; the trailing 1 is the precision
     * argument of the assertion macro. */
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_setr_m128d() against eight fixed test vectors.
 *
 * Each table entry holds the two 128-bit double inputs `a` and `b` plus the
 * expected 256-bit result `r`, which is written with simde_mm256_set_pd() as
 * the two values of `b` followed by the two values of `a`.
 *
 * Returns 0 once every vector has been checked. */
static int
test_simde_mm256_setr_m128d(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d a;
    simde__m128d b;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -456.75), SIMDE_FLOAT64_C( -671.00)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -831.34), SIMDE_FLOAT64_C(  280.05)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -831.34), SIMDE_FLOAT64_C(  280.05),
                         SIMDE_FLOAT64_C( -456.75), SIMDE_FLOAT64_C( -671.00)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  937.15), SIMDE_FLOAT64_C( -608.20)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -231.75), SIMDE_FLOAT64_C( -301.21)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -231.75), SIMDE_FLOAT64_C( -301.21),
                         SIMDE_FLOAT64_C(  937.15), SIMDE_FLOAT64_C( -608.20)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -418.68), SIMDE_FLOAT64_C( -219.09)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -262.95), SIMDE_FLOAT64_C( -857.27)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -262.95), SIMDE_FLOAT64_C( -857.27),
                         SIMDE_FLOAT64_C( -418.68), SIMDE_FLOAT64_C( -219.09)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  154.88), SIMDE_FLOAT64_C(   64.12)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  231.57), SIMDE_FLOAT64_C(  996.12)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  231.57), SIMDE_FLOAT64_C(  996.12),
                         SIMDE_FLOAT64_C(  154.88), SIMDE_FLOAT64_C(   64.12)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  -46.48), SIMDE_FLOAT64_C( -511.22)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  951.46), SIMDE_FLOAT64_C(  771.21)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  951.46), SIMDE_FLOAT64_C(  771.21),
                         SIMDE_FLOAT64_C(  -46.48), SIMDE_FLOAT64_C( -511.22)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -492.75), SIMDE_FLOAT64_C( -725.08)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -545.59), SIMDE_FLOAT64_C( -960.12)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -545.59), SIMDE_FLOAT64_C( -960.12),
                         SIMDE_FLOAT64_C( -492.75), SIMDE_FLOAT64_C( -725.08)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(   -9.74), SIMDE_FLOAT64_C(  -44.37)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(  854.62), SIMDE_FLOAT64_C( -942.41)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  854.62), SIMDE_FLOAT64_C( -942.41),
                         SIMDE_FLOAT64_C(   -9.74), SIMDE_FLOAT64_C(  -44.37)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(  575.33), SIMDE_FLOAT64_C( -493.55)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(   73.10), SIMDE_FLOAT64_C(   90.67)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   73.10), SIMDE_FLOAT64_C(   90.67),
                         SIMDE_FLOAT64_C(  575.33), SIMDE_FLOAT64_C( -493.55)) }
  };
  /* Number of entries in the table, derived from the array itself. */
  const size_t num_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < num_vectors ; i++) {
    simde__m256d r;

    r = simde_mm256_setr_m128d(test_vec[i].a, test_vec[i].b);
    /* Approximate floating-point comparison; the trailing 1 is the precision
     * argument of the assertion macro. */
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm256_setr_m128i() against eight fixed test vectors.
 *
 * Each table entry holds the two 128-bit integer inputs `a` and `b` plus the
 * expected 256-bit result `r`, which is written with simde_mm256_set_epi32()
 * as the four 32-bit values of `b` followed by the four values of `a`.
 *
 * Returns 0 once every vector has been checked. */
static int
test_simde_mm256_setr_m128i(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m128i b;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm_set_epi32(INT32_C(-1742712724), INT32_C( -314784100), INT32_C(  986737210), INT32_C( 1275380805)),
      simde_mm_set_epi32(INT32_C( -652328462), INT32_C(-1178876865), INT32_C(-2116026355), INT32_C(  283851183)),
      simde_mm256_set_epi32(INT32_C( -652328462), INT32_C(-1178876865), INT32_C(-2116026355), INT32_C(  283851183),
                            INT32_C(-1742712724), INT32_C( -314784100), INT32_C(  986737210), INT32_C( 1275380805)) },
    { simde_mm_set_epi32(INT32_C( 1950785462), INT32_C(-1647057227), INT32_C( 1610379205), INT32_C( -779524107)),
      simde_mm_set_epi32(INT32_C(  463748536), INT32_C( -797772071), INT32_C( 1736524491), INT32_C( 1281308863)),
      simde_mm256_set_epi32(INT32_C(  463748536), INT32_C( -797772071), INT32_C( 1736524491), INT32_C( 1281308863),
                            INT32_C( 1950785462), INT32_C(-1647057227), INT32_C( 1610379205), INT32_C( -779524107)) },
    { simde_mm_set_epi32(INT32_C(-2008212267), INT32_C(-2138916541), INT32_C(-1006728926), INT32_C(-1435438838)),
      simde_mm_set_epi32(INT32_C( -133349630), INT32_C(-1192564707), INT32_C(-2002224298), INT32_C(  917327905)),
      simde_mm256_set_epi32(INT32_C( -133349630), INT32_C(-1192564707), INT32_C(-2002224298), INT32_C(  917327905),
                            INT32_C(-2008212267), INT32_C(-2138916541), INT32_C(-1006728926), INT32_C(-1435438838)) },
    { simde_mm_set_epi32(INT32_C( -870567789), INT32_C(-1650884654), INT32_C(  516950890), INT32_C(-1478935293)),
      simde_mm_set_epi32(INT32_C(  346251173), INT32_C(   44036763), INT32_C( -453299917), INT32_C( 1313402969)),
      simde_mm256_set_epi32(INT32_C(  346251173), INT32_C(   44036763), INT32_C( -453299917), INT32_C( 1313402969),
                            INT32_C( -870567789), INT32_C(-1650884654), INT32_C(  516950890), INT32_C(-1478935293)) },
    { simde_mm_set_epi32(INT32_C( 1685745491), INT32_C(-1092039924), INT32_C( 1442866872), INT32_C(-1523614432)),
      simde_mm_set_epi32(INT32_C(  863519834), INT32_C(  230110187), INT32_C(  812017634), INT32_C( 1688191143)),
      simde_mm256_set_epi32(INT32_C(  863519834), INT32_C(  230110187), INT32_C(  812017634), INT32_C( 1688191143),
                            INT32_C( 1685745491), INT32_C(-1092039924), INT32_C( 1442866872), INT32_C(-1523614432)) },
    { simde_mm_set_epi32(INT32_C(  245453619), INT32_C( 1778016121), INT32_C(   58675090), INT32_C( 1219256368)),
      simde_mm_set_epi32(INT32_C( 1174470085), INT32_C( -388376691), INT32_C( -990477533), INT32_C( -476034642)),
      simde_mm256_set_epi32(INT32_C( 1174470085), INT32_C( -388376691), INT32_C( -990477533), INT32_C( -476034642),
                            INT32_C(  245453619), INT32_C( 1778016121), INT32_C(   58675090), INT32_C( 1219256368)) },
    { simde_mm_set_epi32(INT32_C(    4875253), INT32_C(-1938130041), INT32_C( -829985839), INT32_C( 1737785848)),
      simde_mm_set_epi32(INT32_C(  645358488), INT32_C(   69189244), INT32_C( 1744086784), INT32_C(  473310154)),
      simde_mm256_set_epi32(INT32_C(  645358488), INT32_C(   69189244), INT32_C( 1744086784), INT32_C(  473310154),
                            INT32_C(    4875253), INT32_C(-1938130041), INT32_C( -829985839), INT32_C( 1737785848)) },
    { simde_mm_set_epi32(INT32_C(  804470839), INT32_C(-1989324616), INT32_C( 2138294939), INT32_C(  -20370473)),
      simde_mm_set_epi32(INT32_C(  587534668), INT32_C(  665646160), INT32_C(-1572975914), INT32_C( 1262599280)),
      simde_mm256_set_epi32(INT32_C(  587534668), INT32_C(  665646160), INT32_C(-1572975914), INT32_C( 1262599280),
                            INT32_C(  804470839), INT32_C(-1989324616), INT32_C( 2138294939), INT32_C(  -20370473)) }
  };
  /* Number of entries in the table, derived from the array itself. */
  const size_t num_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < num_vectors ; i++) {
    simde__m256i r;

    r = simde_mm256_setr_m128i(test_vec[i].a, test_vec[i].b);
    /* Integer data, so the comparison is exact (==) on 32-bit elements. */
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm256_shuffle_ps() against eight fixed test vectors, using two
 * different immediates per vector.
 *
 * Each table entry holds the inputs `a` and `b`, the expected result `r1` for
 * immediate 0x55 and the expected result `r2` for immediate 0xaa.
 *
 * Returns 0 once every vector has been checked. */
static int
test_simde_mm256_shuffle_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
    simde__m256 r1;
    simde__m256 r2;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -37.53), SIMDE_FLOAT32_C(  505.45),
                         SIMDE_FLOAT32_C( -772.05), SIMDE_FLOAT32_C( -524.38),
                         SIMDE_FLOAT32_C(   32.28), SIMDE_FLOAT32_C(  575.28),
                         SIMDE_FLOAT32_C(  459.50), SIMDE_FLOAT32_C( -869.92)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  190.31), SIMDE_FLOAT32_C( -827.59),
                         SIMDE_FLOAT32_C( -501.09), SIMDE_FLOAT32_C(  667.40),
                         SIMDE_FLOAT32_C( -205.26), SIMDE_FLOAT32_C(  908.59),
                         SIMDE_FLOAT32_C(  448.39), SIMDE_FLOAT32_C( -264.01)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -501.09), SIMDE_FLOAT32_C( -501.09),
                         SIMDE_FLOAT32_C( -772.05), SIMDE_FLOAT32_C( -772.05),
                         SIMDE_FLOAT32_C(  448.39), SIMDE_FLOAT32_C(  448.39),
                         SIMDE_FLOAT32_C(  459.50), SIMDE_FLOAT32_C(  459.50)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( -827.59),
                         SIMDE_FLOAT32_C(  505.45), SIMDE_FLOAT32_C(  505.45),
                         SIMDE_FLOAT32_C(  908.59), SIMDE_FLOAT32_C(  908.59),
                         SIMDE_FLOAT32_C(  575.28), SIMDE_FLOAT32_C(  575.28)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -651.57), SIMDE_FLOAT32_C( -282.66),
                         SIMDE_FLOAT32_C( -530.16), SIMDE_FLOAT32_C( -552.16),
                         SIMDE_FLOAT32_C(  586.68), SIMDE_FLOAT32_C(  706.29),
                         SIMDE_FLOAT32_C(  537.21), SIMDE_FLOAT32_C( -450.04)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -642.32), SIMDE_FLOAT32_C( -725.64),
                         SIMDE_FLOAT32_C(  162.03), SIMDE_FLOAT32_C(  624.91),
                         SIMDE_FLOAT32_C(  415.33), SIMDE_FLOAT32_C(  -62.25),
                         SIMDE_FLOAT32_C(  445.83), SIMDE_FLOAT32_C( -888.88)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  162.03), SIMDE_FLOAT32_C(  162.03),
                         SIMDE_FLOAT32_C( -530.16), SIMDE_FLOAT32_C( -530.16),
                         SIMDE_FLOAT32_C(  445.83), SIMDE_FLOAT32_C(  445.83),
                         SIMDE_FLOAT32_C(  537.21), SIMDE_FLOAT32_C(  537.21)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -725.64), SIMDE_FLOAT32_C( -725.64),
                         SIMDE_FLOAT32_C( -282.66), SIMDE_FLOAT32_C( -282.66),
                         SIMDE_FLOAT32_C(  -62.25), SIMDE_FLOAT32_C(  -62.25),
                         SIMDE_FLOAT32_C(  706.29), SIMDE_FLOAT32_C(  706.29)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -586.61), SIMDE_FLOAT32_C( -444.58),
                         SIMDE_FLOAT32_C(  804.56), SIMDE_FLOAT32_C( -661.78),
                         SIMDE_FLOAT32_C( -398.96), SIMDE_FLOAT32_C(  555.99),
                         SIMDE_FLOAT32_C(  255.05), SIMDE_FLOAT32_C(  326.05)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   59.74), SIMDE_FLOAT32_C(  544.15),
                         SIMDE_FLOAT32_C(  414.09), SIMDE_FLOAT32_C(   11.73),
                         SIMDE_FLOAT32_C(  678.11), SIMDE_FLOAT32_C(  264.09),
                         SIMDE_FLOAT32_C(  492.67), SIMDE_FLOAT32_C( -690.47)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  414.09), SIMDE_FLOAT32_C(  414.09),
                         SIMDE_FLOAT32_C(  804.56), SIMDE_FLOAT32_C(  804.56),
                         SIMDE_FLOAT32_C(  492.67), SIMDE_FLOAT32_C(  492.67),
                         SIMDE_FLOAT32_C(  255.05), SIMDE_FLOAT32_C(  255.05)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  544.15), SIMDE_FLOAT32_C(  544.15),
                         SIMDE_FLOAT32_C( -444.58), SIMDE_FLOAT32_C( -444.58),
                         SIMDE_FLOAT32_C(  264.09), SIMDE_FLOAT32_C(  264.09),
                         SIMDE_FLOAT32_C(  555.99), SIMDE_FLOAT32_C(  555.99)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -738.46), SIMDE_FLOAT32_C(  931.97),
                         SIMDE_FLOAT32_C( -722.34), SIMDE_FLOAT32_C( -600.75),
                         SIMDE_FLOAT32_C( -215.41), SIMDE_FLOAT32_C( -472.40),
                         SIMDE_FLOAT32_C(  -60.64), SIMDE_FLOAT32_C(  120.78)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  773.86), SIMDE_FLOAT32_C( -298.57),
                         SIMDE_FLOAT32_C(  440.00), SIMDE_FLOAT32_C( -205.51),
                         SIMDE_FLOAT32_C( -237.18), SIMDE_FLOAT32_C( -760.22),
                         SIMDE_FLOAT32_C( -446.09), SIMDE_FLOAT32_C( -381.56)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  440.00), SIMDE_FLOAT32_C(  440.00),
                         SIMDE_FLOAT32_C( -722.34), SIMDE_FLOAT32_C( -722.34),
                         SIMDE_FLOAT32_C( -446.09), SIMDE_FLOAT32_C( -446.09),
                         SIMDE_FLOAT32_C(  -60.64), SIMDE_FLOAT32_C(  -60.64)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -298.57), SIMDE_FLOAT32_C( -298.57),
                         SIMDE_FLOAT32_C(  931.97), SIMDE_FLOAT32_C(  931.97),
                         SIMDE_FLOAT32_C( -760.22), SIMDE_FLOAT32_C( -760.22),
                         SIMDE_FLOAT32_C( -472.40), SIMDE_FLOAT32_C( -472.40)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -868.52), SIMDE_FLOAT32_C( -561.35),
                         SIMDE_FLOAT32_C( -571.37), SIMDE_FLOAT32_C(  511.95),
                         SIMDE_FLOAT32_C(  794.40), SIMDE_FLOAT32_C(  468.29),
                         SIMDE_FLOAT32_C(  949.07), SIMDE_FLOAT32_C(  504.01)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -600.88), SIMDE_FLOAT32_C( -128.06),
                         SIMDE_FLOAT32_C(   -3.70), SIMDE_FLOAT32_C( -620.63),
                         SIMDE_FLOAT32_C(  888.33), SIMDE_FLOAT32_C(  864.93),
                         SIMDE_FLOAT32_C( -548.30), SIMDE_FLOAT32_C(  693.70)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   -3.70), SIMDE_FLOAT32_C(   -3.70),
                         SIMDE_FLOAT32_C( -571.37), SIMDE_FLOAT32_C( -571.37),
                         SIMDE_FLOAT32_C( -548.30), SIMDE_FLOAT32_C( -548.30),
                         SIMDE_FLOAT32_C(  949.07), SIMDE_FLOAT32_C(  949.07)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -128.06), SIMDE_FLOAT32_C( -128.06),
                         SIMDE_FLOAT32_C( -561.35), SIMDE_FLOAT32_C( -561.35),
                         SIMDE_FLOAT32_C(  864.93), SIMDE_FLOAT32_C(  864.93),
                         SIMDE_FLOAT32_C(  468.29), SIMDE_FLOAT32_C(  468.29)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -274.24), SIMDE_FLOAT32_C( -910.17),
                         SIMDE_FLOAT32_C(  548.80), SIMDE_FLOAT32_C( -838.00),
                         SIMDE_FLOAT32_C( -379.63), SIMDE_FLOAT32_C(  775.00),
                         SIMDE_FLOAT32_C( -238.61), SIMDE_FLOAT32_C( -278.26)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   28.03), SIMDE_FLOAT32_C( -206.70),
                         SIMDE_FLOAT32_C(  -80.05), SIMDE_FLOAT32_C(  380.68),
                         SIMDE_FLOAT32_C(  342.48), SIMDE_FLOAT32_C(  525.81),
                         SIMDE_FLOAT32_C( -202.62), SIMDE_FLOAT32_C(  412.48)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  -80.05), SIMDE_FLOAT32_C(  -80.05),
                         SIMDE_FLOAT32_C(  548.80), SIMDE_FLOAT32_C(  548.80),
                         SIMDE_FLOAT32_C( -202.62), SIMDE_FLOAT32_C( -202.62),
                         SIMDE_FLOAT32_C( -238.61), SIMDE_FLOAT32_C( -238.61)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -206.70), SIMDE_FLOAT32_C( -206.70),
                         SIMDE_FLOAT32_C( -910.17), SIMDE_FLOAT32_C( -910.17),
                         SIMDE_FLOAT32_C(  525.81), SIMDE_FLOAT32_C(  525.81),
                         SIMDE_FLOAT32_C(  775.00), SIMDE_FLOAT32_C(  775.00)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  270.95), SIMDE_FLOAT32_C(  727.79),
                         SIMDE_FLOAT32_C(  361.48), SIMDE_FLOAT32_C(  843.29),
                         SIMDE_FLOAT32_C( -519.67), SIMDE_FLOAT32_C( -181.50),
                         SIMDE_FLOAT32_C( -112.74), SIMDE_FLOAT32_C(  545.62)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -132.55), SIMDE_FLOAT32_C( -718.86),
                         SIMDE_FLOAT32_C(  142.59), SIMDE_FLOAT32_C(  742.01),
                         SIMDE_FLOAT32_C(  593.39), SIMDE_FLOAT32_C(  515.42),
                         SIMDE_FLOAT32_C(  897.24), SIMDE_FLOAT32_C(  759.74)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  142.59), SIMDE_FLOAT32_C(  142.59),
                         SIMDE_FLOAT32_C(  361.48), SIMDE_FLOAT32_C(  361.48),
                         SIMDE_FLOAT32_C(  897.24), SIMDE_FLOAT32_C(  897.24),
                         SIMDE_FLOAT32_C( -112.74), SIMDE_FLOAT32_C( -112.74)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -718.86), SIMDE_FLOAT32_C( -718.86),
                         SIMDE_FLOAT32_C(  727.79), SIMDE_FLOAT32_C(  727.79),
                         SIMDE_FLOAT32_C(  515.42), SIMDE_FLOAT32_C(  515.42),
                         SIMDE_FLOAT32_C( -181.50), SIMDE_FLOAT32_C( -181.50)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -964.03), SIMDE_FLOAT32_C(  334.31),
                         SIMDE_FLOAT32_C( -520.63), SIMDE_FLOAT32_C(  -60.01),
                         SIMDE_FLOAT32_C(  788.31), SIMDE_FLOAT32_C( -532.00),
                         SIMDE_FLOAT32_C(  146.02), SIMDE_FLOAT32_C(  -45.94)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -225.66), SIMDE_FLOAT32_C( -255.31),
                         SIMDE_FLOAT32_C(  440.71), SIMDE_FLOAT32_C( -673.25),
                         SIMDE_FLOAT32_C( -649.50), SIMDE_FLOAT32_C( -704.29),
                         SIMDE_FLOAT32_C(  340.20), SIMDE_FLOAT32_C( -395.47)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(  440.71), SIMDE_FLOAT32_C(  440.71),
                         SIMDE_FLOAT32_C( -520.63), SIMDE_FLOAT32_C( -520.63),
                         SIMDE_FLOAT32_C(  340.20), SIMDE_FLOAT32_C(  340.20),
                         SIMDE_FLOAT32_C(  146.02), SIMDE_FLOAT32_C(  146.02)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( -255.31), SIMDE_FLOAT32_C( -255.31),
                         SIMDE_FLOAT32_C(  334.31), SIMDE_FLOAT32_C(  334.31),
                         SIMDE_FLOAT32_C( -704.29), SIMDE_FLOAT32_C( -704.29),
                         SIMDE_FLOAT32_C( -532.00), SIMDE_FLOAT32_C( -532.00)) }
  };
  /* Number of entries in the table, derived from the array itself. */
  const size_t num_vectors = sizeof(test_vec) / sizeof(test_vec[0]);
  simde__m256 r;

  for (size_t i = 0 ; i < num_vectors ; i++) {
    /* First immediate: result must match r1. */
    r = simde_mm256_shuffle_ps(test_vec[i].a, test_vec[i].b, 0x55);
    simde_assert_m256_close(r, test_vec[i].r1, 1);

    /* Second immediate on the same inputs: result must match r2. */
    r = simde_mm256_shuffle_ps(test_vec[i].a, test_vec[i].b, 0xaa);
    simde_assert_m256_close(r, test_vec[i].r2, 1);
  }

  return 0;
}

/* Checks simde_mm256_shuffle_pd() in two stages.
 *
 * 1. Table-driven: for each of eight fixed vectors, shuffle `a` and `b` with
 *    immediate 0x5 (expected `r1`) and with immediate 0xa (expected `r2`).
 * 2. Bit-pattern: load two arrays of 64-bit integers as doubles, shuffle them
 *    with immediate 0xc, and compare the result as 64-bit integers against
 *    `target`.
 *
 * Returns 0 once every check has passed. */
static int
test_simde_mm256_shuffle_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256d b;
    simde__m256d r1;
    simde__m256d r2;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  934.66), SIMDE_FLOAT64_C( -881.67),
                         SIMDE_FLOAT64_C(  836.94), SIMDE_FLOAT64_C( -777.20)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -249.31), SIMDE_FLOAT64_C(  364.30),
                         SIMDE_FLOAT64_C( -553.11), SIMDE_FLOAT64_C( -269.32)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  364.30), SIMDE_FLOAT64_C(  934.66),
                         SIMDE_FLOAT64_C( -269.32), SIMDE_FLOAT64_C(  836.94)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -249.31), SIMDE_FLOAT64_C( -881.67),
                         SIMDE_FLOAT64_C( -553.11), SIMDE_FLOAT64_C( -777.20)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -570.38), SIMDE_FLOAT64_C(  768.57),
                         SIMDE_FLOAT64_C(  912.15), SIMDE_FLOAT64_C(  -23.81)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -107.98), SIMDE_FLOAT64_C( -226.33),
                         SIMDE_FLOAT64_C(  924.14), SIMDE_FLOAT64_C( -792.70)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -226.33), SIMDE_FLOAT64_C( -570.38),
                         SIMDE_FLOAT64_C( -792.70), SIMDE_FLOAT64_C(  912.15)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -107.98), SIMDE_FLOAT64_C(  768.57),
                         SIMDE_FLOAT64_C(  924.14), SIMDE_FLOAT64_C(  -23.81)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -307.34), SIMDE_FLOAT64_C(  256.70),
                         SIMDE_FLOAT64_C(  615.34), SIMDE_FLOAT64_C(  966.02)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   80.56), SIMDE_FLOAT64_C( -102.88),
                         SIMDE_FLOAT64_C(  558.25), SIMDE_FLOAT64_C(  907.54)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -102.88), SIMDE_FLOAT64_C( -307.34),
                         SIMDE_FLOAT64_C(  907.54), SIMDE_FLOAT64_C(  615.34)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   80.56), SIMDE_FLOAT64_C(  256.70),
                         SIMDE_FLOAT64_C(  558.25), SIMDE_FLOAT64_C(  966.02)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -428.91), SIMDE_FLOAT64_C( -946.94),
                         SIMDE_FLOAT64_C( -242.51), SIMDE_FLOAT64_C(  207.30)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  871.74), SIMDE_FLOAT64_C(  294.25),
                         SIMDE_FLOAT64_C(  -23.76), SIMDE_FLOAT64_C(  857.02)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  294.25), SIMDE_FLOAT64_C( -428.91),
                         SIMDE_FLOAT64_C(  857.02), SIMDE_FLOAT64_C( -242.51)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  871.74), SIMDE_FLOAT64_C( -946.94),
                         SIMDE_FLOAT64_C(  -23.76), SIMDE_FLOAT64_C(  207.30)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  684.37), SIMDE_FLOAT64_C(  -77.07),
                         SIMDE_FLOAT64_C( -492.40), SIMDE_FLOAT64_C( -711.90)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  911.18), SIMDE_FLOAT64_C( -875.79),
                         SIMDE_FLOAT64_C(  168.17), SIMDE_FLOAT64_C( -582.90)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -875.79), SIMDE_FLOAT64_C(  684.37),
                         SIMDE_FLOAT64_C( -582.90), SIMDE_FLOAT64_C( -492.40)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  911.18), SIMDE_FLOAT64_C(  -77.07),
                         SIMDE_FLOAT64_C(  168.17), SIMDE_FLOAT64_C( -711.90)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  844.28), SIMDE_FLOAT64_C( -547.02),
                         SIMDE_FLOAT64_C( -536.51), SIMDE_FLOAT64_C( -341.28)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -205.14), SIMDE_FLOAT64_C(   35.47),
                         SIMDE_FLOAT64_C(  536.74), SIMDE_FLOAT64_C(  843.54)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   35.47), SIMDE_FLOAT64_C(  844.28),
                         SIMDE_FLOAT64_C(  843.54), SIMDE_FLOAT64_C( -536.51)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -205.14), SIMDE_FLOAT64_C( -547.02),
                         SIMDE_FLOAT64_C(  536.74), SIMDE_FLOAT64_C( -341.28)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  597.24), SIMDE_FLOAT64_C(   73.58),
                         SIMDE_FLOAT64_C(  575.62), SIMDE_FLOAT64_C( -337.42)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  430.69), SIMDE_FLOAT64_C( -764.62),
                         SIMDE_FLOAT64_C(  152.29), SIMDE_FLOAT64_C(  529.08)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -764.62), SIMDE_FLOAT64_C(  597.24),
                         SIMDE_FLOAT64_C(  529.08), SIMDE_FLOAT64_C(  575.62)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  430.69), SIMDE_FLOAT64_C(   73.58),
                         SIMDE_FLOAT64_C(  152.29), SIMDE_FLOAT64_C( -337.42)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  276.59), SIMDE_FLOAT64_C(  918.52),
                         SIMDE_FLOAT64_C(  859.45), SIMDE_FLOAT64_C(   26.68)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -365.57), SIMDE_FLOAT64_C(  780.68),
                         SIMDE_FLOAT64_C(  333.70), SIMDE_FLOAT64_C( -391.20)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(  780.68), SIMDE_FLOAT64_C(  276.59),
                         SIMDE_FLOAT64_C( -391.20), SIMDE_FLOAT64_C(  859.45)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -365.57), SIMDE_FLOAT64_C(  918.52),
                         SIMDE_FLOAT64_C(  333.70), SIMDE_FLOAT64_C(   26.68)) }
  };
  /* Number of entries in the table, derived from the array itself. */
  const size_t num_vectors = sizeof(test_vec) / sizeof(test_vec[0]);
  simde__m256d r;

  /* Stage 1: table-driven checks with immediates 0x5 and 0xa. */
  for (size_t i = 0 ; i < num_vectors ; i++) {
    r = simde_mm256_shuffle_pd(test_vec[i].a, test_vec[i].b, 0x5);
    simde_assert_m256d_close(r, test_vec[i].r1, 1);

    r = simde_mm256_shuffle_pd(test_vec[i].a, test_vec[i].b, 0xa);
    simde_assert_m256d_close(r, test_vec[i].r2, 1);
  }

  /* Stage 2: raw 64-bit patterns, compared exactly as integers rather than
   * approximately as doubles. */
  uint64_t a[] = {1, 0, 0, 0};
  uint64_t b[] = {0, 0, 1, 0};
  int64_t target[4] = {INT64_C(1),  INT64_C(0),  INT64_C(0),  INT64_C(0) };

  simde__m256d tmp_0_yd = simde_mm256_loadu_pd(HEDLEY_REINTERPRET_CAST(double*, a));
  simde__m256d tmp_1_yd = simde_mm256_loadu_pd(HEDLEY_REINTERPRET_CAST(double*, b));

  /* 0xc == 0b1100: per the Intel definition of the immediate this picks
   * a[0], b[0], a[3], b[3], i.e. the bit patterns {1, 0, 0, 0}. */
  r = simde_mm256_shuffle_pd(tmp_0_yd, tmp_1_yd, 0xc);

  simde_test_x86_assert_equal_i64x4(simde_mm256_castpd_si256(r), simde_mm256_loadu_epi64(target));

  /* Debug aid kept from the original (presumably prints r for regenerating
   * `target`; confirm against the test helpers before relying on it): */
  //simde_test_x86_write_i64x4(2, simde_mm256_castpd_si256(r), SIMDE_TEST_VEC_POS_LAST);

  return 0;
}

/* Checks simde_mm256_sqrt_ps() against eight fixed test vectors.
 *
 * Each table entry holds the input `a` and the expected result `r`.  The
 * expected values are written to two decimal places, so the comparison is an
 * approximate one.
 *
 * Returns 0 once every vector has been checked. */
static int
test_simde_mm256_sqrt_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   37.27), SIMDE_FLOAT32_C(  842.37),
                         SIMDE_FLOAT32_C(  821.35), SIMDE_FLOAT32_C(  882.42),
                         SIMDE_FLOAT32_C(  506.85), SIMDE_FLOAT32_C(  418.78),
                         SIMDE_FLOAT32_C(  759.20), SIMDE_FLOAT32_C(  903.29)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(    6.10), SIMDE_FLOAT32_C(   29.02),
                         SIMDE_FLOAT32_C(   28.66), SIMDE_FLOAT32_C(   29.71),
                         SIMDE_FLOAT32_C(   22.51), SIMDE_FLOAT32_C(   20.46),
                         SIMDE_FLOAT32_C(   27.55), SIMDE_FLOAT32_C(   30.05)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  675.11), SIMDE_FLOAT32_C(  473.44),
                         SIMDE_FLOAT32_C(  936.76), SIMDE_FLOAT32_C(  315.53),
                         SIMDE_FLOAT32_C(  585.70), SIMDE_FLOAT32_C(  466.99),
                         SIMDE_FLOAT32_C(  876.99), SIMDE_FLOAT32_C(  421.09)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   25.98), SIMDE_FLOAT32_C(   21.76),
                         SIMDE_FLOAT32_C(   30.61), SIMDE_FLOAT32_C(   17.76),
                         SIMDE_FLOAT32_C(   24.20), SIMDE_FLOAT32_C(   21.61),
                         SIMDE_FLOAT32_C(   29.61), SIMDE_FLOAT32_C(   20.52)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  455.63), SIMDE_FLOAT32_C(  708.48),
                         SIMDE_FLOAT32_C(  426.65), SIMDE_FLOAT32_C(   16.24),
                         SIMDE_FLOAT32_C(  899.49), SIMDE_FLOAT32_C(  710.23),
                         SIMDE_FLOAT32_C(  195.07), SIMDE_FLOAT32_C(  877.55)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   21.35), SIMDE_FLOAT32_C(   26.62),
                         SIMDE_FLOAT32_C(   20.66), SIMDE_FLOAT32_C(    4.03),
                         SIMDE_FLOAT32_C(   29.99), SIMDE_FLOAT32_C(   26.65),
                         SIMDE_FLOAT32_C(   13.97), SIMDE_FLOAT32_C(   29.62)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  247.65), SIMDE_FLOAT32_C(  650.69),
                         SIMDE_FLOAT32_C(  691.01), SIMDE_FLOAT32_C(  931.91),
                         SIMDE_FLOAT32_C(  760.76), SIMDE_FLOAT32_C(  925.05),
                         SIMDE_FLOAT32_C(  438.39), SIMDE_FLOAT32_C(  204.75)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   15.74), SIMDE_FLOAT32_C(   25.51),
                         SIMDE_FLOAT32_C(   26.29), SIMDE_FLOAT32_C(   30.53),
                         SIMDE_FLOAT32_C(   27.58), SIMDE_FLOAT32_C(   30.41),
                         SIMDE_FLOAT32_C(   20.94), SIMDE_FLOAT32_C(   14.31)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  281.85), SIMDE_FLOAT32_C(  525.43),
                         SIMDE_FLOAT32_C(   50.88), SIMDE_FLOAT32_C(  685.15),
                         SIMDE_FLOAT32_C(  223.40), SIMDE_FLOAT32_C(  911.30),
                         SIMDE_FLOAT32_C(   97.50), SIMDE_FLOAT32_C(  436.55)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   16.79), SIMDE_FLOAT32_C(   22.92),
                         SIMDE_FLOAT32_C(    7.13), SIMDE_FLOAT32_C(   26.18),
                         SIMDE_FLOAT32_C(   14.95), SIMDE_FLOAT32_C(   30.19),
                         SIMDE_FLOAT32_C(    9.87), SIMDE_FLOAT32_C(   20.89)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  420.63), SIMDE_FLOAT32_C(  643.40),
                         SIMDE_FLOAT32_C(  474.44), SIMDE_FLOAT32_C(  474.06),
                         SIMDE_FLOAT32_C(  331.02), SIMDE_FLOAT32_C(  191.18),
                         SIMDE_FLOAT32_C(  614.70), SIMDE_FLOAT32_C(  135.59)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   20.51), SIMDE_FLOAT32_C(   25.37),
                         SIMDE_FLOAT32_C(   21.78), SIMDE_FLOAT32_C(   21.77),
                         SIMDE_FLOAT32_C(   18.19), SIMDE_FLOAT32_C(   13.83),
                         SIMDE_FLOAT32_C(   24.79), SIMDE_FLOAT32_C(   11.64)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  777.57), SIMDE_FLOAT32_C(  684.09),
                         SIMDE_FLOAT32_C(  183.20), SIMDE_FLOAT32_C(  761.60),
                         SIMDE_FLOAT32_C(  226.72), SIMDE_FLOAT32_C(  710.47),
                         SIMDE_FLOAT32_C(  853.48), SIMDE_FLOAT32_C(  115.80)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   27.88), SIMDE_FLOAT32_C(   26.16),
                         SIMDE_FLOAT32_C(   13.54), SIMDE_FLOAT32_C(   27.60),
                         SIMDE_FLOAT32_C(   15.06), SIMDE_FLOAT32_C(   26.65),
                         SIMDE_FLOAT32_C(   29.21), SIMDE_FLOAT32_C(   10.76)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  797.47), SIMDE_FLOAT32_C(  634.08),
                         SIMDE_FLOAT32_C(  881.12), SIMDE_FLOAT32_C(  697.30),
                         SIMDE_FLOAT32_C(  189.46), SIMDE_FLOAT32_C(   47.99),
                         SIMDE_FLOAT32_C(   85.88), SIMDE_FLOAT32_C(  938.36)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C(   28.24), SIMDE_FLOAT32_C(   25.18),
                         SIMDE_FLOAT32_C(   29.68), SIMDE_FLOAT32_C(   26.41),
                         SIMDE_FLOAT32_C(   13.76), SIMDE_FLOAT32_C(    6.93),
                         SIMDE_FLOAT32_C(    9.27), SIMDE_FLOAT32_C(   30.63)) }
  };
  /* Number of entries in the table, derived from the array itself. */
  const size_t num_vectors = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < num_vectors ; i++) {
    simde__m256 r;

    r = simde_mm256_sqrt_ps(test_vec[i].a);
    /* Approximate comparison; the trailing 1 is the precision argument of
     * the assertion macro. */
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Compares the output of simde_mm256_setzero_ps() with a vector whose
 * eight lanes are each spelled out as an explicit 0.00 literal.
 * Returns 0 if execution reaches the end of the function. */
static int
test_simde_mm256_setzero_ps(SIMDE_MUNIT_TEST_ARGS) {
  /* Value produced by the function under test. */
  simde__m256 res = simde_mm256_setzero_ps();

  /* Reference vector, written lane by lane. */
  simde__m256 r = simde_mm256_set_ps(
    SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00),
    SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00), SIMDE_FLOAT32_C(0.00)
  );

  simde_assert_m256_close(r, res, 1);

  return 0;
}

/* Compares the output of simde_mm256_setzero_pd() with a vector whose
 * four lanes are each spelled out as an explicit 0.00 literal.
 * Returns 0 if execution reaches the end of the function. */
static int
test_simde_mm256_setzero_pd(SIMDE_MUNIT_TEST_ARGS) {
  /* Value produced by the function under test. */
  simde__m256d res = simde_mm256_setzero_pd();

  /* Reference vector, written lane by lane. */
  simde__m256d r = simde_mm256_set_pd(
    SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00),
    SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00)
  );

  simde_assert_m256d_close(r, res, 1);

  return 0;
}

/* Compares the output of simde_mm256_setzero_si256() with a vector built
 * from eight explicit INT32_C(0) lanes, using a 32-bit-lane equality
 * assertion. Returns 0 if execution reaches the end of the function. */
static int
test_simde_mm256_setzero_si256(SIMDE_MUNIT_TEST_ARGS) {
  /* Value produced by the function under test. */
  simde__m256i res = simde_mm256_setzero_si256();

  /* Reference vector, written lane by lane. */
  simde__m256i r = simde_mm256_set_epi32(
    INT32_C(0), INT32_C(0), INT32_C(0), INT32_C(0),
    INT32_C(0), INT32_C(0), INT32_C(0), INT32_C(0)
  );

  simde_assert_m256i_i32(r, ==, res);

  return 0;
}

/* Table-driven check of simde_mm256_sqrt_pd(): each case supplies an input
 * vector and the vector the result is compared against with the
 * closeness assertion. Returns 0 if every case has been evaluated and
 * execution reaches the end of the function. */
static int
test_simde_mm256_sqrt_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a; /* argument handed to simde_mm256_sqrt_pd() */
    simde__m256d r; /* vector the result is compared against */
  } test_vec[] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  121.95), SIMDE_FLOAT64_C(  169.21),
                         SIMDE_FLOAT64_C(  224.34), SIMDE_FLOAT64_C(  661.75)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   11.04), SIMDE_FLOAT64_C(   13.01),
                         SIMDE_FLOAT64_C(   14.98), SIMDE_FLOAT64_C(   25.72)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   41.79), SIMDE_FLOAT64_C(   48.53),
                         SIMDE_FLOAT64_C(   17.25), SIMDE_FLOAT64_C(  585.21)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    6.46), SIMDE_FLOAT64_C(    6.97),
                         SIMDE_FLOAT64_C(    4.15), SIMDE_FLOAT64_C(   24.19)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  323.54), SIMDE_FLOAT64_C(   12.60),
                         SIMDE_FLOAT64_C(  916.80), SIMDE_FLOAT64_C(  392.02)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   17.99), SIMDE_FLOAT64_C(    3.55),
                         SIMDE_FLOAT64_C(   30.28), SIMDE_FLOAT64_C(   19.80)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  317.36), SIMDE_FLOAT64_C(  248.26),
                         SIMDE_FLOAT64_C(   48.91), SIMDE_FLOAT64_C(  291.13)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   17.81), SIMDE_FLOAT64_C(   15.76),
                         SIMDE_FLOAT64_C(    6.99), SIMDE_FLOAT64_C(   17.06)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  476.37), SIMDE_FLOAT64_C(  799.71),
                         SIMDE_FLOAT64_C(  234.23), SIMDE_FLOAT64_C(  908.93)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   21.83), SIMDE_FLOAT64_C(   28.28),
                         SIMDE_FLOAT64_C(   15.30), SIMDE_FLOAT64_C(   30.15)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  937.16), SIMDE_FLOAT64_C(  886.92),
                         SIMDE_FLOAT64_C(  703.77), SIMDE_FLOAT64_C(  383.22)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   30.61), SIMDE_FLOAT64_C(   29.78),
                         SIMDE_FLOAT64_C(   26.53), SIMDE_FLOAT64_C(   19.58)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(   36.08), SIMDE_FLOAT64_C(  932.02),
                         SIMDE_FLOAT64_C(  592.55), SIMDE_FLOAT64_C(  593.01)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(    6.01), SIMDE_FLOAT64_C(   30.53),
                         SIMDE_FLOAT64_C(   24.34), SIMDE_FLOAT64_C(   24.35)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C(  436.31), SIMDE_FLOAT64_C(  915.76),
                         SIMDE_FLOAT64_C(  575.57), SIMDE_FLOAT64_C(  268.70)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C(   20.89), SIMDE_FLOAT64_C(   30.26),
                         SIMDE_FLOAT64_C(   23.99), SIMDE_FLOAT64_C(   16.39)) }
  };

  /* Number of entries in the table, derived from its size so the loop
   * tracks the initialiser list automatically. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    /* Run the operation on this case's input and compare to its reference. */
    simde__m256d r = simde_mm256_sqrt_pd(test_vec[i].a);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int
test_simde_mm256_store_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    SIMDE_ALIGN_LIKE_32(simde__m256) simde_float32 r[8];
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  256.09), SIMDE_FLOAT32_C(  768.79),
                         SIMDE_FLOAT32_C(  201.90), SIMDE_FLOAT32_C(  339.33),
                         SIMDE_FLOAT32_C(  957.46), SIMDE_FLOAT32_C(  728.44),
                         SIMDE_FLOAT32_C(   73.67), SIMDE_FLOAT32_C(  440.11)),
      { SIMDE_FLOAT32_C(  440.11), SIMDE_FLOAT32_C(   73.67),
        SIMDE_FLOAT32_C(  728.44), SIMDE_FLOAT32_C(  957.46),
        SIMDE_FLOAT32_C(  339.33), SIMDE_FLOAT32_C(  201.90),
        SIMDE_FLOAT32_C(  768.79), SIMDE_FLOAT32_C(  256.09) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(  -72.04), SIMDE_FLOAT32_C( -425.25),
                         SIMDE_FLOAT32_C(  471.77), SIMDE_FLOAT32_C(  976.75),
                         SIMDE_FLOAT32_C( -510.20), SIMDE_FLOAT32_C(  696.54),
                         SIMDE_FLOAT32_C( -843.54), SIMDE_FLOAT32_C( -868.41)),
      { SIMDE_FLOAT32_C( -868.41), SIMDE_FLOAT32_C( -843.54),
        SIMDE_FLOAT32_C(  696.54), SIMDE_FLOAT32_C( -510.20),
        SIMDE_FLOAT32_C(  976.75), SIMDE_FLOAT32_C(  471.77),
        SIMDE_FLOAT32_C( -425.25), SIMDE_FLOAT32_C(  -72.04) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -304.23), SIMDE_FLOAT32_C(  477.48),
                         SIMDE_FLOAT32_C(  356.58), SIMDE_FLOAT32_C(  955.81),
                         SIMDE_FLOAT32_C(  999.99), SIMDE_FLOAT32_C(  487.33),
                         SIMDE_FLOAT32_C(  633.61), SIMDE_FLOAT32_C(  518.11)),
      { SIMDE_FLOAT32_C(  518.11), SIMDE_FLOAT32_C(  633.61),
        SIMDE_FLOAT32_C(  487.33), SIMDE_FLOAT32_C(  999.99),
        SIMDE_FLOAT32_C(  955.81), SIMDE_FLOAT32_C(  356.58),
        SIMDE_FLOAT32_C(  477.48), SIMDE_FLOAT32_C( -304.23) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C(   88.04), SIMDE_FLOAT32_C( -296.25),
                         SIMDE_FLOAT32_C(  948.33), SIMDE_FLOAT32_C( -584.17),
                         SIMDE_FLOAT32_C( -796.11), SIMDE_FLOAT32_C(  -38.84),
                         SIMDE_FLOAT32_C( -706.11), SIMDE_FLOAT32_C(  347.32)),
      { SIMDE_FLOAT32_C(  347.32), SIMDE_FLOAT32_C( -706.11),
        SIMDE_FLOAT32_C(  -38.84), SIMDE_FLOAT32_C( -796.11),
        SIMDE_FLOAT32_C( -584.17), SIMDE_FLOAT32_C(  948.33),
        SIMDE_FLOAT32_C( -296.25), SIMDE_FLOAT32_C(   88.04) } },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -143.76), SIMDE_FLOAT32_C(  504.36),
                         SIMDE_FLOAT32_C( -154.12), SIMDE_FLOAT32_C(  375.43),
                         SIMDE_FLOAT32_C( -307.18), SIMDE_FLOAT32_C(  256.93),
                         SIMDE_FLOAT32_C(   78.86), SIMDE_FLOAT32_C(  526.77)),
      { SIMDE_FLOAT32_C(  526.77), SIMDE_FLOAT32_C(   78.86),
        SIMDE_FLOAT32_C(  256.93), SIMDE_FLOAT32_C(