2828#define SIMDE_ARM_NEON_ADDW_HIGH_H
2929
#include "types.h"
#include "get_high.h"
#include "addw.h"
3333
3434HEDLEY_DIAGNOSTIC_PUSH
3535SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
@@ -40,19 +40,8 @@ simde_int16x8_t
4040simde_vaddw_high_s8 (simde_int16x8_t a , simde_int8x16_t b ) {
4141 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
4242 return vaddw_high_s8 (a , b );
43- #elif SIMDE_NATURAL_VECTOR_SIZE_GE (128 )
44- return simde_vaddq_s16 (a , simde_vmovl_high_s8 (b ));
4543 #else
46- simde_int16x8_private r_ ;
47- simde_int16x8_private a_ = simde_int16x8_to_private (a );
48- simde_int8x16_private b_ = simde_int8x16_to_private (b );
49-
50- SIMDE_VECTORIZE
51- for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
52- r_ .values [i ] = a_ .values [i ] + b_ .values [i + ((sizeof (b_ .values ) / sizeof (b_ .values [0 ])) / 2 )];
53- }
54-
55- return simde_int16x8_from_private (r_ );
44+ return simde_vaddw_s8 (a , simde_vget_high_s8 (b ));
5645 #endif
5746}
5847#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES )
@@ -65,19 +54,8 @@ simde_int32x4_t
6554simde_vaddw_high_s16 (simde_int32x4_t a , simde_int16x8_t b ) {
6655 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
6756 return vaddw_high_s16 (a , b );
68- #elif SIMDE_NATURAL_VECTOR_SIZE_GE (128 )
69- return simde_vaddq_s32 (a , simde_vmovl_high_s16 (b ));
7057 #else
71- simde_int32x4_private r_ ;
72- simde_int32x4_private a_ = simde_int32x4_to_private (a );
73- simde_int16x8_private b_ = simde_int16x8_to_private (b );
74-
75- SIMDE_VECTORIZE
76- for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
77- r_ .values [i ] = a_ .values [i ] + b_ .values [i + ((sizeof (b_ .values ) / sizeof (b_ .values [0 ])) / 2 )];
78- }
79-
80- return simde_int32x4_from_private (r_ );
58+ return simde_vaddw_s16 (a , simde_vget_high_s16 (b ));
8159 #endif
8260}
8361#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES )
@@ -90,19 +68,8 @@ simde_int64x2_t
9068simde_vaddw_high_s32 (simde_int64x2_t a , simde_int32x4_t b ) {
9169 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
9270 return vaddw_high_s32 (a , b );
93- #elif SIMDE_NATURAL_VECTOR_SIZE_GE (128 )
94- return simde_vaddq_s64 (a , simde_vmovl_high_s32 (b ));
9571 #else
96- simde_int64x2_private r_ ;
97- simde_int64x2_private a_ = simde_int64x2_to_private (a );
98- simde_int32x4_private b_ = simde_int32x4_to_private (b );
99-
100- SIMDE_VECTORIZE
101- for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
102- r_ .values [i ] = a_ .values [i ] + b_ .values [i + ((sizeof (b_ .values ) / sizeof (b_ .values [0 ])) / 2 )];
103- }
104-
105- return simde_int64x2_from_private (r_ );
72+ return simde_vaddw_s32 (a , simde_vget_high_s32 (b ));
10673 #endif
10774}
10875#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES )
@@ -115,19 +82,8 @@ simde_uint16x8_t
11582simde_vaddw_high_u8 (simde_uint16x8_t a , simde_uint8x16_t b ) {
11683 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
11784 return vaddw_high_u8 (a , b );
118- #elif SIMDE_NATURAL_VECTOR_SIZE_GE (128 )
119- return simde_vaddq_u16 (a , simde_vmovl_high_u8 (b ));
12085 #else
121- simde_uint16x8_private r_ ;
122- simde_uint16x8_private a_ = simde_uint16x8_to_private (a );
123- simde_uint8x16_private b_ = simde_uint8x16_to_private (b );
124-
125- SIMDE_VECTORIZE
126- for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
127- r_ .values [i ] = a_ .values [i ] + b_ .values [i + ((sizeof (b_ .values ) / sizeof (b_ .values [0 ])) / 2 )];
128- }
129-
130- return simde_uint16x8_from_private (r_ );
86+ return simde_vaddw_u8 (a , simde_vget_high_u8 (b ));
13187 #endif
13288}
13389#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES )
@@ -140,19 +96,8 @@ simde_uint32x4_t
14096simde_vaddw_high_u16 (simde_uint32x4_t a , simde_uint16x8_t b ) {
14197 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
14298 return vaddw_high_u16 (a , b );
143- #elif SIMDE_NATURAL_VECTOR_SIZE_GE (128 )
144- return simde_vaddq_u32 (a , simde_vmovl_high_u16 (b ));
14599 #else
146- simde_uint32x4_private r_ ;
147- simde_uint32x4_private a_ = simde_uint32x4_to_private (a );
148- simde_uint16x8_private b_ = simde_uint16x8_to_private (b );
149-
150- SIMDE_VECTORIZE
151- for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
152- r_ .values [i ] = a_ .values [i ] + b_ .values [i + ((sizeof (b_ .values ) / sizeof (b_ .values [0 ])) / 2 )];
153- }
154-
155- return simde_uint32x4_from_private (r_ );
100+ return simde_vaddw_u16 (a , simde_vget_high_u16 (b ));
156101 #endif
157102}
158103#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES )
@@ -165,19 +110,8 @@ simde_uint64x2_t
165110simde_vaddw_high_u32 (simde_uint64x2_t a , simde_uint32x4_t b ) {
166111 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE )
167112 return vaddw_high_u32 (a , b );
168- #elif SIMDE_NATURAL_VECTOR_SIZE_GE (128 )
169- return simde_vaddq_u64 (a , simde_vmovl_high_u32 (b ));
170113 #else
171- simde_uint64x2_private r_ ;
172- simde_uint64x2_private a_ = simde_uint64x2_to_private (a );
173- simde_uint32x4_private b_ = simde_uint32x4_to_private (b );
174-
175- SIMDE_VECTORIZE
176- for (size_t i = 0 ; i < (sizeof (r_ .values ) / sizeof (r_ .values [0 ])) ; i ++ ) {
177- r_ .values [i ] = a_ .values [i ] + b_ .values [i + ((sizeof (b_ .values ) / sizeof (b_ .values [0 ])) / 2 )];
178- }
179-
180- return simde_uint64x2_from_private (r_ );
114+ return simde_vaddw_u32 (a , simde_vget_high_u32 (b ));
181115 #endif
182116}
183117#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES )
0 commit comments