libstdc++
simd.h
1 // Definition of the public simd interfaces -*- C++ -*-
2 
3 // Copyright (C) 2020-2024 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_H
27 
28 #if __cplusplus >= 201703L
29 
30 #include "simd_detail.h"
31 #include "numeric_traits.h"
32 #include <bit>
33 #include <bitset>
34 #ifdef _GLIBCXX_DEBUG_UB
35 #include <cstdio> // for stderr
36 #endif
37 #include <cstring>
38 #include <cmath>
39 #include <functional>
40 #include <iosfwd>
41 #include <utility>
42 #include <algorithm>
43 
44 #if _GLIBCXX_SIMD_X86INTRIN
45 #include <x86intrin.h>
46 #elif _GLIBCXX_SIMD_HAVE_NEON
47 #pragma GCC diagnostic push
48 // narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
49 // 'int64x1_t' {aka 'long long int'} [-Wnarrowing]
50 #pragma GCC diagnostic ignored "-Wnarrowing"
51 #include <arm_neon.h>
52 #pragma GCC diagnostic pop
53 #endif
54 #if _GLIBCXX_SIMD_HAVE_SVE
55 #include <arm_sve.h>
56 #endif
57 
58 /** @ingroup ts_simd
59  * @{
60  */
61 /* There are several closely related types, with the following naming
62  * convention:
63  * _Tp: vectorizable (arithmetic) type (or any type)
64  * _TV: __vector_type_t<_Tp, _Np>
65  * _TW: _SimdWrapper<_Tp, _Np>
66  * _TI: __intrinsic_type_t<_Tp, _Np>
67  * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
68  * If one additional type is needed use _U instead of _T.
69  * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
70  *
71  * More naming conventions:
72  * _Ap or _Abi: An ABI tag from the simd_abi namespace
73  * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
74  * _IV, _IW as for _TV, _TW
75  * _Np: number of elements (not bytes)
76  * _Bytes: number of bytes
77  *
78  * Variable names:
79  * __k: mask object (vector- or bitmask)
80  */
81 _GLIBCXX_SIMD_BEGIN_NAMESPACE
82 
83 #if !_GLIBCXX_SIMD_X86INTRIN
84 using __m128 [[__gnu__::__vector_size__(16)]] = float;
85 using __m128d [[__gnu__::__vector_size__(16)]] = double;
86 using __m128i [[__gnu__::__vector_size__(16)]] = long long;
87 using __m256 [[__gnu__::__vector_size__(32)]] = float;
88 using __m256d [[__gnu__::__vector_size__(32)]] = double;
89 using __m256i [[__gnu__::__vector_size__(32)]] = long long;
90 using __m512 [[__gnu__::__vector_size__(64)]] = float;
91 using __m512d [[__gnu__::__vector_size__(64)]] = double;
92 using __m512i [[__gnu__::__vector_size__(64)]] = long long;
93 #endif
94 
95 #if _GLIBCXX_SIMD_HAVE_SVE
96 constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
97 #else
98 constexpr inline int __sve_vectorized_size_bytes = 0;
99 #endif
100 
101 namespace simd_abi {
102 // simd_abi forward declarations {{{
103 // implementation details:
104 struct _Scalar;
105 
106 template <int _Np>
107  struct _Fixed;
108 
109 // There are two major ABIs that appear on different architectures.
110 // Both have non-boolean values packed into an N Byte register
111 // -> #elements = N / sizeof(T)
112 // Masks differ:
113 // 1. Use value vector registers for masks (all 0 or all 1)
114 // 2. Use bitmasks (mask registers) with one bit per value in the corresponding
115 // value vector
116 //
117 // Both can be partially used, masking off the rest when doing horizontal
118 // operations or operations that can trap (e.g. FP_INVALID or integer division
119 // by 0). This is encoded as the number of used bytes.
120 template <int _UsedBytes>
121  struct _VecBuiltin;
122 
123 template <int _UsedBytes>
124  struct _VecBltnBtmsk;
125 
126 template <int _UsedBytes, int _TotalBytes = __sve_vectorized_size_bytes>
127  struct _SveAbi;
128 
129 template <typename _Tp, int _Np>
130  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;
131 
132 template <int _UsedBytes = 16>
133  using _Sse = _VecBuiltin<_UsedBytes>;
134 
135 template <int _UsedBytes = 32>
136  using _Avx = _VecBuiltin<_UsedBytes>;
137 
138 template <int _UsedBytes = 64>
139  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;
140 
141 template <int _UsedBytes = 16>
142  using _Neon = _VecBuiltin<_UsedBytes>;
143 
144 template <int _UsedBytes = __sve_vectorized_size_bytes>
145  using _Sve = _SveAbi<_UsedBytes, __sve_vectorized_size_bytes>;
146 
147 // implementation-defined:
148 using __sse = _Sse<>;
149 using __avx = _Avx<>;
150 using __avx512 = _Avx512<>;
151 using __neon = _Neon<>;
152 using __neon128 = _Neon<16>;
153 using __neon64 = _Neon<8>;
154 using __sve = _Sve<>;
155 
156 // standard:
157 template <typename _Tp, size_t _Np, typename...>
158  struct deduce;
159 
160 template <int _Np>
161  using fixed_size = _Fixed<_Np>;
162 
163 using scalar = _Scalar;
164 
165 // }}}
166 } // namespace simd_abi
167 // forward declarations is_simd(_mask), simd(_mask), simd_size {{{
168 template <typename _Tp>
169  struct is_simd;
170 
171 template <typename _Tp>
172  struct is_simd_mask;
173 
174 template <typename _Tp, typename _Abi>
175  class simd;
176 
177 template <typename _Tp, typename _Abi>
178  class simd_mask;
179 
180 template <typename _Tp, typename _Abi>
181  struct simd_size;
182 
183 // }}}
184 // load/store flags {{{
185 struct element_aligned_tag
186 {
187  template <typename _Tp, typename _Up = typename _Tp::value_type>
188  static constexpr size_t _S_alignment = alignof(_Up);
189 
190  template <typename _Tp, typename _Up>
191  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
192  _S_apply(_Up* __ptr)
193  { return __ptr; }
194 };
195 
196 struct vector_aligned_tag
197 {
198  template <typename _Tp, typename _Up = typename _Tp::value_type>
199  static constexpr size_t _S_alignment
200  = std::__bit_ceil(sizeof(_Up) * _Tp::size());
201 
202  template <typename _Tp, typename _Up>
203  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
204  _S_apply(_Up* __ptr)
205  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
206 };
207 
208 template <size_t _Np> struct overaligned_tag
209 {
210  template <typename _Tp, typename _Up = typename _Tp::value_type>
211  static constexpr size_t _S_alignment = _Np;
212 
213  template <typename _Tp, typename _Up>
214  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
215  _S_apply(_Up* __ptr)
216  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
217 };
218 
219 inline constexpr element_aligned_tag element_aligned = {};
220 
221 inline constexpr vector_aligned_tag vector_aligned = {};
222 
223 template <size_t _Np>
224  inline constexpr overaligned_tag<_Np> overaligned = {};
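// Editor's usage sketch (illustration only; uses a hypothetical stand-in
// type, since simd itself is only forward-declared at this point). The three
// tags compute the alignment that loads/stores may assume for a given simd
// type; vector_aligned rounds the full vector size up to a power of two:
namespace __doc_example {
  struct _FakeSimd
  {
    using value_type = float;

    static constexpr size_t size() { return 4; }
  };
  static_assert(element_aligned_tag::_S_alignment<_FakeSimd> == alignof(float));
  static_assert(vector_aligned_tag::_S_alignment<_FakeSimd>
		  == std::__bit_ceil(sizeof(float) * 4));
  static_assert(overaligned_tag<64>::_S_alignment<_FakeSimd> == 64);
} // namespace __doc_example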
225 
226 // }}}
227 template <size_t _Xp>
228  using _SizeConstant = integral_constant<size_t, _Xp>;
229 // constexpr feature detection{{{
230 constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
231 constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
232 constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
233 constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
234 constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
235 constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
236 constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
237 constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
238 constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
239 constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
240 constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
241 constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
242 constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
243 constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
244 constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
245 constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
246 constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
247 constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
248 constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
249 constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
250 constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
251 constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
252 constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
253 constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
254 constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
255 constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
256 constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
257 constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
258 constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
259 constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
260 constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
261 constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;
262 
263 constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
264 constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
265 constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
266 constexpr inline bool __support_neon_float =
267 #if defined __GCC_IEC_559
268  __GCC_IEC_559 == 0;
269 #elif defined __FAST_MATH__
270  true;
271 #else
272  false;
273 #endif
274 
275 constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE;
276 constexpr inline bool __have_sve2 = _GLIBCXX_SIMD_HAVE_SVE2;
277 
278 #ifdef _ARCH_PWR10
279 constexpr inline bool __have_power10vec = true;
280 #else
281 constexpr inline bool __have_power10vec = false;
282 #endif
283 #ifdef __POWER9_VECTOR__
284 constexpr inline bool __have_power9vec = true;
285 #else
286 constexpr inline bool __have_power9vec = false;
287 #endif
288 #if defined __POWER8_VECTOR__
289 constexpr inline bool __have_power8vec = true;
290 #else
291 constexpr inline bool __have_power8vec = __have_power9vec;
292 #endif
293 #if defined __VSX__
294 constexpr inline bool __have_power_vsx = true;
295 #else
296 constexpr inline bool __have_power_vsx = __have_power8vec;
297 #endif
298 #if defined __ALTIVEC__
299 constexpr inline bool __have_power_vmx = true;
300 #else
301 constexpr inline bool __have_power_vmx = __have_power_vsx;
302 #endif
303 
304 // }}}
305 
306 namespace __detail
307 {
308 #ifdef math_errhandling
309  // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
310  // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
311  // must be guessed.
312  template <int = math_errhandling>
313  constexpr bool
314  __handle_fpexcept_impl(int)
315  { return math_errhandling & MATH_ERREXCEPT; }
316 #endif
317 
318  // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
319  // ignored, otherwise implement correct exception behavior.
320  constexpr bool
321  __handle_fpexcept_impl(float)
322  {
323 #if defined __FAST_MATH__
324  return false;
325 #else
326  return true;
327 #endif
328  }
329 
330  /// True if math functions must raise floating-point exceptions as specified by C17.
331  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);
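 // Editor's note: the int/float parameter pair above is the usual
 // overload-ranking trick. __handle_fpexcept_impl(0) prefers the int
 // overload, which only participates when math_errhandling is usable as a
 // constant expression (it is the default template argument); otherwise the
 // int argument converts to float and the fallback is chosen. A minimal
 // sketch of the same pattern (hypothetical names):
 //
 //   template <int = __must_be_constant>	 // SFINAE gate
 //     constexpr bool __impl(int)   { return true;  }	// preferred match
 //   constexpr bool __impl(float)   { return false; }	// fallback
 //   constexpr bool __result = __impl(0);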
332 
333  constexpr std::uint_least64_t
334  __floating_point_flags()
335  {
336  std::uint_least64_t __flags = 0;
337  if constexpr (_S_handle_fpexcept)
338  __flags |= 1;
339 #ifdef __FAST_MATH__
340  __flags |= 1 << 1;
341 #elif __FINITE_MATH_ONLY__
342  __flags |= 2 << 1;
343 #elif __GCC_IEC_559 < 2
344  __flags |= 3 << 1;
345 #endif
346  __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
347  return __flags;
348  }
349 
350  constexpr std::uint_least64_t
351  __machine_flags()
352  {
353  if constexpr (__have_mmx || __have_sse)
354  return __have_mmx
355  | (__have_sse << 1)
356  | (__have_sse2 << 2)
357  | (__have_sse3 << 3)
358  | (__have_ssse3 << 4)
359  | (__have_sse4_1 << 5)
360  | (__have_sse4_2 << 6)
361  | (__have_xop << 7)
362  | (__have_avx << 8)
363  | (__have_avx2 << 9)
364  | (__have_bmi << 10)
365  | (__have_bmi2 << 11)
366  | (__have_lzcnt << 12)
367  | (__have_sse4a << 13)
368  | (__have_fma << 14)
369  | (__have_fma4 << 15)
370  | (__have_f16c << 16)
371  | (__have_popcnt << 17)
372  | (__have_avx512f << 18)
373  | (__have_avx512dq << 19)
374  | (__have_avx512vl << 20)
375  | (__have_avx512bw << 21)
376  | (__have_avx512bitalg << 22)
377  | (__have_avx512vbmi2 << 23)
378  | (__have_avx512vbmi << 24)
379  | (__have_avx512ifma << 25)
380  | (__have_avx512cd << 26)
381  | (__have_avx512vnni << 27)
382  | (__have_avx512vpopcntdq << 28)
383  | (__have_avx512vp2intersect << 29);
384  else if constexpr (__have_neon || __have_sve)
385  return __have_neon
386  | (__have_neon_a32 << 1)
387  | (__have_neon_a64 << 2)
389  | (__support_neon_float << 3)
390  | (__have_sve << 4)
391  | (__have_sve2 << 5);
392  else if constexpr (__have_power_vmx)
393  return __have_power_vmx
394  | (__have_power_vsx << 1)
395  | (__have_power8vec << 2)
396  | (__have_power9vec << 3)
397  | (__have_power10vec << 4);
398  else
399  return 0;
400  }
401 
402  namespace
403  {
404  struct _OdrEnforcer {};
405  }
406 
407  template <std::uint_least64_t...>
408  struct _MachineFlagsTemplate {};
409 
410  /**@internal
411  * Use this type as default template argument to all function templates that
412  * are not declared always_inline. It ensures that a function
413  * specialization that the compiler decides not to inline has a unique symbol
414  * (_OdrEnforcer) or a symbol matching the machine/architecture flags
415  * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
416  * users link TUs compiled with different flags. This is especially important
417  * for using simd in libraries.
418  */
419  using __odr_helper
420  = conditional_t<__machine_flags() == 0, _OdrEnforcer,
421  _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
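 // Editor's usage sketch (hypothetical function name): __odr_helper is meant
 // to appear as a defaulted template parameter, so that out-of-line
 // instantiations mangle the machine flags into their symbol names:
 //
 //   template <typename _Tp, typename = __detail::__odr_helper>
 //     void __not_always_inlined_function(_Tp);
 //
 // A TU built with -mavx2 and one built without then instantiate different
 // specializations, which the linker cannot merge into incompatible code.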
422 
423  struct _Minimum
424  {
425  template <typename _Tp>
426  _GLIBCXX_SIMD_INTRINSIC constexpr
427  _Tp
428  operator()(_Tp __a, _Tp __b) const
429  {
430  using std::min;
431  return min(__a, __b);
432  }
433  };
434 
435  struct _Maximum
436  {
437  template <typename _Tp>
438  _GLIBCXX_SIMD_INTRINSIC constexpr
439  _Tp
440  operator()(_Tp __a, _Tp __b) const
441  {
442  using std::max;
443  return max(__a, __b);
444  }
445  };
446 } // namespace __detail
447 
448 // unrolled/pack execution helpers
449 // __execute_n_times{{{
450 template <typename _Fp, size_t... _I>
451  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
452  void
453  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
454  { ((void)__f(_SizeConstant<_I>()), ...); }
455 
456 template <typename _Fp>
457  _GLIBCXX_SIMD_INTRINSIC constexpr void
458  __execute_on_index_sequence(_Fp&&, index_sequence<>)
459  { }
460 
461 template <size_t _Np, typename _Fp>
462  _GLIBCXX_SIMD_INTRINSIC constexpr void
463  __execute_n_times(_Fp&& __f)
464  {
465  __execute_on_index_sequence(static_cast<_Fp&&>(__f),
466  make_index_sequence<_Np>{});
467  }
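// Editor's usage sketch: the functor receives _SizeConstant<_I>, so the
// index is a constant expression inside the lambda (usable as a template
// argument or array bound), and the expansion is fully unrolled:
//
//   int __arr[4];
//   __execute_n_times<4>([&](auto __i) { __arr[__i] = __i; });
//   // __arr == {0, 1, 2, 3}; no runtime loop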
468 
469 // }}}
470 // __generate_from_n_evaluations{{{
471 template <typename _R, typename _Fp, size_t... _I>
472  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
473  _R
474  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
475  { return _R{__f(_SizeConstant<_I>())...}; }
476 
477 template <size_t _Np, typename _R, typename _Fp>
478  _GLIBCXX_SIMD_INTRINSIC constexpr _R
479  __generate_from_n_evaluations(_Fp&& __f)
480  {
481  return __execute_on_index_sequence_with_return<_R>(
482  static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
483  }
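// Editor's usage sketch: braced-init the result type _R from _Np
// constant-indexed evaluations of __f:
//
//   struct _Pair { int _M_a, _M_b; };
//   constexpr auto __p = __generate_from_n_evaluations<2, _Pair>(
//			    [](auto __i) { return int(__i) * 10; });
//   // __p._M_a == 0, __p._M_b == 10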
484 
485 // }}}
486 // __call_with_n_evaluations{{{
487 template <size_t... _I, typename _F0, typename _FArgs>
488  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
489  auto
490  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
491  { return __f0(__fargs(_SizeConstant<_I>())...); }
492 
493 template <size_t _Np, typename _F0, typename _FArgs>
494  _GLIBCXX_SIMD_INTRINSIC constexpr auto
495  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
496  {
497  return __call_with_n_evaluations(make_index_sequence<_Np>{},
498  static_cast<_F0&&>(__f0),
499  static_cast<_FArgs&&>(__fargs));
500  }
501 
502 // }}}
503 // __call_with_subscripts{{{
504 template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
505  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
506  auto
507  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
508  { return __fun(__x[_First + _It]...); }
509 
510 template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
511  _GLIBCXX_SIMD_INTRINSIC constexpr auto
512  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
513  {
514  return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
515  make_index_sequence<_Np>(),
516  static_cast<_Fp&&>(__fun));
517  }
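// Editor's usage sketch: __call_with_n_evaluations folds _Np generated
// arguments into a single call; __call_with_subscripts passes _Np
// consecutive elements starting at _First:
//
//   int __v[4] = {1, 2, 3, 4};
//   int __a = __call_with_n_evaluations<3>(	      // __f0(__v[0], __v[1], __v[2])
//		 [](auto... __xs) { return (__xs + ...); },
//		 [&](auto __i) { return __v[__i]; });	   // == 6
//   int __b = __call_with_subscripts<2, 1>(	      // __fun(__v[1], __v[2])
//		 __v, [](int __x, int __y) { return __x * __y; }); // == 6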
518 
519 // }}}
520 
521 // vvv ---- type traits ---- vvv
522 // integer type aliases{{{
523 using _UChar = unsigned char;
524 using _SChar = signed char;
525 using _UShort = unsigned short;
526 using _UInt = unsigned int;
527 using _ULong = unsigned long;
528 using _ULLong = unsigned long long;
529 using _LLong = long long;
530 
531 //}}}
532 // __first_of_pack{{{
533 template <typename _T0, typename...>
534  struct __first_of_pack
535  { using type = _T0; };
536 
537 template <typename... _Ts>
538  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;
539 
540 //}}}
541 // __value_type_or_identity_t {{{
542 template <typename _Tp>
543  typename _Tp::value_type
544  __value_type_or_identity_impl(int);
545 
546 template <typename _Tp>
547  _Tp
548  __value_type_or_identity_impl(float);
549 
550 template <typename _Tp>
551  using __value_type_or_identity_t
552  = decltype(__value_type_or_identity_impl<_Tp>(int()));
553 
554 // }}}
555 // __is_vectorizable {{{
556 template <typename _Tp>
557  struct __is_vectorizable : public is_arithmetic<_Tp> {};
558 
559 template <>
560  struct __is_vectorizable<bool> : public false_type {};
561 
562 template <typename _Tp>
563  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;
564 
565 // Deduces to a vectorizable type
566 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
567  using _Vectorizable = _Tp;
568 
569 // }}}
570 // _LoadStorePtr / __is_possible_loadstore_conversion {{{
571 template <typename _Ptr, typename _ValueType>
572  struct __is_possible_loadstore_conversion
573  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};
574 
575 template <>
576  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};
577 
578 // Deduces to a type allowed for load/store with the given value type.
579 template <typename _Ptr, typename _ValueType,
580  typename = enable_if_t<
581  __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
582  using _LoadStorePtr = _Ptr;
583 
584 // }}}
585 // __is_bitmask{{{
586 template <typename _Tp, typename = void_t<>>
587  struct __is_bitmask : false_type {};
588 
589 template <typename _Tp>
590  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;
591 
592 // the __mmaskXX case:
593 template <typename _Tp>
594  struct __is_bitmask<_Tp,
595  void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
596  : true_type {};
597 
598 // }}}
599 // __int_for_sizeof{{{
600 #pragma GCC diagnostic push
601 #pragma GCC diagnostic ignored "-Wpedantic"
602 template <size_t _Bytes>
603  constexpr auto
604  __int_for_sizeof()
605  {
606  static_assert(_Bytes > 0);
607  if constexpr (_Bytes == sizeof(int))
608  return int();
609  else if constexpr (_Bytes == sizeof(_SChar))
610  return _SChar();
611  else if constexpr (_Bytes == sizeof(short))
612  return short();
613  else if constexpr (_Bytes == sizeof(long))
614  return long();
615  else if constexpr (_Bytes == sizeof(_LLong))
616  return _LLong();
617  #ifdef __SIZEOF_INT128__
618  else if constexpr (_Bytes == sizeof(__int128))
619  return __int128();
620  #endif // __SIZEOF_INT128__
621  else if constexpr (_Bytes % sizeof(int) == 0)
622  {
623  constexpr size_t _Np = _Bytes / sizeof(int);
624  struct _Ip
625  {
626  int _M_data[_Np];
627 
628  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
629  operator&(_Ip __rhs) const
630  {
631  return __generate_from_n_evaluations<_Np, _Ip>(
632  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
633  return __rhs._M_data[__i] & _M_data[__i];
634  });
635  }
636 
637  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
638  operator|(_Ip __rhs) const
639  {
640  return __generate_from_n_evaluations<_Np, _Ip>(
641  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
642  return __rhs._M_data[__i] | _M_data[__i];
643  });
644  }
645 
646  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
647  operator^(_Ip __rhs) const
648  {
649  return __generate_from_n_evaluations<_Np, _Ip>(
650  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
651  return __rhs._M_data[__i] ^ _M_data[__i];
652  });
653  }
654 
655  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
656  operator~() const
657  {
658  return __generate_from_n_evaluations<_Np, _Ip>(
659  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
660  }
661  };
662  return _Ip{};
663  }
664  else
665  static_assert(_Bytes == 0, "this should be unreachable");
666  }
667 #pragma GCC diagnostic pop
668 
669 template <typename _Tp>
670  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());
671 
672 template <size_t _Np>
673  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
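// Editor's examples of the mapping (verified against the branches above):
static_assert(is_same_v<__int_with_sizeof_t<sizeof(int)>, int>);
static_assert(is_same_v<__int_with_sizeof_t<1>, signed char>);
// Sizes matching no integer type yield an aggregate of ints with member-wise
// &, |, ^, ~ (or __int128 where it fits and is available).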
674 
675 // }}}
676 // __is_fixed_size_abi{{{
677 template <typename _Tp>
678  struct __is_fixed_size_abi : false_type {};
679 
680 template <int _Np>
681  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};
682 
683 template <typename _Tp>
684  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
685 
686 // }}}
687 // __is_scalar_abi {{{
688 template <typename _Abi>
689  constexpr bool
690  __is_scalar_abi()
691  { return is_same_v<simd_abi::scalar, _Abi>; }
692 
693 // }}}
694 // __abi_bytes_v {{{
695 template <template <int> class _Abi, int _Bytes>
696  constexpr int
697  __abi_bytes_impl(_Abi<_Bytes>*)
698  { return _Bytes; }
699 
700 template <typename _Tp>
701  constexpr int
702  __abi_bytes_impl(_Tp*)
703  { return -1; }
704 
705 template <typename _Abi>
706  inline constexpr int __abi_bytes_v
707  = __abi_bytes_impl(static_cast<_Abi*>(nullptr));
708 
709 // }}}
710 // __is_builtin_bitmask_abi {{{
711 template <typename _Abi>
712  constexpr bool
713  __is_builtin_bitmask_abi()
714  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }
715 
716 // }}}
717 // __is_sse_abi {{{
718 template <typename _Abi>
719  constexpr bool
720  __is_sse_abi()
721  {
722  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
723  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
724  }
725 
726 // }}}
727 // __is_avx_abi {{{
728 template <typename _Abi>
729  constexpr bool
730  __is_avx_abi()
731  {
732  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
733  return _Bytes > 16 && _Bytes <= 32
734  && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
735  }
736 
737 // }}}
738 // __is_avx512_abi {{{
739 template <typename _Abi>
740  constexpr bool
741  __is_avx512_abi()
742  {
743  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
744  return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
745  }
746 
747 // }}}
748 // __is_neon_abi {{{
749 template <typename _Abi>
750  constexpr bool
751  __is_neon_abi()
752  {
753  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
754  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
755  }
756 
757 // }}}
758 // __is_sve_abi {{{
759 template <typename _Abi>
760  constexpr bool
761  __is_sve_abi()
762  {
763  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
764  return _Bytes <= __sve_vectorized_size_bytes && is_same_v<simd_abi::_Sve<_Bytes>, _Abi>;
765  }
766 
767 // }}}
768 // __make_dependent_t {{{
769 template <typename, typename _Up>
770  struct __make_dependent
771  { using type = _Up; };
772 
773 template <typename _Tp, typename _Up>
774  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;
775 
776 // }}}
777 // ^^^ ---- type traits ---- ^^^
778 
779 // __invoke_ub{{{
780 template <typename... _Args>
781  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
782  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
783  {
784 #ifdef _GLIBCXX_DEBUG_UB
785  __builtin_fprintf(stderr, __msg, __args...);
786  __builtin_trap();
787 #else
788  __builtin_unreachable();
789 #endif
790  }
791 
792 // }}}
793 // __assert_unreachable{{{
794 template <typename _Tp>
795  struct __assert_unreachable
796  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };
797 
798 // }}}
799 // __size_or_zero_v {{{
800 template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
801  constexpr size_t
802  __size_or_zero_dispatch(int)
803  { return _Np; }
804 
805 template <typename _Tp, typename _Ap>
806  constexpr size_t
807  __size_or_zero_dispatch(float)
808  { return 0; }
809 
810 template <typename _Tp, typename _Ap>
811  inline constexpr size_t __size_or_zero_v
812  = __size_or_zero_dispatch<_Tp, _Ap>(0);
813 
814 // }}}
815 // __div_roundup {{{
816 inline constexpr size_t
817 __div_roundup(size_t __a, size_t __b)
818 { return (__a + __b - 1) / __b; }
819 
820 // }}}
821 // _ExactBool{{{
822 class _ExactBool
823 {
824  const bool _M_data;
825 
826 public:
827  _GLIBCXX_SIMD_INTRINSIC constexpr
828  _ExactBool(bool __b) : _M_data(__b) {}
829 
830  _ExactBool(int) = delete;
831 
832  _GLIBCXX_SIMD_INTRINSIC constexpr
833  operator bool() const
834  { return _M_data; }
835 };
836 
837 // }}}
838 // __may_alias{{{
839 /**@internal
840  * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
841  * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
842  * that support it).
843  */
844 template <typename _Tp>
845  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
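// Editor's usage sketch: loads/stores through a __may_alias pointer are
// exempt from strict-aliasing analysis, e.g.
//
//   int __load_int_bits(const float* __mem)
//   { return *reinterpret_cast<const __may_alias<int>*>(__mem); }
//
// Without the attribute this dereference would be undefined behavior.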
846 
847 // }}}
848 // _UnsupportedBase {{{
849 // simd and simd_mask base for unsupported <_Tp, _Abi>
850 struct _UnsupportedBase
851 {
852  _UnsupportedBase() = delete;
853  _UnsupportedBase(const _UnsupportedBase&) = delete;
854  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
855  ~_UnsupportedBase() = delete;
856 };
857 
858 // }}}
859 // _InvalidTraits {{{
860 /**
861  * @internal
862  * Defines the implementation of a given <_Tp, _Abi>.
863  *
864  * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
865  * possible. Static assertions in the type definition do not suffice. It is
866  * important that SFINAE works.
867  */
868 struct _InvalidTraits
869 {
870  using _IsValid = false_type;
871  using _SimdBase = _UnsupportedBase;
872  using _MaskBase = _UnsupportedBase;
873 
874  static constexpr size_t _S_full_size = 0;
875  static constexpr bool _S_is_partial = false;
876 
877  static constexpr size_t _S_simd_align = 1;
878  struct _SimdImpl;
879  struct _SimdMember {};
880  struct _SimdCastType;
881 
882  static constexpr size_t _S_mask_align = 1;
883  struct _MaskImpl;
884  struct _MaskMember {};
885  struct _MaskCastType;
886 };
887 
888 // }}}
889 // _SimdTraits {{{
890 template <typename _Tp, typename _Abi, typename = void_t<>>
891  struct _SimdTraits : _InvalidTraits {};
892 
893 // }}}
894 // __private_init, __bitset_init{{{
895 /**
896  * @internal
897  * Tag used for private init constructor of simd and simd_mask
898  */
899 inline constexpr struct _PrivateInit {} __private_init = {};
900 
901 inline constexpr struct _BitsetInit {} __bitset_init = {};
902 
903 // }}}
904 // __is_narrowing_conversion<_From, _To>{{{
905 template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
906  bool = is_arithmetic_v<_To>>
907  struct __is_narrowing_conversion;
908 
909 // ignore "signed/unsigned mismatch" in the following trait.
910 // The implicit conversions will do the right thing here.
911 template <typename _From, typename _To>
912  struct __is_narrowing_conversion<_From, _To, true, true>
913  : public __bool_constant<(
914  __digits_v<_From> > __digits_v<_To>
915  || __finite_max_v<_From> > __finite_max_v<_To>
916  || __finite_min_v<_From> < __finite_min_v<_To>
917  || (is_signed_v<_From> && is_unsigned_v<_To>))> {};
918 
919 template <typename _Tp>
920  struct __is_narrowing_conversion<_Tp, bool, true, true>
921  : public true_type {};
922 
923 template <>
924  struct __is_narrowing_conversion<bool, bool, true, true>
925  : public false_type {};
926 
927 template <typename _Tp>
928  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
929  : public false_type {};
930 
931 template <typename _From, typename _To>
932  struct __is_narrowing_conversion<_From, _To, false, true>
933  : public negation<is_convertible<_From, _To>> {};
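// Editor's examples of what the trait flags as narrowing:
static_assert(__is_narrowing_conversion<_LLong, int>::value);	// loses digits
static_assert(__is_narrowing_conversion<int, _UInt>::value);	// sign change
static_assert(!__is_narrowing_conversion<int, int>::value);
static_assert(!__is_narrowing_conversion<_SChar, int>::value);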
934 
935 // }}}
936 // __converts_to_higher_integer_rank{{{
937 template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
938  struct __converts_to_higher_integer_rank : public true_type {};
939 
940 // this may fail for char -> short if sizeof(char) == sizeof(short)
941 template <typename _From, typename _To>
942  struct __converts_to_higher_integer_rank<_From, _To, false>
943  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
944 
945 // }}}
946 // __data(simd/simd_mask) {{{
947 template <typename _Tp, typename _Ap>
948  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
949  __data(const simd<_Tp, _Ap>& __x);
950 
951 template <typename _Tp, typename _Ap>
952  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
953  __data(simd<_Tp, _Ap>& __x);
954 
955 template <typename _Tp, typename _Ap>
956  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
957  __data(const simd_mask<_Tp, _Ap>& __x);
958 
959 template <typename _Tp, typename _Ap>
960  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
961  __data(simd_mask<_Tp, _Ap>& __x);
962 
963 // }}}
964 // _SimdConverter {{{
965 template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
966  typename = void>
967  struct _SimdConverter;
968 
969 template <typename _Tp, typename _Ap>
970  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
971  {
972  template <typename _Up>
973  _GLIBCXX_SIMD_INTRINSIC const _Up&
974  operator()(const _Up& __x)
975  { return __x; }
976  };
977 
978 // }}}
979 // __to_value_type_or_member_type {{{
980 template <typename _V>
981  _GLIBCXX_SIMD_INTRINSIC constexpr auto
982  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
983  { return __data(__x); }
984 
985 template <typename _V>
986  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
987  __to_value_type_or_member_type(const typename _V::value_type& __x)
988  { return __x; }
989 
990 // }}}
991 // __bool_storage_member_type{{{
992 template <size_t _Size>
993  struct __bool_storage_member_type;
994 
995 template <size_t _Size>
996  using __bool_storage_member_type_t =
997  typename __bool_storage_member_type<_Size>::type;
998 
999 // }}}
1000 // _SimdTuple {{{
1001 // why not tuple?
1002 // 1. tuple gives no guarantee about the storage order, but I require
1003 //    storage equivalent to array<_Tp, _Np>
1005 // 2. direct access to the element type (first template argument)
1006 // 3. enforces equal element type, only different _Abi types are allowed
1007 template <typename _Tp, typename... _Abis>
1008  struct _SimdTuple;
1009 
1010 //}}}
1011 // __fixed_size_storage_t {{{
1012 template <typename _Tp, int _Np>
1013  struct __fixed_size_storage;
1014 
1015 template <typename _Tp, int _Np>
1016  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;
1017 
1018 // }}}
1019 // _SimdWrapper fwd decl{{{
1020 template <typename _Tp, size_t _Size, typename = void_t<>>
1021  struct _SimdWrapper;
1022 
1023 template <typename _Tp>
1024  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
1025 template <typename _Tp>
1026  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
1027 template <typename _Tp>
1028  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
1029 template <typename _Tp>
1030  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
1031 
1032 template <typename _Tp, size_t _Width>
1033  struct _SveSimdWrapper;
1034 
1035 // }}}
1036 // __is_simd_wrapper {{{
1037 template <typename _Tp>
1038  struct __is_simd_wrapper : false_type {};
1039 
1040 template <typename _Tp, size_t _Np>
1041  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};
1042 
1043 template <typename _Tp>
1044  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
1045 
1046 // }}}
1047 // _BitOps {{{
1048 struct _BitOps
1049 {
1050  // _S_bit_iteration {{{
1051  template <typename _Tp, typename _Fp>
1052  static void
1053  _S_bit_iteration(_Tp __mask, _Fp&& __f)
1054  {
1055  static_assert(sizeof(_ULLong) >= sizeof(_Tp));
1056  conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
1057  if constexpr (is_convertible_v<_Tp, decltype(__k)>)
1058  __k = __mask;
1059  else
1060  __k = __mask.to_ullong();
1061  while (__k)
1062  {
1063  __f(std::__countr_zero(__k));
1064  __k &= (__k - 1);
1065  }
1066  }
1067 
1068  //}}}
1069 };
1070 
1071 //}}}
1072 // __increment, __decrement {{{
1073 template <typename _Tp = void>
1074  struct __increment
1075  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };
1076 
1077 template <>
1078  struct __increment<void>
1079  {
1080  template <typename _Tp>
1081  constexpr _Tp
1082  operator()(_Tp __a) const
1083  { return ++__a; }
1084  };
1085 
1086 template <typename _Tp = void>
1087  struct __decrement
1088  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };
1089 
1090 template <>
1091  struct __decrement<void>
1092  {
1093  template <typename _Tp>
1094  constexpr _Tp
1095  operator()(_Tp __a) const
1096  { return --__a; }
1097  };
1098 
1099 // }}}
1100 // _ValuePreserving(OrInt) {{{
1101 template <typename _From, typename _To,
1102  typename = enable_if_t<negation<
1103  __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
1104  using _ValuePreserving = _From;
1105 
1106 template <typename _From, typename _To,
1107  typename _DecayedFrom = __remove_cvref_t<_From>,
1108  typename = enable_if_t<conjunction<
1109  is_convertible<_From, _To>,
1110  disjunction<
1111  is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
1112  conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
1113  negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
1114  using _ValuePreservingOrInt = _From;
1115 
1116 // }}}
1117 // __intrinsic_type {{{
1118 template <typename _Tp, size_t _Bytes, typename = void_t<>>
1119  struct __intrinsic_type;
1120 
1121 template <typename _Tp, size_t _Size>
1122  using __intrinsic_type_t =
1123  typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;
1124 
1125 template <typename _Tp>
1126  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
1127 template <typename _Tp>
1128  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
1129 template <typename _Tp>
1130  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
1131 template <typename _Tp>
1132  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
1133 template <typename _Tp>
1134  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
1135 template <typename _Tp>
1136  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
1137 
1138 // }}}
1139 // _BitMask {{{
1140 template <size_t _Np, bool _Sanitized = false>
1141  struct _BitMask;
1142 
1143 template <size_t _Np, bool _Sanitized>
1144  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};
1145 
1146 template <size_t _Np>
1147  using _SanitizedBitMask = _BitMask<_Np, true>;
1148 
1149 template <size_t _Np, bool _Sanitized>
1150  struct _BitMask
1151  {
1152  static_assert(_Np > 0);
1153 
1154  static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
1155 
1156  using _Tp = conditional_t<_Np == 1, bool,
1157  make_unsigned_t<__int_with_sizeof_t<std::min(
1158  sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
1159 
1160  static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
1161 
1162  _Tp _M_bits[_S_array_size];
1163 
1164  static constexpr int _S_unused_bits
1165  = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
1166 
1167  static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
1168 
1169  constexpr _BitMask() noexcept = default;
1170 
1171  constexpr _BitMask(unsigned long long __x) noexcept
1172  : _M_bits{static_cast<_Tp>(__x)} {}
1173 
1174  _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
1175 
1176  constexpr _BitMask(const _BitMask&) noexcept = default;
1177 
1178  template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
1179  && _Sanitized == true>>
1180  constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
1181  : _BitMask(__rhs._M_sanitized()) {}
1182 
1183  constexpr operator _SimdWrapper<bool, _Np>() const noexcept
1184  {
1185  static_assert(_S_array_size == 1);
1186  return _M_bits[0];
1187  }
1188 
1189  // precondition: is sanitized
1190  constexpr _Tp
1191  _M_to_bits() const noexcept
1192  {
1193  static_assert(_S_array_size == 1);
1194  return _M_bits[0];
1195  }
1196 
1197  // precondition: is sanitized
1198  constexpr unsigned long long
1199  to_ullong() const noexcept
1200  {
1201  static_assert(_S_array_size == 1);
1202  return _M_bits[0];
1203  }
1204 
1205  // precondition: is sanitized
1206  constexpr unsigned long
1207  to_ulong() const noexcept
1208  {
1209  static_assert(_S_array_size == 1);
1210  return _M_bits[0];
1211  }
1212 
1213  constexpr bitset<_Np>
1214  _M_to_bitset() const noexcept
1215  {
1216  static_assert(_S_array_size == 1);
1217  return _M_bits[0];
1218  }
1219 
1220  constexpr decltype(auto)
1221  _M_sanitized() const noexcept
1222  {
1223  if constexpr (_Sanitized)
1224  return *this;
1225  else if constexpr (_Np == 1)
1226  return _SanitizedBitMask<_Np>(_M_bits[0]);
1227  else
1228  {
1229  _SanitizedBitMask<_Np> __r = {};
1230  for (int __i = 0; __i < _S_array_size; ++__i)
1231  __r._M_bits[__i] = _M_bits[__i];
1232  if constexpr (_S_unused_bits > 0)
1233  __r._M_bits[_S_array_size - 1] &= _S_bitmask;
1234  return __r;
1235  }
1236  }
1237 
1238  template <size_t _Mp, bool _LSanitized>
1239  constexpr _BitMask<_Np + _Mp, _Sanitized>
1240  _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
1241  {
1242  constexpr size_t _RN = _Np + _Mp;
1243  using _Rp = _BitMask<_RN, _Sanitized>;
1244  if constexpr (_Rp::_S_array_size == 1)
1245  {
1246  _Rp __r{{_M_bits[0]}};
1247  __r._M_bits[0] <<= _Mp;
1248  __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
1249  return __r;
1250  }
1251  else
1252  __assert_unreachable<_Rp>();
1253  }
1254 
1255  // Return a new _BitMask with size _NewSize while dropping _DropLsb least
1256  // significant bits. If the operation implicitly produces a sanitized bitmask,
1257  // the result type will have _Sanitized set.
1258  template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
1259  constexpr auto
1260  _M_extract() const noexcept
1261  {
1262  static_assert(_Np > _DropLsb);
1263  static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
1264  "not implemented for bitmasks larger than one ullong");
1265  if constexpr (_NewSize == 1)
1266  // must sanitize because the return _Tp is bool
1267  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
1268  else
1269  return _BitMask<_NewSize,
1270  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
1271  && _NewSize + _DropLsb <= _Np)
1272  || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
1273  && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
1274  >> _DropLsb);
1275  }
1276 
1277  // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
1278  constexpr bool
1279  all() const noexcept
1280  {
1281  if constexpr (_Np == 1)
1282  return _M_bits[0];
1283  else if constexpr (!_Sanitized)
1284  return _M_sanitized().all();
1285  else
1286  {
1287  constexpr _Tp __allbits = ~_Tp();
1288  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1289  if (_M_bits[__i] != __allbits)
1290  return false;
1291  return _M_bits[_S_array_size - 1] == _S_bitmask;
1292  }
1293  }
1294 
1295  // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
1296  // false.
1297  constexpr bool
1298  any() const noexcept
1299  {
1300  if constexpr (_Np == 1)
1301  return _M_bits[0];
1302  else if constexpr (!_Sanitized)
1303  return _M_sanitized().any();
1304  else
1305  {
1306  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1307  if (_M_bits[__i] != 0)
1308  return true;
1309  return _M_bits[_S_array_size - 1] != 0;
1310  }
1311  }
1312 
1313  // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
1314  constexpr bool
1315  none() const noexcept
1316  {
1317  if constexpr (_Np == 1)
1318  return !_M_bits[0];
1319  else if constexpr (!_Sanitized)
1320  return _M_sanitized().none();
1321  else
1322  {
1323  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1324  if (_M_bits[__i] != 0)
1325  return false;
1326  return _M_bits[_S_array_size - 1] == 0;
1327  }
1328  }
1329 
1330  // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
1331  // false.
1332  constexpr int
1333  count() const noexcept
1334  {
1335  if constexpr (_Np == 1)
1336  return _M_bits[0];
1337  else if constexpr (!_Sanitized)
1338  return _M_sanitized().count();
1339  else
1340  {
1341  int __result = __builtin_popcountll(_M_bits[0]);
1342  for (int __i = 1; __i < _S_array_size; ++__i)
1343  __result += __builtin_popcountll(_M_bits[__i]);
1344  return __result;
1345  }
1346  }
1347 
1348  // Returns the bit at offset __i as bool.
1349  constexpr bool
1350  operator[](size_t __i) const noexcept
1351  {
1352  if constexpr (_Np == 1)
1353  return _M_bits[0];
1354  else if constexpr (_S_array_size == 1)
1355  return (_M_bits[0] >> __i) & 1;
1356  else
1357  {
1358  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1359  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1360  return (_M_bits[__j] >> __shift) & 1;
1361  }
1362  }
1363 
1364  template <size_t __i>
1365  constexpr bool
1366  operator[](_SizeConstant<__i>) const noexcept
1367  {
1368  static_assert(__i < _Np);
1369  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1370  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1371  return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
1372  }
1373 
1374  // Set the bit at offset __i to __x.
1375  constexpr void
1376  set(size_t __i, bool __x) noexcept
1377  {
1378  if constexpr (_Np == 1)
1379  _M_bits[0] = __x;
1380  else if constexpr (_S_array_size == 1)
1381  {
1382  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
1383  _M_bits[0] |= _Tp(_Tp(__x) << __i);
1384  }
1385  else
1386  {
1387  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1388  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1389  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
1390  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1391  }
1392  }
1393 
1394  template <size_t __i>
1395  constexpr void
1396  set(_SizeConstant<__i>, bool __x) noexcept
1397  {
1398  static_assert(__i < _Np);
1399  if constexpr (_Np == 1)
1400  _M_bits[0] = __x;
1401  else
1402  {
1403  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1404  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1405  constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
1406  _M_bits[__j] &= __mask;
1407  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1408  }
1409  }
1410 
1411  // Inverts all bits. Sanitized input leads to sanitized output.
1412  constexpr _BitMask
1413  operator~() const noexcept
1414  {
1415  if constexpr (_Np == 1)
1416  return !_M_bits[0];
1417  else
1418  {
1419  _BitMask __result{};
1420  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1421  __result._M_bits[__i] = ~_M_bits[__i];
1422  if constexpr (_Sanitized)
1423  __result._M_bits[_S_array_size - 1]
1424  = _M_bits[_S_array_size - 1] ^ _S_bitmask;
1425  else
1426  __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
1427  return __result;
1428  }
1429  }
1430 
1431  constexpr _BitMask&
1432  operator^=(const _BitMask& __b) & noexcept
1433  {
1434  __execute_n_times<_S_array_size>(
1435  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
1436  return *this;
1437  }
1438 
1439  constexpr _BitMask&
1440  operator|=(const _BitMask& __b) & noexcept
1441  {
1442  __execute_n_times<_S_array_size>(
1443  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
1444  return *this;
1445  }
1446 
1447  constexpr _BitMask&
1448  operator&=(const _BitMask& __b) & noexcept
1449  {
1450  __execute_n_times<_S_array_size>(
1451  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
1452  return *this;
1453  }
1454 
1455  friend constexpr _BitMask
1456  operator^(const _BitMask& __a, const _BitMask& __b) noexcept
1457  {
1458  _BitMask __r = __a;
1459  __r ^= __b;
1460  return __r;
1461  }
1462 
1463  friend constexpr _BitMask
1464  operator|(const _BitMask& __a, const _BitMask& __b) noexcept
1465  {
1466  _BitMask __r = __a;
1467  __r |= __b;
1468  return __r;
1469  }
1470 
1471  friend constexpr _BitMask
1472  operator&(const _BitMask& __a, const _BitMask& __b) noexcept
1473  {
1474  _BitMask __r = __a;
1475  __r &= __b;
1476  return __r;
1477  }
1478 
1479  _GLIBCXX_SIMD_INTRINSIC
1480  constexpr bool
1481  _M_is_constprop() const
1482  {
1483  if constexpr (_S_array_size == 0)
1484  return __builtin_constant_p(_M_bits[0]);
1485  else
1486  {
1487  for (int __i = 0; __i < _S_array_size; ++__i)
1488  if (!__builtin_constant_p(_M_bits[__i]))
1489  return false;
1490  return true;
1491  }
1492  }
1493  };
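// Editor's examples: a 6-bit mask occupies one unsigned-char-sized word with
// two unused (possibly garbage) high bits; _M_sanitized() clears them:
static_assert(_BitMask<6>(0b1110'1010)._M_sanitized()._M_to_bits()
		== 0b10'1010);
static_assert(_BitMask<6, true>(0b10'1010).count() == 3);
static_assert(!_BitMask<6, true>(0b10'1010).all()
		&& _BitMask<6, true>(0b11'1111).all());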
1494 
1495 // }}}
1496 
1497 // vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
1498 // __min_vector_size {{{
1499 template <typename _Tp = void>
1500  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);
1501 
1502 #if _GLIBCXX_SIMD_HAVE_NEON
1503 template <>
1504  inline constexpr int __min_vector_size<void> = 8;
1505 #else
1506 template <>
1507  inline constexpr int __min_vector_size<void> = 16;
1508 #endif
1509 
1510 // }}}
1511 // __vector_type {{{
1512 template <typename _Tp, size_t _Np, typename = void>
1513  struct __vector_type_n {};
1514 
1515 // substitution failure for 0-element case
1516 template <typename _Tp>
1517  struct __vector_type_n<_Tp, 0, void> {};
1518 
1519 // special case 1-element to be _Tp itself
1520 template <typename _Tp>
1521  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
1522  { using type = _Tp; };
1523 
1524 // else, use GNU-style builtin vector types
1525 template <typename _Tp, size_t _Np>
1526  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
1527  {
1528  static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));
1529 
1530  static constexpr size_t _S_Bytes =
1531 #ifdef __i386__
1532  // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
1533  // those objects are passed via MMX registers and nothing ever calls EMMS.
1534  _S_Np2 == 8 ? 16 :
1535 #endif
1536  _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
1537  : _S_Np2;
1538 
1539  using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
1540  };
1541 
1542 template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
1543  struct __vector_type;
1544 
1545 template <typename _Tp, size_t _Bytes>
1546  struct __vector_type<_Tp, _Bytes, 0>
1547  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};
1548 
1549 template <typename _Tp, size_t _Size>
1550  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;
1551 
1552 template <typename _Tp>
1553  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
1554 template <typename _Tp>
1555  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
1556 template <typename _Tp>
1557  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
1558 template <typename _Tp>
1559  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
1560 template <typename _Tp>
1561  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
1562 template <typename _Tp>
1563  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
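// Editor's examples: 1 element yields _Tp itself; otherwise the byte size is
// rounded up to a power of two and to at least __min_vector_size<_Tp>:
static_assert(is_same_v<__vector_type_t<float, 1>, float>);
static_assert(sizeof(__vector_type_t<float, 4>) == 16);
static_assert(sizeof(__vector_type_t<double, 3>) == 32); // 24 -> __bit_ceil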
1564 
1565 // }}}
1566 // __is_vector_type {{{
1567 template <typename _Tp, typename = void_t<>>
1568  struct __is_vector_type : false_type {};
1569 
1570 template <typename _Tp>
1571  struct __is_vector_type<
1572  _Tp, void_t<typename __vector_type<
1573  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1574  : is_same<_Tp, typename __vector_type<
1575  remove_reference_t<decltype(declval<_Tp>()[0])>,
1576  sizeof(_Tp)>::type> {};
1577 
1578 template <typename _Tp>
1579  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1580 
1581 // }}}
1582 // __is_intrinsic_type {{{
1583 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
1584 template <typename _Tp>
1585  using __is_intrinsic_type = __is_vector_type<_Tp>;
1586 #else // not SSE (x86)
1587 template <typename _Tp, typename = void_t<>>
1588  struct __is_intrinsic_type : false_type {};
1589 
1590 template <typename _Tp>
1591  struct __is_intrinsic_type<
1592  _Tp, void_t<typename __intrinsic_type<
1593  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1594  : is_same<_Tp, typename __intrinsic_type<
1595  remove_reference_t<decltype(declval<_Tp>()[0])>,
1596  sizeof(_Tp)>::type> {};
1597 #endif
1598 
1599 template <typename _Tp>
1600  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
1601 
1602 // }}}
1603 // _VectorTraits{{{
1604 template <typename _Tp, typename = void_t<>>
1605  struct _VectorTraitsImpl;
1606 
1607 template <typename _Tp>
1608  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
1609  || __is_intrinsic_type_v<_Tp>>>
1610  {
1611  using type = _Tp;
1612  using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
1613  static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
1614  using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
1615  template <typename _Up, int _W = _S_full_size>
1616  static constexpr bool _S_is
1617  = is_same_v<value_type, _Up> && _W == _S_full_size;
1618  };
1619 
1620 template <typename _Tp, size_t _Np>
1621  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
1622  void_t<__vector_type_t<_Tp, _Np>>>
1623  {
1624  using type = __vector_type_t<_Tp, _Np>;
1625  using value_type = _Tp;
1626  static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
1627  using _Wrapper = _SimdWrapper<_Tp, _Np>;
1628  static constexpr bool _S_is_partial = (_Np != _S_full_size);
1629  static constexpr int _S_partial_width = _Np;
1630  template <typename _Up, int _W = _S_full_size>
1631  static constexpr bool _S_is
1632  = is_same_v<value_type, _Up> && _W == _S_full_size;
1633  };
1634 
1635 template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
1636  using _VectorTraits = _VectorTraitsImpl<_Tp>;
1637 
1638 // }}}
1639 // __as_vector{{{
1640 template <typename _V>
1641  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1642  __as_vector(_V __x)
1643  {
1644  if constexpr (__is_vector_type_v<_V>)
1645  return __x;
1646  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1647  {
1648  if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
1649  {
1650  static_assert(is_simd<_V>::value);
1651  static_assert(_V::abi_type::template __traits<
1652  typename _V::value_type>::_SimdMember::_S_tuple_size == 1);
1653  return __as_vector(__data(__x).first);
1654  }
1655  else if constexpr (_V::size() > 1)
1656  return __data(__x)._M_data;
1657  else
1658  {
1659  static_assert(is_simd<_V>::value);
1660  using _Tp = typename _V::value_type;
1661 #ifdef __i386__
1662  constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
1663  using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
1664 #else
1665  using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
1666 #endif
1667  return _RV{__data(__x)};
1668  }
1669  }
1670  else if constexpr (__is_vectorizable_v<_V>)
1671  return __vector_type_t<_V, 2>{__x};
1672  else
1673  return __x._M_data;
1674  }
1675 
1676 // }}}
1677 // __as_wrapper{{{
1678 template <size_t _Np = 0, typename _V>
1679  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1680  __as_wrapper(_V __x)
1681  {
1682  if constexpr (__is_vector_type_v<_V>)
1683  return _SimdWrapper<typename _VectorTraits<_V>::value_type,
1684  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
1685  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1686  {
1687  static_assert(_V::size() == _Np);
1688  return __data(__x);
1689  }
1690  else
1691  {
1692  static_assert(_V::_S_size == _Np);
1693  return __x;
1694  }
1695  }
1696 
1697 // }}}
1698 // __intrin_bitcast{{{
1699 template <typename _To, typename _From>
1700  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1701  __intrin_bitcast(_From __v)
1702  {
1703  static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
1704  && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
1705  if constexpr (sizeof(_To) == sizeof(_From))
1706  return reinterpret_cast<_To>(__v);
1707  else if constexpr (sizeof(_From) > sizeof(_To))
1708  if constexpr (sizeof(_To) >= 16)
1709  return reinterpret_cast<const __may_alias<_To>&>(__v);
1710  else
1711  {
1712  _To __r;
1713  __builtin_memcpy(&__r, &__v, sizeof(_To));
1714  return __r;
1715  }
1716 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1717  else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
1718  return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
1719  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1720  else if constexpr (__have_avx512f && sizeof(_From) == 16
1721  && sizeof(_To) == 64)
1722  return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
1723  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1724  else if constexpr (__have_avx512f && sizeof(_From) == 32
1725  && sizeof(_To) == 64)
1726  return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
1727  reinterpret_cast<__vector_type_t<float, 8>>(__v)));
1728 #endif // _GLIBCXX_SIMD_X86INTRIN
1729  else if constexpr (sizeof(__v) <= 8)
1730  return reinterpret_cast<_To>(
1731  __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
1732  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
1733  else
1734  {
1735  static_assert(sizeof(_To) > sizeof(_From));
1736  _To __r = {};
1737  __builtin_memcpy(&__r, &__v, sizeof(_From));
1738  return __r;
1739  }
1740  }
1741 
1742 // }}}
1743 // __vector_bitcast{{{
1744 template <typename _To, size_t _NN = 0, typename _From,
1745  typename _FromVT = _VectorTraits<_From>,
1746  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1747  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1748  __vector_bitcast(_From __x)
1749  {
1750  using _R = __vector_type_t<_To, _Np>;
1751  return __intrin_bitcast<_R>(__x);
1752  }
1753 
1754 template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1755  size_t _Np
1756  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1757  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1758  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1759  {
1760  static_assert(_Np > 1);
1761  return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1762  }
1763 
1764 // }}}
1765 // __convert_x86 declarations {{{
1766 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
1767 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1768  _To __convert_x86(_Tp);
1769 
1770 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1771  _To __convert_x86(_Tp, _Tp);
1772 
1773 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1774  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);
1775 
1776 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1777  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
1778 
1779 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1780  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
1781  _Tp, _Tp, _Tp, _Tp);
1782 #endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1783 
1784 //}}}
1785 // __bit_cast {{{
1786 template <typename _To, typename _From>
1787  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1788  __bit_cast(const _From __x)
1789  {
1790 #if __has_builtin(__builtin_bit_cast)
1791  return __builtin_bit_cast(_To, __x);
1792 #else
1793  static_assert(sizeof(_To) == sizeof(_From));
1794  constexpr bool __to_is_vectorizable
1795  = is_arithmetic_v<_To> || is_enum_v<_To>;
1796  constexpr bool __from_is_vectorizable
1797  = is_arithmetic_v<_From> || is_enum_v<_From>;
1798  if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1799  return reinterpret_cast<_To>(__x);
1800  else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1801  {
1802  using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1803  return reinterpret_cast<_To>(_FV{__x});
1804  }
1805  else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1806  {
1807  using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1808  using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1809  return reinterpret_cast<_TV>(_FV{__x})[0];
1810  }
1811  else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1812  {
1813  using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1814  return reinterpret_cast<_TV>(__x)[0];
1815  }
1816  else
1817  {
1818  _To __r;
1819  __builtin_memcpy(reinterpret_cast<char*>(&__r),
1820  reinterpret_cast<const char*>(&__x), sizeof(_To));
1821  return __r;
1822  }
1823 #endif
1824  }
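// Usage sketch (illustrative comment only): __bit_cast mirrors C++20
// std::bit_cast for the types used in this header:
//   unsigned __u = __bit_cast<unsigned>(1.f); // __u == 0x3f800000u
//   // (constexpr-usable only where __builtin_bit_cast is available)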
1825 
1826 // }}}
1827 // __to_intrin {{{
1828 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1829  typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1830  _GLIBCXX_SIMD_INTRINSIC constexpr _R
1831  __to_intrin(_Tp __x)
1832  {
1833  static_assert(sizeof(__x) <= sizeof(_R),
1834  "__to_intrin may never drop values off the end");
1835  if constexpr (sizeof(__x) == sizeof(_R))
1836  return reinterpret_cast<_R>(__as_vector(__x));
1837  else
1838  {
1839  using _Up = __int_for_sizeof_t<_Tp>;
1840  return reinterpret_cast<_R>(
1841  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1842  }
1843  }
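// Usage sketch (illustrative comment only, x86): convert a builtin vector to
// the matching intrinsic type so it can be passed to _mm* functions:
//   __vector_type_t<int, 4> __v = {1, 2, 3, 4};
//   __m128i __i = __to_intrin(__v); // same 16 bytes, intrinsic type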
1844 
1845 // }}}
1846 // __make_vector{{{
1847 template <typename _Tp, typename... _Args>
1848  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1849  __make_vector(const _Args&... __args)
1850  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1851 
1852 // }}}
1853 // __vector_broadcast{{{
1854 template <size_t _Np, typename _Tp, size_t... _I>
1855  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1856  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1857  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1858 
1859 template <size_t _Np, typename _Tp>
1860  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1861  __vector_broadcast(_Tp __x)
1862  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
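// Usage sketch (illustrative comment only):
//   auto __v = __make_vector<float>(1, 2, 3, 4); // __vector_type_t<float, 4>
//   auto __b = __vector_broadcast<4>(1.f);       // {1.f, 1.f, 1.f, 1.f}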
1863 
1864 // }}}
1865 // __generate_vector{{{
1866 template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1867  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1868  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1869  { return __vector_type_t<_Tp, _Np>{static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
1870 
1871 template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1872  _GLIBCXX_SIMD_INTRINSIC constexpr _V
1873  __generate_vector(_Gp&& __gen)
1874  {
1875  if constexpr (__is_vector_type_v<_V>)
1876  return __generate_vector_impl<typename _VVT::value_type,
1877  _VVT::_S_full_size>(
1878  static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1879  else
1880  return __generate_vector_impl<typename _VVT::value_type,
1881  _VVT::_S_partial_width>(
1882  static_cast<_Gp&&>(__gen),
1883  make_index_sequence<_VVT::_S_partial_width>());
1884  }
1885 
1886 template <typename _Tp, size_t _Np, typename _Gp>
1887  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1888  __generate_vector(_Gp&& __gen)
1889  {
1890  return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1891  make_index_sequence<_Np>());
1892  }
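// Usage sketch (illustrative comment only): the generator receives
// _SizeConstant<0> ... _SizeConstant<_Np - 1>, so the index is usable as a
// constant expression:
//   auto __iota = __generate_vector<int, 4>([](auto __i) { return int(__i); });
//   // __iota == {0, 1, 2, 3}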
1893 
1894 // }}}
1895 // __xor{{{
1896 template <typename _TW>
1897  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1898  __xor(_TW __a, _TW __b) noexcept
1899  {
1900  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1901  {
1902  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1903  _VectorTraitsImpl<_TW>>::value_type;
1904  if constexpr (is_floating_point_v<_Tp>)
1905  {
1906  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1907  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1908  ^ __vector_bitcast<_Ip>(__b));
1909  }
1910  else if constexpr (__is_vector_type_v<_TW>)
1911  return __a ^ __b;
1912  else
1913  return __a._M_data ^ __b._M_data;
1914  }
1915  else
1916  return __a ^ __b;
1917  }
1918 
1919 // }}}
1920 // __or{{{
1921 template <typename _TW>
1922  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1923  __or(_TW __a, _TW __b) noexcept
1924  {
1925  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1926  {
1927  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1928  _VectorTraitsImpl<_TW>>::value_type;
1929  if constexpr (is_floating_point_v<_Tp>)
1930  {
1931  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1932  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1933  | __vector_bitcast<_Ip>(__b));
1934  }
1935  else if constexpr (__is_vector_type_v<_TW>)
1936  return __a | __b;
1937  else
1938  return __a._M_data | __b._M_data;
1939  }
1940  else
1941  return __a | __b;
1942  }
1943 
1944 // }}}
1945 // __and{{{
1946 template <typename _TW>
1947  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1948  __and(_TW __a, _TW __b) noexcept
1949  {
1950  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1951  {
1952  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1953  _VectorTraitsImpl<_TW>>::value_type;
1954  if constexpr (is_floating_point_v<_Tp>)
1955  {
1956  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1957  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1958  & __vector_bitcast<_Ip>(__b));
1959  }
1960  else if constexpr (__is_vector_type_v<_TW>)
1961  return __a & __b;
1962  else
1963  return __a._M_data & __b._M_data;
1964  }
1965  else
1966  return __a & __b;
1967  }
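// Usage sketch (illustrative comment only): the three helpers above also
// accept floating-point operands by viewing them as equally sized unsigned
// integers, e.g. to flip sign bits:
//   __vector_type_t<float, 4> __x = {1, -2, 3, -4};
//   __x = __xor(__x, __vector_broadcast<4>(-0.f)); // {-1, 2, -3, 4}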
1968 
1969 // }}}
1970 // __andnot{{{
1971 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1972 static constexpr struct
1973 {
1974  _GLIBCXX_SIMD_INTRINSIC __v4sf
1975  operator()(__v4sf __a, __v4sf __b) const noexcept
1976  { return __builtin_ia32_andnps(__a, __b); }
1977 
1978  _GLIBCXX_SIMD_INTRINSIC __v2df
1979  operator()(__v2df __a, __v2df __b) const noexcept
1980  { return __builtin_ia32_andnpd(__a, __b); }
1981 
1982  _GLIBCXX_SIMD_INTRINSIC __v2di
1983  operator()(__v2di __a, __v2di __b) const noexcept
1984  { return __builtin_ia32_pandn128(__a, __b); }
1985 
1986  _GLIBCXX_SIMD_INTRINSIC __v8sf
1987  operator()(__v8sf __a, __v8sf __b) const noexcept
1988  { return __builtin_ia32_andnps256(__a, __b); }
1989 
1990  _GLIBCXX_SIMD_INTRINSIC __v4df
1991  operator()(__v4df __a, __v4df __b) const noexcept
1992  { return __builtin_ia32_andnpd256(__a, __b); }
1993 
1994  _GLIBCXX_SIMD_INTRINSIC __v4di
1995  operator()(__v4di __a, __v4di __b) const noexcept
1996  {
1997  if constexpr (__have_avx2)
1998  return __builtin_ia32_andnotsi256(__a, __b);
1999  else
2000  return reinterpret_cast<__v4di>(
2001  __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
2002  reinterpret_cast<__v4df>(__b)));
2003  }
2004 
2005  _GLIBCXX_SIMD_INTRINSIC __v16sf
2006  operator()(__v16sf __a, __v16sf __b) const noexcept
2007  {
2008  if constexpr (__have_avx512dq)
2009  return _mm512_andnot_ps(__a, __b);
2010  else
2011  return reinterpret_cast<__v16sf>(
2012  _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
2013  reinterpret_cast<__v8di>(__b)));
2014  }
2015 
2016  _GLIBCXX_SIMD_INTRINSIC __v8df
2017  operator()(__v8df __a, __v8df __b) const noexcept
2018  {
2019  if constexpr (__have_avx512dq)
2020  return _mm512_andnot_pd(__a, __b);
2021  else
2022  return reinterpret_cast<__v8df>(
2023  _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
2024  reinterpret_cast<__v8di>(__b)));
2025  }
2026 
2027  _GLIBCXX_SIMD_INTRINSIC __v8di
2028  operator()(__v8di __a, __v8di __b) const noexcept
2029  { return _mm512_andnot_si512(__a, __b); }
2030 } _S_x86_andnot;
2031 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
2032 
2033 template <typename _TW>
2034  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
2035  __andnot(_TW __a, _TW __b) noexcept
2036  {
2037  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
2038  {
2039  using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
2040  _VectorTraitsImpl<_TW>>;
2041  using _Tp = typename _TVT::value_type;
2042 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
2043  if constexpr (sizeof(_TW) >= 16)
2044  {
2045  const auto __ai = __to_intrin(__a);
2046  const auto __bi = __to_intrin(__b);
2047  if (!__builtin_is_constant_evaluated()
2048  && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
2049  {
2050  const auto __r = _S_x86_andnot(__ai, __bi);
2051  if constexpr (is_convertible_v<decltype(__r), _TW>)
2052  return __r;
2053  else
2054  return reinterpret_cast<typename _TVT::type>(__r);
2055  }
2056  }
2057 #endif // _GLIBCXX_SIMD_X86INTRIN
2058  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2059  return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2060  & __vector_bitcast<_Ip>(__b));
2061  }
2062  else
2063  return ~__a & __b;
2064  }
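// Usage sketch (illustrative comment only): note the operand order, matching
// the x86 ANDN instructions: __andnot(__a, __b) computes ~__a & __b.
//   __vector_type_t<int, 4> __mask = {-1, 0, -1, 0};
//   // __andnot(__mask, __v) zeroes __v[0] and __v[2] and keeps __v[1] and
//   // __v[3], for any __v of the same type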
2065 
2066 // }}}
2067 // __not{{{
2068 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2069  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2070  __not(_Tp __a) noexcept
2071  {
2072  if constexpr (is_floating_point_v<typename _TVT::value_type>)
2073  return reinterpret_cast<typename _TVT::type>(
2074  ~__vector_bitcast<unsigned>(__a));
2075  else
2076  return ~__a;
2077  }
2078 
2079 // }}}
2080 // __vec_shuffle{{{
2081 template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
2082  _GLIBCXX_SIMD_INTRINSIC constexpr
2083  __vector_type_t<remove_reference_t<decltype(declval<_T0>()[0])>, sizeof...(_Is)>
2084  __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
2085  {
2086  constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
2087  constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
2088  using _Tp = remove_reference_t<decltype(declval<_T0>()[0])>;
2089  using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
2090 #if __has_builtin(__builtin_shufflevector)
2091 #ifdef __clang__
2092  // Clang's __builtin_shufflevector requires both arguments to have the same type (_T0 == _T1)
2093  if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
2094  return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
2095  else if constexpr (sizeof(__x) > sizeof(__y))
2096  return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
2097  else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
2098  return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
2099  __i = __idx_perm(__i);
2100  return __i < _N0 ? __i : __i - _N0 + _N1;
2101  });
2102  else if constexpr (sizeof(__x) < sizeof(__y))
2103  return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int __i) {
2104  __i = __idx_perm(__i);
2105  return __i < _N0 ? __i : __i - _N0 + _N1;
2106  });
2107  else
2108 #endif
2109  {
2110  const auto __r = __builtin_shufflevector(__x, __y, [=] {
2111  constexpr int __j = __idx_perm(_Is);
2112  static_assert(__j < _N0 + _N1);
2113  return __j;
2114  }()...);
2115 #ifdef __i386__
2116  if constexpr (sizeof(__r) == sizeof(_RV))
2117  return __r;
2118  else
2119  return _RV {__r[_Is]...};
2120 #else
2121  return __r;
2122 #endif
2123  }
2124 #else
2125  return _RV {
2126  [=]() -> _Tp {
2127  constexpr int __j = __idx_perm(_Is);
2128  static_assert(__j < _N0 + _N1);
2129  if constexpr (__j < 0)
2130  return 0;
2131  else if constexpr (__j < _N0)
2132  return __x[__j];
2133  else
2134  return __y[__j - _N0];
2135  }()...
2136  };
2137 #endif
2138  }
2139 
2140 template <typename _T0, typename _Fun, typename _Seq>
2141  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2142  __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
2143  { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
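// Usage sketch (illustrative comment only): __idx_perm maps each result lane
// to a source lane; indices < _N0 select from __x, larger indices from __y,
// and negative indices yield a zero (in the fallback path) or unspecified
// element. E.g. reversing any __vector_type_t<float, 4> __v:
//   auto __rev = __vec_shuffle(__v, make_index_sequence<4>(),
//                              [](int __i) { return 3 - __i; });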
2144 
2145 // }}}
2146 // __concat{{{
2147 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2148  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
2149  constexpr _R
2150  __concat(_Tp a_, _Tp b_)
2151  {
2152 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2153  using _W
2154  = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2155  conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2156  long long, typename _TVT::value_type>>;
2157  constexpr int input_width = sizeof(_Tp) / sizeof(_W);
2158  const auto __a = __vector_bitcast<_W>(a_);
2159  const auto __b = __vector_bitcast<_W>(b_);
2160  using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2161 #else
2162  constexpr int input_width = _TVT::_S_full_size;
2163  const _Tp& __a = a_;
2164  const _Tp& __b = b_;
2165  using _Up = _R;
2166 #endif
2167  if constexpr (input_width == 2)
2168  return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2169  else if constexpr (input_width == 4)
2170  return reinterpret_cast<_R>(
2171  _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2172  else if constexpr (input_width == 8)
2173  return reinterpret_cast<_R>(
2174  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2175  __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2176  else if constexpr (input_width == 16)
2177  return reinterpret_cast<_R>(
2178  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2179  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2180  __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
2181  __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
2182  __b[12], __b[13], __b[14], __b[15]});
2183  else if constexpr (input_width == 32)
2184  return reinterpret_cast<_R>(
2185  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2186  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2187  __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2188  __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2189  __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
2190  __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
2191  __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2192  __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2193  __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2194  __b[31]});
2195  }
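// Usage sketch (illustrative comment only):
//   __vector_type_t<float, 4> __lo = {0, 1, 2, 3}, __hi = {4, 5, 6, 7};
//   auto __v8 = __concat(__lo, __hi); // __vector_type_t<float, 8>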
2196 
2197 // }}}
2198 // __zero_extend {{{
2199 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2200  struct _ZeroExtendProxy
2201  {
2202  using value_type = typename _TVT::value_type;
2203  static constexpr size_t _Np = _TVT::_S_full_size;
2204  const _Tp __x;
2205 
2206  template <typename _To, typename _ToVT = _VectorTraits<_To>,
2207  typename
2208  = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2209  _GLIBCXX_SIMD_INTRINSIC operator _To() const
2210  {
2211  constexpr size_t _ToN = _ToVT::_S_full_size;
2212  if constexpr (_ToN == _Np)
2213  return __x;
2214  else if constexpr (_ToN == 2 * _Np)
2215  {
2216 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2217  if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2218  return __vector_bitcast<value_type>(
2219  _mm256_insertf128_ps(__m256(), __x, 0));
2220  else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2221  return __vector_bitcast<value_type>(
2222  _mm256_insertf128_pd(__m256d(), __x, 0));
2223  else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2224  return __vector_bitcast<value_type>(
2225  _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2226  else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2227  {
2228  if constexpr (__have_avx512dq)
2229  return __vector_bitcast<value_type>(
2230  _mm512_insertf32x8(__m512(), __x, 0));
2231  else
2232  return reinterpret_cast<__m512>(
2233  _mm512_insertf64x4(__m512d(),
2234  reinterpret_cast<__m256d>(__x), 0));
2235  }
2236  else if constexpr (__have_avx512f
2237  && _TVT::template _S_is<double, 4>)
2238  return __vector_bitcast<value_type>(
2239  _mm512_insertf64x4(__m512d(), __x, 0));
2240  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2241  return __vector_bitcast<value_type>(
2242  _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2243 #endif
2244  return __concat(__x, _Tp());
2245  }
2246  else if constexpr (_ToN == 4 * _Np)
2247  {
2248 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2249  if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2250  {
2251  return __vector_bitcast<value_type>(
2252  _mm512_insertf64x2(__m512d(), __x, 0));
2253  }
2254  else if constexpr (__have_avx512f
2255  && is_floating_point_v<value_type>)
2256  {
2257  return __vector_bitcast<value_type>(
2258  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2259  0));
2260  }
2261  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2262  {
2263  return __vector_bitcast<value_type>(
2264  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2265  }
2266 #endif
2267  return __concat(__concat(__x, _Tp()),
2268  __vector_type_t<value_type, _Np * 2>());
2269  }
2270  else if constexpr (_ToN == 8 * _Np)
2271  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2272  __vector_type_t<value_type, _Np * 4>());
2273  else if constexpr (_ToN == 16 * _Np)
2274  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2275  __vector_type_t<value_type, _Np * 8>());
2276  else
2277  __assert_unreachable<_Tp>();
2278  }
2279  };
2280 
2281 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2282  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2283  __zero_extend(_Tp __x)
2284  { return {__x}; }
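// Usage sketch (illustrative comment only): the proxy converts on demand, so
// the result width is deduced from the destination type:
//   __vector_type_t<float, 4> __v4 = {1, 2, 3, 4};
//   __vector_type_t<float, 8> __v8 = __zero_extend(__v4); // upper half zeros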
2285 
2286 // }}}
2287 // __extract<_Np, By>{{{
2288 template <int _Offset,
2289  int _SplitBy,
2290  typename _Tp,
2291  typename _TVT = _VectorTraits<_Tp>,
2292  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
2293  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2294  __extract(_Tp __in)
2295  {
2296  using value_type = typename _TVT::value_type;
2297 #if _GLIBCXX_SIMD_X86INTRIN // {{{
2298  if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2299  {
2300  if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2301  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2302  else if constexpr (is_floating_point_v<value_type>)
2303  return __vector_bitcast<value_type>(
2304  _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2305  else
2306  return reinterpret_cast<_R>(
2307  _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2308  _Offset));
2309  }
2310  else
2311 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
2312  {
2313 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2314  using _W = conditional_t<
2315  is_floating_point_v<value_type>, double,
2316  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2317  static_assert(sizeof(_R) % sizeof(_W) == 0);
2318  constexpr int __return_width = sizeof(_R) / sizeof(_W);
2319  using _Up = __vector_type_t<_W, __return_width>;
2320  const auto __x = __vector_bitcast<_W>(__in);
2321 #else
2322  constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2323  using _Up = _R;
2324  const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2325  = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2326 #endif
2327  constexpr int _O = _Offset * __return_width;
2328  return __call_with_subscripts<__return_width, _O>(
2329  __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2330  return reinterpret_cast<_R>(_Up{__entries...});
2331  });
2332  }
2333  }
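// Usage sketch (illustrative comment only): split a vector into equal parts:
//   __vector_type_t<float, 8> __v8 = {0, 1, 2, 3, 4, 5, 6, 7};
//   auto __lo = __extract<0, 2>(__v8); // {0, 1, 2, 3}
//   auto __hi = __extract<1, 2>(__v8); // {4, 5, 6, 7}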
2334 
2335 // }}}
2336 // __lo/__hi64[z]{{{
2337 template <typename _Tp,
2338  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2339  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2340  __lo64(_Tp __x)
2341  {
2342  _R __r{};
2343  __builtin_memcpy(&__r, &__x, 8);
2344  return __r;
2345  }
2346 
2347 template <typename _Tp,
2348  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2349  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2350  __hi64(_Tp __x)
2351  {
2352  static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2353  _R __r{};
2354  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2355  return __r;
2356  }
2357 
2358 template <typename _Tp,
2359  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2360  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2361  __hi64z([[maybe_unused]] _Tp __x)
2362  {
2363  _R __r{};
2364  if constexpr (sizeof(_Tp) == 16)
2365  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2366  return __r;
2367  }
2368 
2369 // }}}
2370 // __lo/__hi128{{{
2371 template <typename _Tp>
2372  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2373  __lo128(_Tp __x)
2374  { return __extract<0, sizeof(_Tp) / 16>(__x); }
2375 
2376 template <typename _Tp>
2377  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2378  __hi128(_Tp __x)
2379  {
2380  static_assert(sizeof(__x) == 32);
2381  return __extract<1, 2>(__x);
2382  }
2383 
2384 // }}}
2385 // __lo/__hi256{{{
2386 template <typename _Tp>
2387  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2388  __lo256(_Tp __x)
2389  {
2390  static_assert(sizeof(__x) == 64);
2391  return __extract<0, 2>(__x);
2392  }
2393 
2394 template <typename _Tp>
2395  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2396  __hi256(_Tp __x)
2397  {
2398  static_assert(sizeof(__x) == 64);
2399  return __extract<1, 2>(__x);
2400  }
2401 
2402 // }}}
2403 // __auto_bitcast{{{
2404 template <typename _Tp>
2405  struct _AutoCast
2406  {
2407  static_assert(__is_vector_type_v<_Tp>);
2408 
2409  const _Tp __x;
2410 
2411  template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2412  _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2413  { return __intrin_bitcast<typename _UVT::type>(__x); }
2414  };
2415 
2416 template <typename _Tp>
2417  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2418  __auto_bitcast(const _Tp& __x)
2419  { return {__x}; }
2420 
2421 template <typename _Tp, size_t _Np>
2422  _GLIBCXX_SIMD_INTRINSIC constexpr
2423  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2424  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2425  { return {__x._M_data}; }
2426 
2427 // }}}
2428 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2429 
2430 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2431 // __bool_storage_member_type{{{
2432 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2433 template <size_t _Size>
2434  struct __bool_storage_member_type
2435  {
2436  static_assert((_Size & (_Size - 1)) != 0,
2437  "This trait may only be used for non-power-of-2 sizes. "
2438  "Power-of-2 sizes must be specialized.");
2439  using type =
2440  typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2441  };
2442 
2443 template <>
2444  struct __bool_storage_member_type<1> { using type = bool; };
2445 
2446 template <>
2447  struct __bool_storage_member_type<2> { using type = __mmask8; };
2448 
2449 template <>
2450  struct __bool_storage_member_type<4> { using type = __mmask8; };
2451 
2452 template <>
2453  struct __bool_storage_member_type<8> { using type = __mmask8; };
2454 
2455 template <>
2456  struct __bool_storage_member_type<16> { using type = __mmask16; };
2457 
2458 template <>
2459  struct __bool_storage_member_type<32> { using type = __mmask32; };
2460 
2461 template <>
2462  struct __bool_storage_member_type<64> { using type = __mmask64; };
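// Illustrative note (comment only): non-power-of-2 sizes round up via
// __bit_ceil, e.g.:
//   static_assert(is_same_v<__bool_storage_member_type<3>::type, __mmask8>);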
2463 #endif // _GLIBCXX_SIMD_HAVE_AVX512F
2464 
2465 // }}}
2466 // __intrinsic_type (x86){{{
2467 // the following excludes bool via __is_vectorizable
2468 #if _GLIBCXX_SIMD_HAVE_SSE
2469 template <typename _Tp, size_t _Bytes>
2470  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2471  {
2472  static_assert(!is_same_v<_Tp, long double>,
2473  "no __intrinsic_type support for long double on x86");
2474 
2475  static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;
2476 
2477  using type [[__gnu__::__vector_size__(_S_VBytes)]]
2478  = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2479  };
2480 #endif // _GLIBCXX_SIMD_HAVE_SSE
2481 
2482 // }}}
2483 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2484 // __intrinsic_type (ARM){{{
2485 #if _GLIBCXX_SIMD_HAVE_NEON
2486 template <>
2487  struct __intrinsic_type<float, 8, void>
2488  { using type = float32x2_t; };
2489 
2490 template <>
2491  struct __intrinsic_type<float, 16, void>
2492  { using type = float32x4_t; };
2493 
2494 template <>
2495  struct __intrinsic_type<double, 8, void>
2496  {
2497 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2498  using type = float64x1_t;
2499 #endif
2500  };
2501 
2502 template <>
2503  struct __intrinsic_type<double, 16, void>
2504  {
2505 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2506  using type = float64x2_t;
2507 #endif
2508  };
2509 
2510 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2511 template <> \
2512  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2513  _Np * _Bits / 8, void> \
2514  { using type = int##_Bits##x##_Np##_t; }; \
2515 template <> \
2516  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2517  _Np * _Bits / 8, void> \
2518  { using type = uint##_Bits##x##_Np##_t; }
2519 _GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2520 _GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2521 _GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2522 _GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2523 _GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2524 _GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2525 _GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2526 _GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2527 #undef _GLIBCXX_SIMD_ARM_INTRIN
2528 
2529 template <typename _Tp, size_t _Bytes>
2530  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2531  {
2532  static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2533 
2534  using _Ip = __int_for_sizeof_t<_Tp>;
2535 
2536  using _Up = conditional_t<
2537  is_floating_point_v<_Tp>, _Tp,
2538  conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2539 
2540  static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2541  "should use explicit specialization above");
2542 
2543  using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2544  };
2545 #endif // _GLIBCXX_SIMD_HAVE_NEON
2546 
2547 // }}}
2548 // __intrinsic_type (PPC){{{
2549 #ifdef __ALTIVEC__
2550 template <typename _Tp>
2551  struct __intrinsic_type_impl;
2552 
2553 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2554  template <> \
2555  struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2556 _GLIBCXX_SIMD_PPC_INTRIN(float);
2557 #ifdef __VSX__
2558 _GLIBCXX_SIMD_PPC_INTRIN(double);
2559 #endif
2560 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
2561 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2562 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
2563 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2564 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
2565 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2566 #if defined __VSX__ || __SIZEOF_LONG__ == 4
2567 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
2568 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2569 #endif
2570 #ifdef __VSX__
2571 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2572 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2573 #endif
2574 #undef _GLIBCXX_SIMD_PPC_INTRIN
2575 
2576 template <typename _Tp, size_t _Bytes>
2577  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2578  {
2579  static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2580 
2581  // allow _Tp == long double with -mlong-double-64
2582  static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2583  "no __intrinsic_type support for 128-bit floating point on PowerPC");
2584 
2585 #ifndef __VSX__
2586  static_assert(!(is_same_v<_Tp, double>
2587  || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2588  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2589 #endif
2590 
2591  static constexpr auto __element_type()
2592  {
2593  if constexpr (is_floating_point_v<_Tp>)
2594  {
2595  if constexpr (_S_is_ldouble)
2596  return double {};
2597  else
2598  return _Tp {};
2599  }
2600  else if constexpr (is_signed_v<_Tp>)
2601  {
2602  if constexpr (sizeof(_Tp) == sizeof(_SChar))
2603  return _SChar {};
2604  else if constexpr (sizeof(_Tp) == sizeof(short))
2605  return short {};
2606  else if constexpr (sizeof(_Tp) == sizeof(int))
2607  return int {};
2608  else if constexpr (sizeof(_Tp) == sizeof(_LLong))
2609  return _LLong {};
2610  }
2611  else
2612  {
2613  if constexpr (sizeof(_Tp) == sizeof(_UChar))
2614  return _UChar {};
2615  else if constexpr (sizeof(_Tp) == sizeof(_UShort))
2616  return _UShort {};
2617  else if constexpr (sizeof(_Tp) == sizeof(_UInt))
2618  return _UInt {};
2619  else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
2620  return _ULLong {};
2621  }
2622  }
2623 
2624  using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
2625  };
2626 #endif // __ALTIVEC__
2627 
2628 // }}}
2629 // _SimdWrapper<bool>{{{1
2630 template <size_t _Width>
2631  struct _SimdWrapper<bool, _Width,
2632  void_t<typename __bool_storage_member_type<_Width>::type>>
2633  {
2634  using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2635  using value_type = bool;
2636 
2637  static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2638 
2639  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2640  __as_full_vector() const
2641  { return _M_data; }
2642 
2643  _GLIBCXX_SIMD_INTRINSIC constexpr
2644  _SimdWrapper() = default;
2645 
2646  _GLIBCXX_SIMD_INTRINSIC constexpr
2647  _SimdWrapper(_BuiltinType __k) : _M_data(__k) {}
2648 
2649  _GLIBCXX_SIMD_INTRINSIC
2650  operator const _BuiltinType&() const
2651  { return _M_data; }
2652 
2653  _GLIBCXX_SIMD_INTRINSIC
2654  operator _BuiltinType&()
2655  { return _M_data; }
2656 
2657  _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2658  __intrin() const
2659  { return _M_data; }
2660 
2661  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2662  operator[](size_t __i) const
2663  { return _M_data & (_BuiltinType(1) << __i); }
2664 
2665  template <size_t __i>
2666  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2667  operator[](_SizeConstant<__i>) const
2668  { return _M_data & (_BuiltinType(1) << __i); }
2669 
2670  _GLIBCXX_SIMD_INTRINSIC constexpr void
2671  _M_set(size_t __i, value_type __x)
2672  {
2673  if (__x)
2674  _M_data |= (_BuiltinType(1) << __i);
2675  else
2676  _M_data &= ~(_BuiltinType(1) << __i);
2677  }
2678 
2679  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2680  _M_is_constprop() const
2681  { return __builtin_constant_p(_M_data); }
2682 
2683  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2684  _M_is_constprop_none_of() const
2685  {
2686  if (__builtin_constant_p(_M_data))
2687  {
2688  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2689  constexpr _BuiltinType __active_mask
2690  = ~_BuiltinType() >> (__nbits - _Width);
2691  return (_M_data & __active_mask) == 0;
2692  }
2693  return false;
2694  }
2695 
2696  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2697  _M_is_constprop_all_of() const
2698  {
2699  if (__builtin_constant_p(_M_data))
2700  {
2701  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2702  constexpr _BuiltinType __active_mask
2703  = ~_BuiltinType() >> (__nbits - _Width);
2704  return (_M_data & __active_mask) == __active_mask;
2705  }
2706  return false;
2707  }
2708 
2709  _BuiltinType _M_data;
2710  };
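// Usage sketch (illustrative comment only): one bit per element, stored in an
// AVX-512 mask type:
//   _SimdWrapper<bool, 4> __k(0b0101);
//   // __k[0] == true, __k[1] == false, __k[2] == true, __k[3] == false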
2711 
2712 // _SimdWrapperBase{{{1
2713 template <bool _MustZeroInitPadding, typename _BuiltinType>
2714  struct _SimdWrapperBase;
2715 
2716 template <typename _BuiltinType>
2717  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2718  {
2719  _GLIBCXX_SIMD_INTRINSIC constexpr
2720  _SimdWrapperBase() = default;
2721 
2722  _GLIBCXX_SIMD_INTRINSIC constexpr
2723  _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2724 
2725  _BuiltinType _M_data;
2726  };
2727 
2728 template <typename _BuiltinType>
2729  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2730  // never become SNaN
2731  {
2732  _GLIBCXX_SIMD_INTRINSIC constexpr
2733  _SimdWrapperBase() : _M_data() {}
2734 
2735  _GLIBCXX_SIMD_INTRINSIC constexpr
2736  _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2737 
2738  _BuiltinType _M_data;
2739  };
2740 
2741 // }}}
2742 // _SimdWrapper{{{
2743 struct _DisabledSimdWrapper;
2744 
2745 template <typename _Tp, size_t _Width>
2746  struct _SimdWrapper<
2747  _Tp, _Width,
2748  void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2749  : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2750  && sizeof(_Tp) * _Width
2751  == sizeof(__vector_type_t<_Tp, _Width>),
2752  __vector_type_t<_Tp, _Width>>
2753  {
2754  static constexpr bool _S_need_default_init
2755  = __has_iec559_behavior<__signaling_NaN, _Tp>::value
2756  and sizeof(_Tp) * _Width == sizeof(__vector_type_t<_Tp, _Width>);
2757 
2758  using _BuiltinType = __vector_type_t<_Tp, _Width>;
2759 
2760  using _Base = _SimdWrapperBase<_S_need_default_init, _BuiltinType>;
2761 
2762  static_assert(__is_vectorizable_v<_Tp>);
2763  static_assert(_Width >= 2); // a width of 1 makes no sense; use _Tp directly instead
2764 
2765  using value_type = _Tp;
2766 
2767  static inline constexpr size_t _S_full_size
2768  = sizeof(_BuiltinType) / sizeof(value_type);
2769  static inline constexpr int _S_size = _Width;
2770  static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2771 
2772  using _Base::_M_data;
2773 
2774  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2775  __as_full_vector() const
2776  { return _M_data; }
2777 
2778  _GLIBCXX_SIMD_INTRINSIC constexpr
2779  _SimdWrapper(initializer_list<_Tp> __init)
2780  : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2781  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2782  return __init.begin()[__i.value];
2783  })) {}
2784 
2785  _GLIBCXX_SIMD_INTRINSIC constexpr
2786  _SimdWrapper() = default;
2787 
2788  _GLIBCXX_SIMD_INTRINSIC constexpr
2789  _SimdWrapper(const _SimdWrapper&) = default;
2790 
2791  _GLIBCXX_SIMD_INTRINSIC constexpr
2792  _SimdWrapper(_SimdWrapper&&) = default;
2793 
2794  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2795  operator=(const _SimdWrapper&) = default;
2796 
2797  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2798  operator=(_SimdWrapper&&) = default;
2799 
2800  // Convert from exactly matching __vector_type_t
2801  using _SimdWrapperBase<_S_need_default_init, _BuiltinType>::_SimdWrapperBase;
2802 
2803  // Convert from __intrinsic_type_t if __intrinsic_type_t and __vector_type_t differ, otherwise
2804  // this ctor should not exist. Making the argument type unusable is our next best solution.
2805  _GLIBCXX_SIMD_INTRINSIC constexpr
2806  _SimdWrapper(conditional_t<is_same_v<_BuiltinType, __intrinsic_type_t<_Tp, _Width>>,
2807  _DisabledSimdWrapper, __intrinsic_type_t<_Tp, _Width>> __x)
2808  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2809 
2810  // Convert from different __vector_type_t, but only if bit reinterpretation is a correct
2811  // conversion of the value_type
2812  template <typename _V, typename _TVT = _VectorTraits<_V>,
2813  typename = enable_if_t<sizeof(typename _TVT::value_type) == sizeof(_Tp)
2814  and sizeof(_V) == sizeof(_BuiltinType)
2815  and is_integral_v<_Tp>
2816  and is_integral_v<typename _TVT::value_type>>>
2817  _GLIBCXX_SIMD_INTRINSIC constexpr
2818  _SimdWrapper(_V __x)
2819  : _Base(reinterpret_cast<_BuiltinType>(__x)) {}
2820 
2821  template <typename... _As,
2822  typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2823  && sizeof...(_As) <= _Width)>>
2824  _GLIBCXX_SIMD_INTRINSIC constexpr
2825  operator _SimdTuple<_Tp, _As...>() const
2826  {
2827  return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2828  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2829  { return _M_data[int(__i)]; });
2830  }
2831 
2832  _GLIBCXX_SIMD_INTRINSIC constexpr
2833  operator const _BuiltinType&() const
2834  { return _M_data; }
2835 
2836  _GLIBCXX_SIMD_INTRINSIC constexpr
2837  operator _BuiltinType&()
2838  { return _M_data; }
2839 
2840  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2841  operator[](size_t __i) const
2842  { return _M_data[__i]; }
2843 
2844  template <size_t __i>
2845  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2846  operator[](_SizeConstant<__i>) const
2847  { return _M_data[__i]; }
2848 
2849  _GLIBCXX_SIMD_INTRINSIC constexpr void
2850  _M_set(size_t __i, _Tp __x)
2851  {
2852  if (__builtin_is_constant_evaluated())
2853  _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2854  return __j == __i ? __x : _M_data[__j()];
2855  });
2856  else
2857  _M_data[__i] = __x;
2858  }
2859 
2860  _GLIBCXX_SIMD_INTRINSIC
2861  constexpr bool
2862  _M_is_constprop() const
2863  { return __builtin_constant_p(_M_data); }
2864 
2865  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2866  _M_is_constprop_none_of() const
2867  {
2868  if (__builtin_constant_p(_M_data))
2869  {
2870  bool __r = true;
2871  if constexpr (is_floating_point_v<_Tp>)
2872  {
2873  using _Ip = __int_for_sizeof_t<_Tp>;
2874  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2875  __execute_n_times<_Width>(
2876  [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2877  }
2878  else
2879  __execute_n_times<_Width>(
2880  [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2881  if (__builtin_constant_p(__r))
2882  return __r;
2883  }
2884  return false;
2885  }
2886 
2887  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2888  _M_is_constprop_all_of() const
2889  {
2890  if (__builtin_constant_p(_M_data))
2891  {
2892  bool __r = true;
2893  if constexpr (is_floating_point_v<_Tp>)
2894  {
2895  using _Ip = __int_for_sizeof_t<_Tp>;
2896  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2897  __execute_n_times<_Width>(
2898  [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2899  }
2900  else
2901  __execute_n_times<_Width>(
2902  [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2903  if (__builtin_constant_p(__r))
2904  return __r;
2905  }
2906  return false;
2907  }
2908  };
2909 
2910 // }}}
2911 
2912 // __vectorized_sizeof {{{
2913 template <typename _Tp>
2914  constexpr size_t
2915  __vectorized_sizeof()
2916  {
2917  if constexpr (!__is_vectorizable_v<_Tp>)
2918  return 0;
2919 
2920  if constexpr (sizeof(_Tp) <= 8)
2921  {
2922  // X86:
2923  if constexpr (__have_avx512bw)
2924  return 64;
2925  if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2926  return 64;
2927  if constexpr (__have_avx2)
2928  return 32;
2929  if constexpr (__have_avx && is_floating_point_v<_Tp>)
2930  return 32;
2931  if constexpr (__have_sse2)
2932  return 16;
2933  if constexpr (__have_sse && is_same_v<_Tp, float>)
2934  return 16;
2935  /* The following is too much trouble because of mixed MMX and x87 code.
2936  * While nothing here explicitly calls MMX instructions or registers,
2937  * they are still emitted, but no EMMS cleanup is done.
2938  if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2939  return 8;
2940  */
2941 
2942  // PowerPC:
2943  if constexpr (__have_power8vec
2944  || (__have_power_vmx && (sizeof(_Tp) < 8))
2945  || (__have_power_vsx && is_floating_point_v<_Tp>) )
2946  return 16;
2947 
2948  // ARM:
2949  if constexpr (__have_neon_a64)
2950  return 16;
2951  if constexpr (__have_neon_a32 and (not is_floating_point_v<_Tp>
2952  or is_same_v<_Tp, float>))
2953  return 16;
2954  if constexpr (__have_neon
2955  && sizeof(_Tp) < 8
2956  // Only allow fp if the user allows non-IEC559 fp (e.g.
2957  // via -ffast-math). ARMv7 NEON fp does not conform to
2958  // IEC559.
2959  && (__support_neon_float || !is_floating_point_v<_Tp>))
2960  return 16;
2961  }
2962 
2963  return sizeof(_Tp);
2964  }
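// Illustrative note (comment only): the result is the byte width of the
// widest usable vector register for _Tp. E.g. on x86-64 with AVX2 but no
// AVX-512, __vectorized_sizeof<float>() == 32 (8 floats per vector).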
2965 
2966 // }}}
2967 namespace simd_abi {
2968 // most of simd_abi is defined in simd_detail.h
2969 template <typename _Tp>
2970  inline constexpr int max_fixed_size
2971  = ((__have_avx512bw && sizeof(_Tp) == 1)
2972  || (__have_sve && __sve_vectorized_size_bytes/sizeof(_Tp) >= 64)) ? 64 : 32;
2973 
2974 // compatible {{{
2975 #if defined __x86_64__ || defined __aarch64__
2976 template <typename _Tp>
2977  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2978 #elif defined __ARM_NEON
2979 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2980 // ABI?)
2981 template <typename _Tp>
2982  using compatible
2983  = conditional_t<(sizeof(_Tp) < 8
2984  && (__support_neon_float || !is_floating_point_v<_Tp>)),
2985  _VecBuiltin<16>, scalar>;
2986 #else
2987 template <typename>
2988  using compatible = scalar;
2989 #endif
2990 
2991 // }}}
2992 // native {{{
2993 template <typename _Tp>
2994  constexpr auto
2995  __determine_native_abi()
2996  {
2997  constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2998  if constexpr (__bytes == sizeof(_Tp))
2999  return static_cast<scalar*>(nullptr);
3000  else if constexpr (__have_sve)
3001  return static_cast<_SveAbi<__sve_vectorized_size_bytes>*>(nullptr);
3002  else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
3003  return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
3004  else
3005  return static_cast<_VecBuiltin<__bytes>*>(nullptr);
3006  }
3007 
3008 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
3009  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
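// Illustrative note (comment only): on x86-64 with AVX2 (and no AVX-512),
// __determine_native_abi<float>() selects _VecBuiltin<32>, so
// native_simd<float>::size() == 8; with AVX-512VL it selects
// _VecBltnBtmsk<64> and size() == 16.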
3010 
3011 // }}}
3012 // __default_abi {{{
3013 #if defined _GLIBCXX_SIMD_DEFAULT_ABI
3014 template <typename _Tp>
3015  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
3016 #else
3017 template <typename _Tp>
3018  using __default_abi = compatible<_Tp>;
3019 #endif
3020 
3021 // }}}
3022 } // namespace simd_abi
3023 
3024 // traits {{{1
3025 template <typename _Tp>
3026  struct is_simd_flag_type
3027  : false_type
3028  {};
3029 
3030 template <>
3031  struct is_simd_flag_type<element_aligned_tag>
3032  : true_type
3033  {};
3034 
3035 template <>
3036  struct is_simd_flag_type<vector_aligned_tag>
3037  : true_type
3038  {};
3039 
3040 template <size_t _Np>
3041  struct is_simd_flag_type<overaligned_tag<_Np>>
3042  : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
3043  {};
3044 
3045 template <typename _Tp>
3046  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
3047 
3048 template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
3049  using _IsSimdFlagType = _Tp;
3050 
3051 // is_abi_tag {{{2
3052 template <typename _Tp, typename = void_t<>>
3053  struct is_abi_tag : false_type {};
3054 
3055 template <typename _Tp>
3056  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
3057  : public _Tp::_IsValidAbiTag {};
3058 
3059 template <typename _Tp>
3060  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
3061 
3062 // is_simd(_mask) {{{2
3063 template <typename _Tp>
3064  struct is_simd : public false_type {};
3065 
3066 template <typename _Tp>
3067  inline constexpr bool is_simd_v = is_simd<_Tp>::value;
3068 
3069 template <typename _Tp>
3070  struct is_simd_mask : public false_type {};
3071 
3072 template <typename _Tp>
3073  inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
3074 
3075 // simd_size {{{2
3076 template <typename _Tp, typename _Abi, typename = void>
3077  struct __simd_size_impl {};
3078 
3079 template <typename _Tp, typename _Abi>
3080  struct __simd_size_impl<
3081  _Tp, _Abi,
3082  enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
3083  : _SizeConstant<_Abi::template _S_size<_Tp>> {};
3084 
3085 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3086  struct simd_size : __simd_size_impl<_Tp, _Abi> {};
3087 
3088 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3089  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
3090 
3091 // simd_abi::deduce {{{2
3092 template <typename _Tp, size_t _Np, typename = void>
3093  struct __deduce_impl;
3094 
3095 template <typename _Tp, size_t _Np, typename = void>
3096  struct __no_sve_deduce_impl;
3097 
3098 namespace simd_abi {
3099 /**
3100  * @tparam _Tp The requested `value_type` for the elements.
3101  * @tparam _Np The requested number of elements.
3102  * @tparam _Abis This parameter is ignored, since this implementation cannot
3103  * make any use of it. Either a good native ABI is matched and used as `type`
3104  * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
3105  * the best matching native ABIs.
3106  */
3107 template <typename _Tp, size_t _Np, typename...>
3108  struct deduce : __deduce_impl<_Tp, _Np> {};
3109 
3110 template <typename _Tp, size_t _Np, typename... _Abis>
3111  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
3112 
3113 template <typename _Tp, size_t _Np, typename...>
3114  struct __no_sve_deduce : __no_sve_deduce_impl<_Tp, _Np> {};
3115 
3116 template <typename _Tp, size_t _Np, typename... _Abis>
3117  using __no_sve_deduce_t = typename __no_sve_deduce<_Tp, _Np, _Abis...>::type;
3118 } // namespace simd_abi
3119 
3120 // }}}2
3121 // rebind_simd {{{2
3122 template <typename _Tp, typename _V, typename = void>
3123  struct rebind_simd;
3124 
3125 template <typename _Tp, typename _Up, typename _Abi>
3126  struct rebind_simd<_Tp, simd<_Up, _Abi>,
3127  void_t<std::conditional_t<!__is_sve_abi<_Abi>(),
3128  simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
3129  simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>>
3130  {
3131  using type = simd<_Tp, std::conditional_t<
3132  !__is_sve_abi<_Abi>(),
3133  simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
3134  simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>;
3135  };
3136 
3137 template <typename _Tp, typename _Up, typename _Abi>
3138  struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
3139  void_t<std::conditional_t<!__is_sve_abi<_Abi>(),
3140  simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
3141  simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>>
3142  {
3143  using type = simd_mask<_Tp, std::conditional_t<
3144  !__is_sve_abi<_Abi>(),
3145  simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>,
3146  simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>;
3147  };
3148 
3149 template <typename _Tp, typename _V>
3150  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
3151 
3152 // resize_simd {{{2
3153 template <int _Np, typename _V, typename = void>
3154  struct resize_simd;
3155 
3156 template <int _Np, typename _Tp, typename _Abi>
3157  struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3158  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3159 
3160 template <int _Np, typename _Tp, typename _Abi>
3161  struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3162  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3163 
3164 template <int _Np, typename _V>
3165  using resize_simd_t = typename resize_simd<_Np, _V>::type;
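// Usage sketch (illustrative comment only):
//   using _IV = rebind_simd_t<int, native_simd<float>>; // same size(), int
//   using _HV = resize_simd_t<2, _IV>;                  // 2 ints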
3166 
3167 // }}}2
3168 // memory_alignment {{{2
3169 template <typename _Tp, typename _Up = typename _Tp::value_type>
3170  struct memory_alignment
3171  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
3172 
3173 template <typename _Tp, typename _Up = typename _Tp::value_type>
3174  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
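// Usage sketch (illustrative comment only): allocate storage suitable for
// vector_aligned access:
//   alignas(memory_alignment_v<native_simd<float>>)
//     float __mem[native_simd<float>::size()];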
3175 
3176 // class template simd [simd] {{{1
3177 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3178  class simd;
3179 
3180 template <typename _Tp, typename _Abi>
3181  struct is_simd<simd<_Tp, _Abi>> : public true_type {};
3182 
3183 template <typename _Tp>
3184  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
3185 
3186 template <typename _Tp, int _Np>
3187  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
3188 
3189 template <typename _Tp, size_t _Np>
3190  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3191 
3192 // class template simd_mask [simd_mask] {{{1
3193 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3194  class simd_mask;
3195 
3196 template <typename _Tp, typename _Abi>
3197  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
3198 
3199 template <typename _Tp>
3200  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
3201 
3202 template <typename _Tp, int _Np>
3203  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
3204 
3205 template <typename _Tp, size_t _Np>
3206  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3207 
3208 // casts [simd.casts] {{{1
3209 // static_simd_cast {{{2
3210 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
3211  struct __static_simd_cast_return_type;
3212 
3213 template <typename _Tp, typename _A0, typename _Up, typename _Ap>
3214  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
3215  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
3216 
3217 template <typename _Tp, typename _Up, typename _Ap>
3218  struct __static_simd_cast_return_type<
3219  _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
3220  { using type = _Tp; };
3221 
3222 template <typename _Tp, typename _Ap>
3223  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
3224 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3225  enable_if_t<__is_vectorizable_v<_Tp>>
3226 #else
3227  void
3228 #endif
3229  >
3230  { using type = simd<_Tp, _Ap>; };
3231 
3232 template <typename _Tp, typename = void>
3233  struct __safe_make_signed { using type = _Tp; };
3234 
3235 template <typename _Tp>
3236  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
3237  {
3238  // the extra make_unsigned_t is because of PR85951
3239  using type = make_signed_t<make_unsigned_t<_Tp>>;
3240  };
3241 
3242 template <typename _Tp>
3243  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
3244 
3245 template <typename _Tp, typename _Up, typename _Ap>
3246  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3247 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3248  enable_if_t<__is_vectorizable_v<_Tp>>
3249 #else
3250  void
3251 #endif
3252  >
3253  {
3254  using type = conditional_t<
3255  (is_integral_v<_Up> && is_integral_v<_Tp> &&
3256 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3257  is_signed_v<_Up> != is_signed_v<_Tp> &&
3258 #endif
3259  is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
3260  simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3261  };
3262 
3263 template <typename _Tp, typename _Up, typename _Ap,
3264  typename _R
3265  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3266  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3267  static_simd_cast(const simd<_Up, _Ap>& __x)
3268  {
3269  if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3270  return __x;
3271  else
3272  {
3273  _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3274  __c;
3275  return _R(__private_init, __c(__data(__x)));
3276  }
3277  }
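// Usage sketch (illustrative comment only): element-wise conversion that
// keeps the element count:
//   native_simd<float> __f([](auto __i) { return float(__i); });
//   auto __n = static_simd_cast<int>(__f); // same size(), value_type int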
3278 
3279 namespace __proposed {
3280 template <typename _Tp, typename _Up, typename _Ap,
3281  typename _R
3282  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3283  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3284  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3285  {
3286  using _RM = typename _R::mask_type;
3287  return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3288  typename _RM::simd_type::value_type>(__x)};
3289  }
3290 
3291 template <typename _To, typename _Up, typename _Abi>
3292  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3293  _To
3294  simd_bit_cast(const simd<_Up, _Abi>& __x)
3295  {
3296  using _Tp = typename _To::value_type;
3297  using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3298  using _From = simd<_Up, _Abi>;
3299  using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3300  // with concepts, the following should be constraints
3301  static_assert(sizeof(_To) == sizeof(_From));
3302  static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3303  static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3304 #if __has_builtin(__builtin_bit_cast)
3305  return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3306 #else
3307  return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3308 #endif
3309  }
3310 
3311 template <typename _To, typename _Up, typename _Abi>
3312  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3313  _To
3314  simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3315  {
3316  using _From = simd_mask<_Up, _Abi>;
3317  static_assert(sizeof(_To) == sizeof(_From));
3318  static_assert(is_trivially_copyable_v<_From>);
3319  // _To can be simd<T, A>, specifically simd<T, fixed_size<N>>, in which case _To is not
3320  // trivially copyable.
3321  if constexpr (is_simd_v<_To>)
3322  {
3323  using _Tp = typename _To::value_type;
3324  using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3325  static_assert(is_trivially_copyable_v<_ToMember>);
3326 #if __has_builtin(__builtin_bit_cast)
3327  return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3328 #else
3329  return {__private_init, __bit_cast<_ToMember>(__x)};
3330 #endif
3331  }
3332  else
3333  {
3334  static_assert(is_trivially_copyable_v<_To>);
3335 #if __has_builtin(__builtin_bit_cast)
3336  return __builtin_bit_cast(_To, __x);
3337 #else
3338  return __bit_cast<_To>(__x);
3339 #endif
3340  }
3341  }
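// Usage sketch (illustrative comment only): reinterpret the object
// representation while keeping sizeof:
//   using _FV = native_simd<float>;
//   using _IV = rebind_simd_t<int, _FV>;
//   _IV __bits = simd_bit_cast<_IV>(_FV(1.f)); // each element 0x3f800000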
3342 } // namespace __proposed
3343 
3344 // simd_cast {{{2
3345 template <typename _Tp, typename _Up, typename _Ap,
3346  typename _To = __value_type_or_identity_t<_Tp>>
3347  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3348  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3349  -> decltype(static_simd_cast<_Tp>(__x))
3350  { return static_simd_cast<_Tp>(__x); }
3351 
3352 namespace __proposed {
3353 template <typename _Tp, typename _Up, typename _Ap,
3354  typename _To = __value_type_or_identity_t<_Tp>>
3355  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3356  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3357  -> decltype(static_simd_cast<_Tp>(__x))
3358  { return static_simd_cast<_Tp>(__x); }
3359 } // namespace __proposed
3360 
3361 // }}}2
3362 // resizing_simd_cast {{{
3363 namespace __proposed {
3364 /* Proposed spec:
3365 
3366 template <class T, class U, class Abi>
3367 T resizing_simd_cast(const simd<U, Abi>& x)
3368 
3369 p1 Constraints:
3370  - is_simd_v<T> is true and
3371  - T::value_type is the same type as U
3372 
3373 p2 Returns:
3374  A simd object with the i^th element initialized to x[i] for all i in the
3375  range [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3376  than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3377 
3378 template <class T, class U, class Abi>
3379 T resizing_simd_cast(const simd_mask<U, Abi>& x)
3380 
3381 p1 Constraints: is_simd_mask_v<T> is true
3382 
3383 p2 Returns:
3384  A simd_mask object with the i^th element initialized to x[i] for all i in
3385  the range [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3386  than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3387 
3388  */
3389 
3390 template <typename _Tp, typename _Up, typename _Ap>
3391  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3392  conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3393  resizing_simd_cast(const simd<_Up, _Ap>& __x)
3394  {
3395  if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3396  return __x;
3397  else if (__builtin_is_constant_evaluated())
3398  return _Tp([&](auto __i) constexpr {
3399  return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
3400  });
3401  else if constexpr (simd_size_v<_Up, _Ap> == 1)
3402  {
3403  _Tp __r{};
3404  __r[0] = __x[0];
3405  return __r;
3406  }
3407  else if constexpr (_Tp::size() == 1)
3408  return __x[0];
3409  else if constexpr (sizeof(_Tp) == sizeof(__x)
3410  && !__is_fixed_size_abi_v<_Ap> && !__is_sve_abi<_Ap>())
3411  return {__private_init,
3412  __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3413  _Ap::_S_masked(__data(__x))._M_data)};
3414  else
3415  {
3416  _Tp __r{};
3417  __builtin_memcpy(&__data(__r), &__data(__x),
3418  sizeof(_Up)
3419  * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3420  return __r;
3421  }
3422  }
3423 
3424 template <typename _Tp, typename _Up, typename _Ap>
3425  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3426  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3427  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3428  {
3429  return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3430  typename _Tp::simd_type::value_type>(__x)};
3431  }
3432 } // namespace __proposed
3433 
3434 // }}}
3435 // to_fixed_size {{{2
3436 template <typename _Tp, int _Np>
3437  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3438  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3439  { return __x; }
3440 
3441 template <typename _Tp, int _Np>
3442  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3443  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3444  { return __x; }
3445 
3446 template <typename _Tp, typename _Ap>
3447  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
3448  to_fixed_size(const simd<_Tp, _Ap>& __x)
3449  {
3450  using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
3451  return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3452  }
3453 
3454 template <typename _Tp, typename _Ap>
3455  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
3456  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3457  {
3458  return {__private_init,
3459  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
3460  }
3461 
3462 // to_native {{{2
3463 template <typename _Tp, int _Np>
3464  _GLIBCXX_SIMD_INTRINSIC
3465  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3466  to_native(const fixed_size_simd<_Tp, _Np>& __x)
3467  {
3468  alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3469  __x.copy_to(__mem, vector_aligned);
3470  return {__mem, vector_aligned};
3471  }
3472 
3473 template <typename _Tp, int _Np>
3474  _GLIBCXX_SIMD_INTRINSIC
3475  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3476  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3477  {
3478  return native_simd_mask<_Tp>(
3479  __private_init,
3480  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3481  }
3482 
3483 // to_compatible {{{2
3484 template <typename _Tp, int _Np>
3485  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3486  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3487  {
3488  alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3489  __x.copy_to(__mem, vector_aligned);
3490  return {__mem, vector_aligned};
3491  }
3492 
3493 template <typename _Tp, int _Np>
3494  _GLIBCXX_SIMD_INTRINSIC
3495  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3496  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3497  {
3498  return simd_mask<_Tp>(
3499  __private_init,
3500  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3501  }
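
// Example (illustrative): round-trip between native and fixed_size ABIs:
//   native_simd<float> __n = ...;
//   auto __f = to_fixed_size(__n);  // fixed_size_simd<float, N>
//   auto __n2 = to_native(__f);     // native_simd<float> again
//   auto __c = to_compatible(__f);  // simd<float>, if the sizes match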
3502 
3503 // masked assignment [simd_mask.where] {{{1
3504 
3505 // where_expression {{{1
3506 // const_where_expression<M, T> {{{2
3507 template <typename _M, typename _Tp>
3508  class const_where_expression
3509  {
3510  using _V = _Tp;
3511  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3512 
3513  struct _Wrapper { using value_type = _V; };
3514 
3515  protected:
3516  using _Impl = typename _V::_Impl;
3517 
3518  using value_type =
3519  typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3520 
3521  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3522  __get_mask(const const_where_expression& __x)
3523  { return __x._M_k; }
3524 
3525  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3526  __get_lvalue(const const_where_expression& __x)
3527  { return __x._M_value; }
3528 
3529  const _M& _M_k;
3530  _Tp& _M_value;
3531 
3532  public:
3533  const_where_expression(const const_where_expression&) = delete;
3534 
3535  const_where_expression& operator=(const const_where_expression&) = delete;
3536 
3537  _GLIBCXX_SIMD_INTRINSIC constexpr
3538  const_where_expression(const _M& __kk, const _Tp& __dd)
3539  : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}
3540 
3541  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3542  operator-() const&&
3543  {
3544  return {__private_init,
3545  _Impl::template _S_masked_unary<negate>(__data(_M_k),
3546  __data(_M_value))};
3547  }
3548 
3549  template <typename _Up, typename _Flags>
3550  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3551  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3552  {
3553  return {__private_init,
3554  _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3555  _Flags::template _S_apply<_V>(__mem))};
3556  }
3557 
3558  template <typename _Up, typename _Flags>
3559  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3560  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3561  {
3562  _Impl::_S_masked_store(__data(_M_value),
3563  _Flags::template _S_apply<_V>(__mem),
3564  __data(_M_k));
3565  }
3566  };
3567 
3568 // const_where_expression<bool, T> {{{2
3569 template <typename _Tp>
3570  class const_where_expression<bool, _Tp>
3571  {
3572  using _M = bool;
3573  using _V = _Tp;
3574 
3575  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3576 
3577  struct _Wrapper { using value_type = _V; };
3578 
3579  protected:
3580  using value_type
3581  = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3582 
3583  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3584  __get_mask(const const_where_expression& __x)
3585  { return __x._M_k; }
3586 
3587  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3588  __get_lvalue(const const_where_expression& __x)
3589  { return __x._M_value; }
3590 
3591  const bool _M_k;
3592  _Tp& _M_value;
3593 
3594  public:
3595  const_where_expression(const const_where_expression&) = delete;
3596  const_where_expression& operator=(const const_where_expression&) = delete;
3597 
3598  _GLIBCXX_SIMD_INTRINSIC constexpr
3599  const_where_expression(const bool __kk, const _Tp& __dd)
3600  : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}
3601 
3602  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3603  operator-() const&&
3604  { return _M_k ? -_M_value : _M_value; }
3605 
3606  template <typename _Up, typename _Flags>
3607  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3608  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3609  { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3610 
3611  template <typename _Up, typename _Flags>
3612  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3613  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3614  {
3615  if (_M_k)
3616  __mem[0] = _M_value;
3617  }
3618  };
3619 
3620 // where_expression<M, T> {{{2
3621 template <typename _M, typename _Tp>
3622  class where_expression : public const_where_expression<_M, _Tp>
3623  {
3624  using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3625 
3626  static_assert(!is_const<_Tp>::value,
3627  "where_expression may only be instantiated with a non-const "
3628  "_Tp parameter");
3629 
3630  using typename const_where_expression<_M, _Tp>::value_type;
3631  using const_where_expression<_M, _Tp>::_M_k;
3632  using const_where_expression<_M, _Tp>::_M_value;
3633 
3634  static_assert(
3635  is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3636  static_assert(_M::size() == _Tp::size(), "");
3637 
3638  _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3639  __get_lvalue(where_expression& __x)
3640  { return __x._M_value; }
3641 
3642  public:
3643  where_expression(const where_expression&) = delete;
3644  where_expression& operator=(const where_expression&) = delete;
3645 
3646  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3647  where_expression(const _M& __kk, _Tp& __dd)
3648  : const_where_expression<_M, _Tp>(__kk, __dd) {}
3649 
3650  template <typename _Up>
3651  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3652  operator=(_Up&& __x) &&
3653  {
3654  _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3655  __to_value_type_or_member_type<_Tp>(
3656  static_cast<_Up&&>(__x)));
3657  }
3658 
3659 #define _GLIBCXX_SIMD_OP_(__op, __name) \
3660  template <typename _Up> \
3661  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3662  operator __op##=(_Up&& __x)&& \
3663  { \
3664  _Impl::template _S_masked_cassign( \
3665  __data(_M_k), __data(_M_value), \
3666  __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3667  [](auto __impl, auto __lhs, auto __rhs) \
3668  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3669  { return __impl.__name(__lhs, __rhs); }); \
3670  } \
3671  static_assert(true)
3672  _GLIBCXX_SIMD_OP_(+, _S_plus);
3673  _GLIBCXX_SIMD_OP_(-, _S_minus);
3674  _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3675  _GLIBCXX_SIMD_OP_(/, _S_divides);
3676  _GLIBCXX_SIMD_OP_(%, _S_modulus);
3677  _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3678  _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3679  _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3680  _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3681  _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3682 #undef _GLIBCXX_SIMD_OP_
3683 
3684  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3685  operator++() &&
3686  {
3687  __data(_M_value)
3688  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3689  }
3690 
3691  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3692  operator++(int) &&
3693  {
3694  __data(_M_value)
3695  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3696  }
3697 
3698  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3699  operator--() &&
3700  {
3701  __data(_M_value)
3702  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3703  }
3704 
3705  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3706  operator--(int) &&
3707  {
3708  __data(_M_value)
3709  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3710  }
3711 
3712  // intentionally hides const_where_expression::copy_from
3713  template <typename _Up, typename _Flags>
3714  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3715  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3716  {
3717  __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3718  _Flags::template _S_apply<_Tp>(__mem));
3719  }
3720  };
3721 
3722 // where_expression<bool, T> {{{2
3723 template <typename _Tp>
3724  class where_expression<bool, _Tp>
3725  : public const_where_expression<bool, _Tp>
3726  {
3727  using _M = bool;
3728  using typename const_where_expression<_M, _Tp>::value_type;
3729  using const_where_expression<_M, _Tp>::_M_k;
3730  using const_where_expression<_M, _Tp>::_M_value;
3731 
3732  public:
3733  where_expression(const where_expression&) = delete;
3734  where_expression& operator=(const where_expression&) = delete;
3735 
3736  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3737  where_expression(const _M& __kk, _Tp& __dd)
3738  : const_where_expression<_M, _Tp>(__kk, __dd) {}
3739 
3740 #define _GLIBCXX_SIMD_OP_(__op) \
3741  template <typename _Up> \
3742  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3743  operator __op(_Up&& __x)&& \
3744  { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3745 
3746  _GLIBCXX_SIMD_OP_(=)
3747  _GLIBCXX_SIMD_OP_(+=)
3748  _GLIBCXX_SIMD_OP_(-=)
3749  _GLIBCXX_SIMD_OP_(*=)
3750  _GLIBCXX_SIMD_OP_(/=)
3751  _GLIBCXX_SIMD_OP_(%=)
3752  _GLIBCXX_SIMD_OP_(&=)
3753  _GLIBCXX_SIMD_OP_(|=)
3754  _GLIBCXX_SIMD_OP_(^=)
3755  _GLIBCXX_SIMD_OP_(<<=)
3756  _GLIBCXX_SIMD_OP_(>>=)
3757 #undef _GLIBCXX_SIMD_OP_
3758 
3759  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3760  operator++() &&
3761  { if (_M_k) ++_M_value; }
3762 
3763  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3764  operator++(int) &&
3765  { if (_M_k) ++_M_value; }
3766 
3767  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3768  operator--() &&
3769  { if (_M_k) --_M_value; }
3770 
3771  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3772  operator--(int) &&
3773  { if (_M_k) --_M_value; }
3774 
3775  // intentionally hides const_where_expression::copy_from
3776  template <typename _Up, typename _Flags>
3777  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3778  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3779  { if (_M_k) _M_value = __mem[0]; }
3780  };
3781 
3782 // where {{{1
3783 template <typename _Tp, typename _Ap>
3784  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3785  where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3786  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3787  { return {__k, __value}; }
3788 
3789 template <typename _Tp, typename _Ap>
3790  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3791  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3792  where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
3793  { return {__k, __value}; }
3794 
3795 template <typename _Tp, typename _Ap>
3796  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3797  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3798  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
3799  { return {__k, __value}; }
3800 
3801 template <typename _Tp, typename _Ap>
3802  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3803  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3804  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
3805  { return {__k, __value}; }
3806 
3807 template <typename _Tp>
3808  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
3809  where(_ExactBool __k, _Tp& __value)
3810  { return {__k, __value}; }
3811 
3812 template <typename _Tp>
3813  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
3814  where(_ExactBool __k, const _Tp& __value)
3815  { return {__k, __value}; }
3816 
3817 template <typename _Tp, typename _Ap>
3818  _GLIBCXX_SIMD_CONSTEXPR void
3819  where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3820 
3821 template <typename _Tp, typename _Ap>
3822  _GLIBCXX_SIMD_CONSTEXPR void
3823  where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
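
// Example (illustrative): masked assignment replaces only the selected
// elements:
//   simd<float> __v = ...;
//   where(__v < 0.f, __v) = 0.f;    // clamp negative elements to zero
//   where(__v > 1.f, __v) -= 1.f;   // masked compound assignment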
3824 
3825 // proposed mask iterations {{{1
3826 namespace __proposed {
3827 template <size_t _Np>
3828  class where_range
3829  {
3830  const bitset<_Np> __bits;
3831 
3832  public:
3833  where_range(bitset<_Np> __b) : __bits(__b) {}
3834 
3835  class iterator
3836  {
3837  size_t __mask;
3838  size_t __bit;
3839 
3840  _GLIBCXX_SIMD_INTRINSIC void
3841  __next_bit()
3842  { __bit = __builtin_ctzl(__mask); }
3843 
3844  _GLIBCXX_SIMD_INTRINSIC void
3845  __reset_lsb()
3846  {
3847  // clear the lowest set bit: 01100100 & (01100100 - 1) = 01100000
3848  __mask &= (__mask - 1);
3849  // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3850  }
3851 
3852  public:
3853  iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3854  iterator(const iterator&) = default;
3855  iterator(iterator&&) = default;
3856 
3857  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3858  operator->() const
3859  { return __bit; }
3860 
3861  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3862  operator*() const
3863  { return __bit; }
3864 
3865  _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3866  operator++()
3867  {
3868  __reset_lsb();
3869  __next_bit();
3870  return *this;
3871  }
3872 
3873  _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3874  operator++(int)
3875  {
3876  iterator __tmp = *this;
3877  __reset_lsb();
3878  __next_bit();
3879  return __tmp;
3880  }
3881 
3882  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3883  operator==(const iterator& __rhs) const
3884  { return __mask == __rhs.__mask; }
3885 
3886  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3887  operator!=(const iterator& __rhs) const
3888  { return __mask != __rhs.__mask; }
3889  };
3890 
3891  iterator
3892  begin() const
3893  { return __bits.to_ullong(); }
3894 
3895  iterator
3896  end() const
3897  { return 0; }
3898  };
3899 
3900 template <typename _Tp, typename _Ap>
3901  where_range<simd_size_v<_Tp, _Ap>>
3902  where(const simd_mask<_Tp, _Ap>& __k)
3903  { return __k.__to_bitset(); }
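
// Example (illustrative): iterate over the indices of set mask elements:
//   for (size_t __i : where(__k))  // __k is a simd_mask (hypothetical)
//     __mem[__i] += 1;             // __mem is a hypothetical array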
3904 
3905 } // namespace __proposed
3906 
3907 // }}}1
3908 // reductions [simd.reductions] {{{1
3909 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3910  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3911  reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
3912  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3913 
3914 template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3915  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3916  reduce(const const_where_expression<_M, _V>& __x,
3917  typename _V::value_type __identity_element, _BinaryOperation __binary_op)
3918  {
3919  if (__builtin_expect(none_of(__get_mask(__x)), false))
3920  return __identity_element;
3921 
3922  _V __tmp = __identity_element;
3923  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3924  __data(__get_lvalue(__x)));
3925  return reduce(__tmp, __binary_op);
3926  }
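
// Example (illustrative): sum only the elements selected by the mask; the
// masked-off lanes contribute the identity element (here 0):
//   simd<float> __v = ...;
//   float __sum = reduce(where(__v > 0.f, __v), 0.f, plus<>());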
3927 
3928 template <typename _M, typename _V>
3929  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3930  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3931  { return reduce(__x, 0, __binary_op); }
3932 
3933 template <typename _M, typename _V>
3934  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3935  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3936  { return reduce(__x, 1, __binary_op); }
3937 
3938 template <typename _M, typename _V>
3939  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3940  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3941  { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3942 
3943 template <typename _M, typename _V>
3944  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3945  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3946  { return reduce(__x, 0, __binary_op); }
3947 
3948 template <typename _M, typename _V>
3949  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3950  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3951  { return reduce(__x, 0, __binary_op); }
3952 
3953 template <typename _Tp, typename _Abi>
3954  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3955  hmin(const simd<_Tp, _Abi>& __v) noexcept
3956  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3957 
3958 template <typename _Tp, typename _Abi>
3959  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3960  hmax(const simd<_Tp, _Abi>& __v) noexcept
3961  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
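
// Example (illustrative):
//   simd<int> __v([](int __i) { return __i - 2; });  // {-2, -1, 0, 1, ...}
//   int __lo = hmin(__v);                            // -2
//   int __hi = hmax(__v);                            // int(__v.size()) - 3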
3962 
3963 template <typename _M, typename _V>
3964  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3965  typename _V::value_type
3966  hmin(const const_where_expression<_M, _V>& __x) noexcept
3967  {
3968  using _Tp = typename _V::value_type;
3969  constexpr _Tp __id_elem =
3970 #if __FINITE_MATH_ONLY__ // always defined by GCC (to 0 or 1)
3971  __finite_max_v<_Tp>;
3972 #else
3973  __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3974 #endif
3975  _V __tmp = __id_elem;
3976  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3977  __data(__get_lvalue(__x)));
3978  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3979  }
3980 
3981 template <typename _M, typename _V>
3982  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3983  typename _V::value_type
3984  hmax(const const_where_expression<_M, _V>& __x) noexcept
3985  {
3986  using _Tp = typename _V::value_type;
3987  constexpr _Tp __id_elem =
3988 #if __FINITE_MATH_ONLY__ // always defined by GCC (to 0 or 1)
3989  __finite_min_v<_Tp>;
3990 #else
3991  [] {
3992  if constexpr (__value_exists_v<__infinity, _Tp>)
3993  return -__infinity_v<_Tp>;
3994  else
3995  return __finite_min_v<_Tp>;
3996  }();
3997 #endif
3998  _V __tmp = __id_elem;
3999  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
4000  __data(__get_lvalue(__x)));
4001  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
4002  }
4003 
4004 // }}}1
4005 // algorithms [simd.alg] {{{
4006 template <typename _Tp, typename _Ap>
4007  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
4008  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
4009  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
4010 
4011 template <typename _Tp, typename _Ap>
4012  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
4013  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
4014  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
4015 
4016 template <typename _Tp, typename _Ap>
4017  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4018  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
4019  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
4020  {
4021  const auto pair_of_members
4022  = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
4023  return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
4024  simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
4025  }
4026 
4027 template <typename _Tp, typename _Ap>
4028  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
4029  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
4030  {
4031  using _Impl = typename _Ap::_SimdImpl;
4032  return {__private_init,
4033  _Impl::_S_min(__data(__hi),
4034  _Impl::_S_max(__data(__lo), __data(__v)))};
4035  }
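
// Example (illustrative): elementwise clamp to [0, 1]:
//   simd<float> __c = clamp(__v, simd<float>(0.f), simd<float>(1.f));
//   // i.e. __c[i] == std::min(1.f, std::max(0.f, __v[i]))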
4036 
4037 // }}}
4038 
4039 template <size_t... _Sizes, typename _Tp, typename _Ap,
4040  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
4041  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4042  split(const simd<_Tp, _Ap>&);
4043 
4044 // __extract_part {{{
4045 template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
4046  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
4047  conditional_t<_Np == _Total and _Combine == 1, _Tp, _SimdWrapper<_Tp, _Np / _Total * _Combine>>
4048  __extract_part(const _SimdWrapper<_Tp, _Np> __x);
4049 
4050 template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
4051  _GLIBCXX_SIMD_INTRINSIC constexpr auto
4052  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
4053 
4054 // }}}
4055 // _SizeList {{{
4056 template <size_t _V0, size_t... _Values>
4057  struct _SizeList
4058  {
4059  template <size_t _I>
4060  static constexpr size_t
4061  _S_at(_SizeConstant<_I> = {})
4062  {
4063  if constexpr (_I == 0)
4064  return _V0;
4065  else
4066  return _SizeList<_Values...>::template _S_at<_I - 1>();
4067  }
4068 
4069  template <size_t _I>
4070  static constexpr auto
4071  _S_before(_SizeConstant<_I> = {})
4072  {
4073  if constexpr (_I == 0)
4074  return _SizeConstant<0>();
4075  else
4076  return _SizeConstant<
4077  _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
4078  }
4079 
4080  template <size_t _Np>
4081  static constexpr auto
4082  _S_pop_front(_SizeConstant<_Np> = {})
4083  {
4084  if constexpr (_Np == 0)
4085  return _SizeList();
4086  else
4087  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
4088  }
4089  };
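
// E.g. (illustrative) for _SizeList<2, 2, 4>: _S_at<1>() == 2,
// _S_before<2>() returns _SizeConstant<4>(), and _S_pop_front<1>() returns
// _SizeList<2, 4>().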
4090 
4091 // }}}
4092 // __extract_center {{{
4093 template <typename _Tp, size_t _Np>
4094  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
4095  __extract_center(_SimdWrapper<_Tp, _Np> __x)
4096  {
4097  static_assert(_Np >= 4);
4098  static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
4099 #if _GLIBCXX_SIMD_X86INTRIN // {{{
4100  if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
4101  {
4102  const auto __intrin = __to_intrin(__x);
4103  if constexpr (is_integral_v<_Tp>)
4104  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
4105  _mm512_shuffle_i32x4(__intrin, __intrin,
4106  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4107  else if constexpr (sizeof(_Tp) == 4)
4108  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
4109  _mm512_shuffle_f32x4(__intrin, __intrin,
4110  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4111  else if constexpr (sizeof(_Tp) == 8)
4112  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
4113  _mm512_shuffle_f64x2(__intrin, __intrin,
4114  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
4115  else
4116  __assert_unreachable<_Tp>();
4117  }
4118  else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
4119  return __vector_bitcast<_Tp>(
4120  _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
4121  __hi128(__vector_bitcast<double>(__x)), 1));
4122  else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
4123  return __vector_bitcast<_Tp>(
4124  _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
4125  __lo128(__vector_bitcast<_LLong>(__x)),
4126  sizeof(_Tp) * _Np / 4));
4127  else
4128 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
4129  {
4130  __vector_type_t<_Tp, _Np / 2> __r;
4131  __builtin_memcpy(&__r,
4132  reinterpret_cast<const char*>(&__x)
4133  + sizeof(_Tp) * _Np / 4,
4134  sizeof(_Tp) * _Np / 2);
4135  return __r;
4136  }
4137  }
4138 
4139 template <typename _Tp, typename _A0, typename... _As>
4140  _GLIBCXX_SIMD_INTRINSIC
4141  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
4142  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
4143  {
4144  if constexpr (sizeof...(_As) == 0)
4145  return __extract_center(__x.first);
4146  else
4147  return __extract_part<1, 4, 2>(__x);
4148  }
4149 
4150 // }}}
4151 // __split_wrapper {{{
4152 template <size_t... _Sizes, typename _Tp, typename... _As>
4153  auto
4154  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
4155  {
4156  return split<_Sizes...>(
4157  fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
4158  __x));
4159  }
4160 
4161 // }}}
4162 
4163 // split<simd>(simd) {{{
4164 template <typename _V, typename _Ap,
4165  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
4166  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
4167  && is_simd_v<_V>, array<_V, _Parts>>
4168  split(const simd<typename _V::value_type, _Ap>& __x)
4169  {
4170  using _Tp = typename _V::value_type;
4171 
4172  auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4173  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4174  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4175  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4176  { return __x[__i * _V::size() + __j]; });
4177  });
4178  };
4179 
4180  if constexpr (_Parts == 1)
4181  {
4182  return {simd_cast<_V>(__x)};
4183  }
4184  else if (__x._M_is_constprop())
4185  {
4186  return __gen_fallback();
4187  }
4188 #if _GLIBCXX_SIMD_HAVE_SVE
4189  else if constexpr (__is_sve_abi<_Ap>())
4190  {
4191  return __gen_fallback();
4192  }
4193 #endif
4194  else if constexpr (
4195  __is_fixed_size_abi_v<_Ap>
4196  && (is_same_v<typename _V::abi_type, simd_abi::scalar>
4197  || (__is_fixed_size_abi_v<typename _V::abi_type>
4198  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
4199  )))
4200  {
4201  // fixed_size -> fixed_size (w/o padding) or scalar
4202 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4203  const __may_alias<_Tp>* const __element_ptr
4204  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
4205  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4206  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4207  { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
4208 #else
4209  const auto& __xx = __data(__x);
4210  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4211  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4212  [[maybe_unused]] constexpr size_t __offset
4213  = decltype(__i)::value * _V::size();
4214  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4215  constexpr _SizeConstant<__j + __offset> __k;
4216  return __xx[__k];
4217  });
4218  });
4219 #endif
4220  }
4221  else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
4222  {
4223  // normally memcpy should work here as well
4224  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4225  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
4226  }
4227  else
4228  {
4229  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4230  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4231  if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
4232  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4233  return __x[__i * _V::size() + __j];
4234  });
4235  else
4236  return _V(__private_init,
4237  __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
4238  });
4239  }
4240  }
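
// Example (illustrative): split a 16-element simd into four 4-element parts:
//   using _V4 = simd<float, simd_abi::deduce_t<float, 4>>;
//   array<_V4, 4> __parts = split<_V4>(__x16);  // __x16 holds 16 floats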
4241 
4242 // }}}
4243 // split<simd_mask>(simd_mask) {{{
4244 template <typename _V, typename _Ap,
4245  size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
4246  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
4247  _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
4248  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
4249  {
4250  if constexpr (is_same_v<_Ap, typename _V::abi_type>)
4251  return {__x};
4252  else if constexpr (_Parts == 1)
4253  return {__proposed::static_simd_cast<_V>(__x)};
4254  else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
4255  && __is_avx_abi<_Ap>())
4256  return {_V(__private_init, __lo128(__data(__x))),
4257  _V(__private_init, __hi128(__data(__x)))};
4258  else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
4259  {
4260  const bitset __bits = __x.__to_bitset();
4261  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4262  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4263  constexpr size_t __offset = __i * _V::size();
4264  return _V(__bitset_init, (__bits >> __offset).to_ullong());
4265  });
4266  }
4267  else
4268  {
4269  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4270  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4271  constexpr size_t __offset = __i * _V::size();
4272  return _V(__private_init,
4273  [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4274  return __x[__j + __offset];
4275  });
4276  });
4277  }
4278  }
4279 
4280 // }}}
4281 // split<_Sizes...>(simd) {{{
4282 template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
4283  _GLIBCXX_SIMD_ALWAYS_INLINE
4284  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4285  split(const simd<_Tp, _Ap>& __x)
4286  {
4287  using _SL = _SizeList<_Sizes...>;
4288  using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
4289  constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
4290  constexpr size_t _N0 = _SL::template _S_at<0>();
4291  using _V = __deduced_simd<_Tp, _N0>;
4292 
4293  auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4294  {
4295  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4296  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4297  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4298  constexpr size_t __offset = _SL::_S_before(__i);
4299  return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4300  return __x[__offset + __j];
4301  });
4302  });
4303  };
4304 
4305  if (__x._M_is_constprop())
4306  return __gen_fallback();
4307 #if _GLIBCXX_SIMD_HAVE_SVE
4308  else if constexpr (__have_sve)
4309  return __gen_fallback();
4310 #endif
4311  else if constexpr (_Np == _N0)
4312  {
4313  static_assert(sizeof...(_Sizes) == 1);
4314  return {simd_cast<_V>(__x)};
4315  }
4316  else if constexpr // split from fixed_size, such that __x::first.size == _N0
4317  (__is_fixed_size_abi_v<
4318  _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
4319  {
4320  static_assert(
4321  !__is_fixed_size_abi_v<typename _V::abi_type>,
4322  "How can <_Tp, _Np> be a single _SimdTuple entry but a "
4323  "fixed_size_simd "
4324  "when deduced?");
4325  // extract first and recurse (__split_wrapper is needed to deduce a new
4326  // _Sizes pack)
4327  return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
4328  __split_wrapper(_SL::template _S_pop_front<1>(),
4329  __data(__x).second));
4330  }
4331  else if constexpr ((!__is_fixed_size_abi_v<simd_abi::deduce_t<_Tp, _Sizes>> && ...))
4332  {
4333  constexpr array<size_t, sizeof...(_Sizes)> __size = {_Sizes...};
4334  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4335  [&](auto __i) constexpr {
4336  constexpr size_t __offset = [&]() {
4337  size_t __r = 0;
4338  for (unsigned __j = 0; __j < __i; ++__j)
4339  __r += __size[__j];
4340  return __r;
4341  }();
4342  return __deduced_simd<_Tp, __size[__i]>(
4343  __private_init,
4344  __extract_part<__offset, _Np, __size[__i]>(__data(__x)));
4345  });
4346  }
4347 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4348  const __may_alias<_Tp>* const __element_ptr
4349  = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
4350  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4351  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4352  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4353  constexpr size_t __offset = _SL::_S_before(__i);
4354  constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
4355  constexpr size_t __a
4356  = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
4357  constexpr size_t __b = ((__a - 1) & __a) ^ __a;
4358  constexpr size_t __alignment = __b == 0 ? __a : __b;
4359  return _Vi(__element_ptr + __offset, overaligned<__alignment>);
4360  });
4361 #else
4362  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4363  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4364  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4365  const auto& __xx = __data(__x);
4366  using _Offset = decltype(_SL::_S_before(__i));
4367  return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4368  constexpr _SizeConstant<_Offset::value + __j> __k;
4369  return __xx[__k];
4370  });
4371  });
4372 #endif
4373  }
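
// Example (illustrative): split an 8-element simd into 2-, 2-, and 4-element
// pieces; the result is a tuple of differently sized simds:
//   auto [__a2, __b2, __c4] = split<2, 2, 4>(__x8);  // __x8 has 8 elements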
4374 
4375 // }}}
4376 
4377 // __subscript_in_pack {{{
4378 template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4379  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4380  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4381  {
4382  if constexpr (_I < simd_size_v<_Tp, _Ap>)
4383  return __x[_I];
4384  else
4385  return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4386  }
4387 
4388 // }}}
4389 // __store_pack_of_simd {{{
4390 template <typename _Tp, typename _A0, typename... _As>
4391  _GLIBCXX_SIMD_INTRINSIC void
4392  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
4393  {
4394  constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4395  __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4396  if constexpr (sizeof...(__xs) > 0)
4397  __store_pack_of_simd(__mem + __n_bytes, __xs...);
4398  }
4399 
4400 // }}}
4401 // concat(simd...) {{{
4402 template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
4403  inline _GLIBCXX_SIMD_CONSTEXPR
4404  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
4405  concat(const simd<_Tp, _As>&... __xs)
4406  {
4407  constexpr int _Np = (simd_size_v<_Tp, _As> + ...);
4408  using _Abi = simd_abi::deduce_t<_Tp, _Np>;
4409  using _Rp = simd<_Tp, _Abi>;
4410  using _RW = typename _SimdTraits<_Tp, _Abi>::_SimdMember;
4411  if constexpr (sizeof...(__xs) == 1)
4412  return simd_cast<_Rp>(__xs...);
4413  else if ((... && __xs._M_is_constprop()))
4414  return _Rp([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4415  { return __subscript_in_pack<__i>(__xs...); });
4416  else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 2)
4417  {
4418  return {__private_init,
4419  __vec_shuffle(__as_vector(__xs)..., std::make_index_sequence<_RW::_S_full_size>(),
4420  [](int __i) {
4421  constexpr int __sizes[2] = {int(simd_size_v<_Tp, _As>)...};
4422  constexpr int __vsizes[2]
4423  = {int(sizeof(__as_vector(__xs)) / sizeof(_Tp))...};
4424  constexpr int __padding0 = __vsizes[0] - __sizes[0];
4425  return __i >= _Np ? -1 : __i < __sizes[0] ? __i : __i + __padding0;
4426  })};
4427  }
4428  else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 3)
4429  return [](const auto& __x0, const auto& __x1, const auto& __x2)
4430  _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4431  return concat(concat(__x0, __x1), __x2);
4432  }(__xs...);
4433  else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) > 3)
4434  return [](const auto& __x0, const auto& __x1, const auto&... __rest)
4435  _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4436  return concat(concat(__x0, __x1), concat(__rest...));
4437  }(__xs...);
4438  else
4439  {
4440  _Rp __r{};
4441  __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
4442  return __r;
4443  }
4444  }
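
// Example (illustrative): the inverse of split; concatenating 2-, 2-, and
// 4-element simds yields one 8-element simd:
//   auto __x8 = concat(__a2, __b2, __c4);  // hypothetical operands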
4445 
4446 // }}}
4447 // concat(array<simd>) {{{
4448 template <typename _Tp, typename _Abi, size_t _Np>
4449  _GLIBCXX_SIMD_ALWAYS_INLINE
4450  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4451  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4452  {
4453  return __call_with_subscripts<_Np>(
4454  __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4455  return concat(__xs...);
4456  });
4457  }
4458 
4459 // }}}
4460 
4461 /// @cond undocumented
4462 // _SmartReference {{{
4463 template <typename _Up, typename _Accessor = _Up,
4464  typename _ValueType = typename _Up::value_type>
4465  class _SmartReference
4466  {
4467  friend _Accessor;
4468  int _M_index;
4469  _Up& _M_obj;
4470 
4471  _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
4472  _M_read() const noexcept
4473  {
4474  if constexpr (is_arithmetic_v<_Up>)
4475  return _M_obj;
4476  else
4477  return _M_obj[_M_index];
4478  }
4479 
4480  template <typename _Tp>
4481  _GLIBCXX_SIMD_INTRINSIC constexpr void
4482  _M_write(_Tp&& __x) const
4483  { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
4484 
4485  public:
4486  _GLIBCXX_SIMD_INTRINSIC constexpr
4487  _SmartReference(_Up& __o, int __i) noexcept
4488  : _M_index(__i), _M_obj(__o) {}
4489 
4490  using value_type = _ValueType;
4491 
4492  _GLIBCXX_SIMD_INTRINSIC
4493  _SmartReference(const _SmartReference&) = delete;
4494 
4495  _GLIBCXX_SIMD_INTRINSIC constexpr
4496  operator value_type() const noexcept
4497  { return _M_read(); }
4498 
4499  template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4500  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4501  operator=(_Tp&& __x) &&
4502  {
4503  _M_write(static_cast<_Tp&&>(__x));
4504  return {_M_obj, _M_index};
4505  }
4506 
4507 #define _GLIBCXX_SIMD_OP_(__op) \
4508  template <typename _Tp, \
4509  typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
4510  typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4511  typename = _ValuePreservingOrInt<_TT, value_type>> \
4512  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4513  operator __op##=(_Tp&& __x) && \
4514  { \
4515  const value_type& __lhs = _M_read(); \
4516  _M_write(__lhs __op __x); \
4517  return {_M_obj, _M_index}; \
4518  }
4519  _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4520  _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4521  _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4522 #undef _GLIBCXX_SIMD_OP_
4523 
4524  template <typename _Tp = void,
4525  typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4526  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4527  operator++() &&
4528  {
4529  value_type __x = _M_read();
4530  _M_write(++__x);
4531  return {_M_obj, _M_index};
4532  }
4533 
4534  template <typename _Tp = void,
4535  typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4536  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4537  operator++(int) &&
4538  {
4539  const value_type __r = _M_read();
4540  value_type __x = __r;
4541  _M_write(++__x);
4542  return __r;
4543  }
4544 
4545  template <typename _Tp = void,
4546  typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4547  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4548  operator--() &&
4549  {
4550  value_type __x = _M_read();
4551  _M_write(--__x);
4552  return {_M_obj, _M_index};
4553  }
4554 
4555  template <typename _Tp = void,
4556  typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4557  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4558  operator--(int) &&
4559  {
4560  const value_type __r = _M_read();
4561  value_type __x = __r;
4562  _M_write(--__x);
4563  return __r;
4564  }
4565 
4566  _GLIBCXX_SIMD_INTRINSIC friend void
4567  swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4568  conjunction<
4569  is_nothrow_constructible<value_type, _SmartReference&&>,
4570  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4571  {
4572  value_type __tmp = static_cast<_SmartReference&&>(__a);
4573  static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4574  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4575  }
4576 
4577  _GLIBCXX_SIMD_INTRINSIC friend void
4578  swap(value_type& __a, _SmartReference&& __b) noexcept(
4579  conjunction<
4580  is_nothrow_constructible<value_type, value_type&&>,
4581  is_nothrow_assignable<value_type&, value_type&&>,
4582  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4583  {
4584  value_type __tmp(std::move(__a));
4585  __a = static_cast<value_type>(__b);
4586  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4587  }
4588 
4589  _GLIBCXX_SIMD_INTRINSIC friend void
4590  swap(_SmartReference&& __a, value_type& __b) noexcept(
4591  conjunction<
4592  is_nothrow_constructible<value_type, _SmartReference&&>,
4593  is_nothrow_assignable<value_type&, value_type&&>,
4594  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4595  {
4596  value_type __tmp(__a);
4597  static_cast<_SmartReference&&>(__a) = std::move(__b);
4598  __b = std::move(__tmp);
4599  }
4600  };
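
// Example (illustrative): subscripting a simd yields a _SmartReference proxy;
// reads go through _M_read(), writes are forwarded to _Accessor::_S_set:
//   simd<float> __v{};
//   __v[0] = 1.f;         // _SmartReference::operator=
//   float __x0 = __v[0];  // conversion to value_type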
4601 
4602 // }}}
4603 // __scalar_abi_wrapper {{{
4604 template <int _Bytes>
4605  struct __scalar_abi_wrapper
4606  {
4607  template <typename _Tp> static constexpr size_t _S_full_size = 1;
4608  template <typename _Tp> static constexpr size_t _S_size = 1;
4609  template <typename _Tp> static constexpr bool _S_is_partial = false;
4610 
4611  template <typename _Tp, typename _Abi = simd_abi::scalar>
4612  static constexpr bool _S_is_valid_v
4613  = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4614  };
4615 
4616 // }}}
4617 // __decay_abi metafunction {{{
4618 template <typename _Tp>
4619  struct __decay_abi { using type = _Tp; };
4620 
4621 template <int _Bytes>
4622  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4623  { using type = simd_abi::scalar; };
4624 
4625 // }}}
4626 // __find_next_valid_abi metafunction {{{1
4627 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4628 // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. The
4629 // recursion stops once the resulting ABI tag would hold fewer than 2 elements;
4630 // in that case type::_S_is_valid_v<_Tp> may be false.
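// E.g. (illustrative, on a target where 16-byte vectors are valid):
// __find_next_valid_abi<_VecBuiltin, 24, float> computes
// _NextBytes = __bit_ceil(24) / 2 = 16; _VecBuiltin<16> is valid and not
// partial for float, so type is _VecBuiltin<16>.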
4631 template <template <int> class _Abi, int _Bytes, typename _Tp>
4632  struct __find_next_valid_abi
4633  {
4634  static constexpr auto
4635  _S_choose()
4636  {
4637  constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4638  using _NextAbi = _Abi<_NextBytes>;
4639  if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4640  return _Abi<_Bytes>();
4641  else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4642  && _NextAbi::template _S_is_valid_v<_Tp>)
4643  return _NextAbi();
4644  else
4645  return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4646  }
4647 
4648  using type = decltype(_S_choose());
4649  };
4650 
4651 template <int _Bytes, typename _Tp>
4652  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4653  { using type = simd_abi::scalar; };
4654 
4655 // _AbiList {{{1
4656 template <template <int> class...>
4657  struct _AbiList
4658  {
4659  template <typename, int> static constexpr bool _S_has_valid_abi = false;
4660  template <typename, int> using _FirstValidAbi = void;
4661  template <typename, int> using _BestAbi = void;
4662  };
4663 
4664 template <template <int> class _A0, template <int> class... _Rest>
4665  struct _AbiList<_A0, _Rest...>
4666  {
4667  template <typename _Tp, int _Np>
4668  static constexpr bool _S_has_valid_abi
4669  = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4670  _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4671 
4672  template <typename _Tp, int _Np>
4673  using _FirstValidAbi = conditional_t<
4674  _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4675  typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4676  typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4677 
4678  template <typename _Tp, int _Np>
4679  static constexpr auto
4680  _S_determine_best_abi()
4681  {
4682  static_assert(_Np >= 1);
4683  constexpr int _Bytes = sizeof(_Tp) * _Np;
4684  if constexpr (_Np == 1)
4685  return __make_dependent_t<_Tp, simd_abi::scalar>{};
4686  else
4687  {
4688  constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4689  // _A0<_Bytes> is good if:
4690  // 1. The ABI tag is valid for _Tp
4691  // 2. The storage overhead is no more than padding to fill the next
4692  // power-of-2 number of bytes
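  // E.g. (illustrative): _Tp = float, _Np = 3 gives _Bytes = 12 and
  // __fullsize = 4; since __fullsize / 2 = 2 < 3, a 16-byte vector with
  // one element of padding is accepted.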
4693  if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp>
4694  && ((__is_sve_abi<_A0<_Bytes>>() && __have_sve
4695  && (_Np <= __sve_vectorized_size_bytes/sizeof(_Tp)))
4696  || (__fullsize / 2 < _Np))
4697  )
4698  return typename __decay_abi<_A0<_Bytes>>::type{};
4699  else
4700  {
4701  using _Bp =
4702  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4703  if constexpr (_Bp::template _S_is_valid_v<
4704  _Tp> && _Bp::template _S_size<_Tp> <= _Np)
4705  return _Bp{};
4706  else
4707  return
4708  typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4709  }
4710  }
4711  }
4712 
4713  template <typename _Tp, int _Np>
4714  using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4715  };
4716 
4717 // }}}1
4718 
4719 // the following lists all native ABIs, which makes them accessible to
4720 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4721 // matters: Whatever comes first has higher priority.
4722 using _AllNativeAbis = _AbiList<
4723 #if _GLIBCXX_SIMD_HAVE_SVE
4724  simd_abi::_SveAbi,
4725 #endif
4726  simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin, __scalar_abi_wrapper>;
4727 
4728 using _NoSveAllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4729  __scalar_abi_wrapper>;
4730 
4731 // valid _SimdTraits specialization {{{1
4732 template <typename _Tp, typename _Abi>
4733  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4734  : _Abi::template __traits<_Tp> {};
4735 
4736 // __deduce_impl specializations {{{1
4737 // try all native ABIs (including scalar) first
4738 template <typename _Tp, size_t _Np>
4739  struct __deduce_impl<
4740  _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4741  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4742 
4743 template <typename _Tp, size_t _Np>
4744  struct __no_sve_deduce_impl<
4745  _Tp, _Np, enable_if_t<_NoSveAllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4746  { using type = _NoSveAllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4747 
4748 // fall back to fixed_size only if scalar and native ABIs don't match
4749 template <typename _Tp, size_t _Np, typename = void>
4750  struct __deduce_fixed_size_fallback {};
4751 
4752 template <typename _Tp, size_t _Np>
4753  struct __deduce_fixed_size_fallback<_Tp, _Np,
4754  enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4755  { using type = simd_abi::fixed_size<_Np>; };
4756 
4757 template <typename _Tp, size_t _Np, typename>
4758  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
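
// E.g. (illustrative, on a target whose widest vector register is 16 bytes):
// simd_abi::deduce_t<float, 4> is the native 16-byte ABI,
// deduce_t<float, 1> is simd_abi::scalar, and deduce_t<float, 8> falls back
// to simd_abi::fixed_size<8>.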
4759 
4760 template <typename _Tp, size_t _Np, typename>
4761  struct __no_sve_deduce_impl
4762  : public __deduce_fixed_size_fallback<_Tp, _Np>
4763  {};
4764 
4765 
4766 //}}}1
4767 /// @endcond
4768 
4769 // simd_mask {{{
4770 template <typename _Tp, typename _Abi>
4771  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4772  {
4773  // types, tags, and friends {{{
4774  using _Traits = _SimdTraits<_Tp, _Abi>;
4775  using _MemberType = typename _Traits::_MaskMember;
4776 
4777  // We map all masks with equal element sizeof to a single integer type, the
4778  // one given by __int_for_sizeof_t<_Tp>. This is the approach
4779  // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4780  // template specializations in the implementation classes.
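  // E.g. (illustrative): float and int32_t have equal sizeof, so
  // __int_for_sizeof_t maps both to the same integer type and
  // simd_mask<float> and simd_mask<int32_t> share one implementation.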
4781  using _Ip = __int_for_sizeof_t<_Tp>;
4782  static constexpr _Ip* _S_type_tag = nullptr;
4783 
4784  friend typename _Traits::_MaskBase;
4785  friend class simd<_Tp, _Abi>; // to construct masks on return
4786  friend typename _Traits::_SimdImpl; // to construct masks on return and
4787  // inspect data on masked operations
4788  public:
4789  using _Impl = typename _Traits::_MaskImpl;
4790  friend _Impl;
4791 
4792  // }}}
4793  // member types {{{
4794  using value_type = bool;
4795  using reference = _SmartReference<_MemberType, _Impl, value_type>;
4796  using simd_type = simd<_Tp, _Abi>;
4797  using abi_type = _Abi;
4798 
4799  // }}}
4800  static constexpr size_t size() // {{{
4801  { return __size_or_zero_v<_Tp, _Abi>; }
4802 
4803  // }}}
4804  // constructors & assignment {{{
4805  simd_mask() = default;
4806  simd_mask(const simd_mask&) = default;
4807  simd_mask(simd_mask&&) = default;
4808  simd_mask& operator=(const simd_mask&) = default;
4809  simd_mask& operator=(simd_mask&&) = default;
4810 
4811  // }}}
4812  // access to internal representation (optional feature) {{{
4813  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4814  simd_mask(typename _Traits::_MaskCastType __init)
4815  : _M_data{__init} {}
4816  // conversion to the internal type is done in _MaskBase
4817 
4818  // }}}
4819  // bitset interface (extension to be proposed) {{{
4820  // TS_FEEDBACK:
4821  // Conversion of simd_mask to and from bitset makes it much easier to
4822  // interface with other facilities. I suggest adding `static
4823  // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4824  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4825  __from_bitset(bitset<size()> bs)
4826  { return {__bitset_init, bs}; }
4827 
4828  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4829  __to_bitset() const
4830  { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
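
  // Example (illustrative): round-trip through bitset:
  //   simd_mask<float> __k = ...;
  //   auto __bs = __k.__to_bitset();                      // bitset<size()>
  //   auto __k2 = simd_mask<float>::__from_bitset(__bs);  // __k2 == __k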
4831 
4832  // }}}
4833  // explicit broadcast constructor {{{
4834  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4835  simd_mask(value_type __x)
4836  : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4837 
4838  // }}}
4839  // implicit type conversion constructor {{{
4840  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4841  // proposed improvement
4842  template <typename _Up, typename _A2,
4843  typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4844  _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4845  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4846  simd_mask(const simd_mask<_Up, _A2>& __x)
4847  : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4848  #else
4849  // conforming to ISO/IEC 19570:2018
4850  template <typename _Up, typename = enable_if_t<conjunction<
4851  is_same<abi_type, simd_abi::fixed_size<size()>>,
4852  is_same<_Up, _Up>>::value>>
4853  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4854  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4855  : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4856  #endif
4857 
4858  // }}}
4859  // load constructor {{{
4860  template <typename _Flags>
4861  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4862  simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4863  : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4864 
4865  template <typename _Flags>
4866  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4867  simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4868  : _M_data{}
4869  {
4870  _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4871  _Flags::template _S_apply<simd_mask>(__mem));
4872  }
4873 
4874  // }}}
4875  // loads [simd_mask.load] {{{
4876  template <typename _Flags>
4877  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4878  copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4879  { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4880 
4881  // }}}
4882  // stores [simd_mask.store] {{{
4883  template <typename _Flags>
4884  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4885  copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4886  { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4887 
4888  // }}}
4889  // scalar access {{{
4890  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4891  operator[](size_t __i)
4892  {
4893  if (__i >= size())
4894  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4895  return {_M_data, int(__i)};
4896  }
4897 
4898  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4899  operator[](size_t __i) const
4900  {
4901  if (__i >= size())
4902  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4903  if constexpr (__is_scalar_abi<_Abi>())
4904  return _M_data;
4905  else
4906  return static_cast<bool>(_M_data[__i]);
4907  }
4908 
4909  // }}}
4910  // negation {{{
4911  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4912  operator!() const
4913  { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4914 
4915  // }}}
4916  // simd_mask binary operators [simd_mask.binary] {{{
4917  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4918  // simd_mask<int> && simd_mask<uint> needs disambiguation
4919  template <typename _Up, typename _A2,
4920  typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4921  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4922  operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4923  {
4924  return {__private_init,
4925  _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4926  }
4927 
4928  template <typename _Up, typename _A2,
4929  typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4930  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4931  operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4932  {
4933  return {__private_init,
4934  _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4935  }
4936  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4937 
4938  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4939  operator&&(const simd_mask& __x, const simd_mask& __y)
4940  { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4941 
4942  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4943  operator||(const simd_mask& __x, const simd_mask& __y)
4944  { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4945 
4946  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4947  operator&(const simd_mask& __x, const simd_mask& __y)
4948  { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4949 
4950  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4951  operator|(const simd_mask& __x, const simd_mask& __y)
4952  { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4953 
4954  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4955  operator^(const simd_mask& __x, const simd_mask& __y)
4956  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4957 
4958  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4959  operator&=(simd_mask& __x, const simd_mask& __y)
4960  {
4961  __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4962  return __x;
4963  }
4964 
4965  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4966  operator|=(simd_mask& __x, const simd_mask& __y)
4967  {
4968  __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4969  return __x;
4970  }
4971 
4972  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4973  operator^=(simd_mask& __x, const simd_mask& __y)
4974  {
4975  __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4976  return __x;
4977  }
4978 
4979  // }}}
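
  // Example (editor's illustration, stdx as above): on simd_mask the
  // logical and bitwise forms compute the same element-wise result; &&
  // and || do not short-circuit across elements.
  //
  //   stdx::native_simd_mask<int> a, b;  // assume both initialized
  //   auto c = a && b;                   // element-wise, same value as a & b
  //   a |= b;                            // compound forms as well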
4980  // simd_mask compares [simd_mask.comparison] {{{
4981  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4982  operator==(const simd_mask& __x, const simd_mask& __y)
4983  { return !operator!=(__x, __y); }
4984 
4985  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4986  operator!=(const simd_mask& __x, const simd_mask& __y)
4987  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4988 
4989  // }}}
4990  // private_init ctor {{{
4991  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4992  simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4993  : _M_data(__init) {}
4994 
4995  // }}}
4996  // private_init generator ctor {{{
4997  template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4998  _GLIBCXX_SIMD_INTRINSIC constexpr
4999  simd_mask(_PrivateInit, _Fp&& __gen)
5000  : _M_data()
5001  {
5002  __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5003  _Impl::_S_set(_M_data, __i, __gen(__i));
5004  });
5005  }
5006 
5007  // }}}
5008  // bitset_init ctor {{{
5009  _GLIBCXX_SIMD_INTRINSIC constexpr
5010  simd_mask(_BitsetInit, bitset<size()> __init)
5011  : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
5012  {}
5013 
5014  // }}}
5015  // __cvt {{{
5016  // TS_FEEDBACK:
5017  // The conversion operator this implements should be a ctor on simd_mask.
5018  // Calling .__cvt() on a simd_mask yields a proxy that converts implicitly.
5019  // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
5020  struct _CvtProxy
5021  {
5022  template <typename _Up, typename _A2,
5023  typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
5024  _GLIBCXX_SIMD_ALWAYS_INLINE
5025  operator simd_mask<_Up, _A2>() &&
5026  {
5027  using namespace std::experimental::__proposed;
5028  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
5029  }
5030 
5031  const simd_mask<_Tp, _Abi>& _M_data;
5032  };
5033 
5034  _GLIBCXX_SIMD_INTRINSIC _CvtProxy
5035  __cvt() const
5036  { return {*this}; }
5037 
5038  // }}}
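
  // Example (hedged sketch of the extension above, stdx as above): __cvt()
  // yields a proxy convertible to any simd_mask of equal element count:
  //
  //   stdx::native_simd_mask<float> kf;  // assume initialized
  //   stdx::simd_mask<std::int32_t, decltype(kf)::abi_type> ki = kf.__cvt();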
5039  // operator?: overloads (suggested extension) {{{
5040  #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5041  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
5042  operator?:(const simd_mask& __k, const simd_mask& __where_true,
5043  const simd_mask& __where_false)
5044  {
5045  auto __ret = __where_false;
5046  _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
5047  return __ret;
5048  }
5049 
5050  template <typename _U1, typename _U2,
5051  typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
5052  typename = enable_if_t<conjunction_v<
5053  is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
5054  is_convertible<simd_mask, typename _Rp::mask_type>>>>
5055  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
5056  operator?:(const simd_mask& __k, const _U1& __where_true,
5057  const _U2& __where_false)
5058  {
5059  _Rp __ret = __where_false;
5060  _Rp::_Impl::_S_masked_assign(
5061  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
5062  __data(static_cast<_Rp>(__where_true)));
5063  return __ret;
5064  }
5065 
5066  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
5067  template <typename _Kp, typename _Ak, typename _Up, typename _Au,
5068  typename = enable_if_t<
5069  conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
5070  is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
5071  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
5072  operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
5073  const simd_mask<_Up, _Au>& __where_false)
5074  {
5075  simd_mask __ret = __where_false;
5076  _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
5077  __where_true._M_data);
5078  return __ret;
5079  }
5080  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
5081  #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5082 
5083  // }}}
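
  // Example: the ?: overloads above require the (hypothetical)
  // __GXX_CONDITIONAL_IS_OVERLOADABLE__ compiler extension. The portable
  // spelling of the same blend uses where():
  //
  //   stdx::native_simd<float> t, f;  // assume both initialized
  //   auto k = t < f;
  //   auto r = f;
  //   where(k, r) = t;                // r[i] = k[i] ? t[i] : f[i]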
5084  // _M_is_constprop {{{
5085  _GLIBCXX_SIMD_INTRINSIC constexpr bool
5086  _M_is_constprop() const
5087  {
5088  if constexpr (__is_scalar_abi<_Abi>())
5089  return __builtin_constant_p(_M_data);
5090  else
5091  return _M_data._M_is_constprop();
5092  }
5093 
5094  // }}}
5095 
5096  private:
5097  friend const auto& __data<_Tp, abi_type>(const simd_mask&);
5098  friend auto& __data<_Tp, abi_type>(simd_mask&);
5099  alignas(_Traits::_S_mask_align) _MemberType _M_data;
5100  };
5101 
5102 // }}}
5103 
5104 /// @cond undocumented
5105 // __data(simd_mask) {{{
5106 template <typename _Tp, typename _Ap>
5107  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5108  __data(const simd_mask<_Tp, _Ap>& __x)
5109  { return __x._M_data; }
5110 
5111 template <typename _Tp, typename _Ap>
5112  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5113  __data(simd_mask<_Tp, _Ap>& __x)
5114  { return __x._M_data; }
5115 
5116 // }}}
5117 /// @endcond
5118 
5119 // simd_mask reductions [simd_mask.reductions] {{{
5120 template <typename _Tp, typename _Abi>
5121  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5122  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5123  {
5124  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5125  {
5126  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5127  if (!__k[__i])
5128  return false;
5129  return true;
5130  }
5131  else
5132  return _Abi::_MaskImpl::_S_all_of(__k);
5133  }
5134 
5135 template <typename _Tp, typename _Abi>
5136  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5137  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5138  {
5139  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5140  {
5141  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5142  if (__k[__i])
5143  return true;
5144  return false;
5145  }
5146  else
5147  return _Abi::_MaskImpl::_S_any_of(__k);
5148  }
5149 
5150 template <typename _Tp, typename _Abi>
5151  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5152  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5153  {
5154  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5155  {
5156  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5157  if (__k[__i])
5158  return false;
5159  return true;
5160  }
5161  else
5162  return _Abi::_MaskImpl::_S_none_of(__k);
5163  }
5164 
5165 template <typename _Tp, typename _Abi>
5166  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5167  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5168  {
5169  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5170  {
5171  for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
5172  if (__k[__i] != __k[__i - 1])
5173  return true;
5174  return false;
5175  }
5176  else
5177  return _Abi::_MaskImpl::_S_some_of(__k);
5178  }
5179 
5180 template <typename _Tp, typename _Abi>
5181  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5182  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
5183  {
5184  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5185  {
5186  const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
5187  __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5188  return ((__elements != 0) + ...);
5189  });
5190  if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
5191  return __r;
5192  }
5193  return _Abi::_MaskImpl::_S_popcount(__k);
5194  }
5195 
5196 template <typename _Tp, typename _Abi>
5197  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5198  find_first_set(const simd_mask<_Tp, _Abi>& __k)
5199  {
5200  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5201  {
5202  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5203  const size_t _Idx = __call_with_n_evaluations<_Np>(
5204  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5205  return std::min({__indexes...});
5206  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5207  return __k[__i] ? +__i : _Np;
5208  });
5209  if (_Idx >= _Np)
5210  __invoke_ub("find_first_set(empty mask) is UB");
5211  if (__builtin_constant_p(_Idx))
5212  return _Idx;
5213  }
5214  return _Abi::_MaskImpl::_S_find_first_set(__k);
5215  }
5216 
5217 template <typename _Tp, typename _Abi>
5218  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5219  find_last_set(const simd_mask<_Tp, _Abi>& __k)
5220  {
5221  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5222  {
5223  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5224  const int _Idx = __call_with_n_evaluations<_Np>(
5225  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5226  return std::max({__indexes...});
5227  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5228  return __k[__i] ? int(__i) : -1;
5229  });
5230  if (_Idx < 0)
5231  __invoke_ub("find_last_set(empty mask) is UB");
5232  if (__builtin_constant_p(_Idx))
5233  return _Idx;
5234  }
5235  return _Abi::_MaskImpl::_S_find_last_set(__k);
5236  }
5237 
5238 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5239 all_of(_ExactBool __x) noexcept
5240 { return __x; }
5241 
5242 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5243 any_of(_ExactBool __x) noexcept
5244 { return __x; }
5245 
5246 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5247 none_of(_ExactBool __x) noexcept
5248 { return !__x; }
5249 
5250 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5251 some_of(_ExactBool) noexcept
5252 { return false; }
5253 
5254 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5255 popcount(_ExactBool __x) noexcept
5256 { return __x; }
5257 
5258 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5259 find_first_set(_ExactBool)
5260 { return 0; }
5261 
5262 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5263 find_last_set(_ExactBool)
5264 { return 0; }
5265 
5266 // }}}
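
// Example (editor's illustration, stdx = std::experimental): the
// reductions above in user code.
//
//   stdx::native_simd<float> v;           // assume initialized
//   auto k = v > 0.f;
//   int n = stdx::popcount(k);            // number of true elements
//   if (stdx::any_of(k))
//     int i = stdx::find_first_set(k);    // UB when none_of(k), see above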
5267 
5268 /// @cond undocumented
5269 // _SimdIntOperators{{{1
5270 template <typename _V, typename _Tp, typename _Abi, bool>
5271  class _SimdIntOperators {};
5272 
5273 template <typename _V, typename _Tp, typename _Abi>
5274  class _SimdIntOperators<_V, _Tp, _Abi, true>
5275  {
5276  using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
5277 
5278  _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
5279  __derived() const
5280  { return *static_cast<const _V*>(this); }
5281 
5282  template <typename _Up>
5283  _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
5284  _S_make_derived(_Up&& __d)
5285  { return {__private_init, static_cast<_Up&&>(__d)}; }
5286 
5287  public:
5288  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5289  _V&
5290  operator%=(_V& __lhs, const _V& __x)
5291  { return __lhs = __lhs % __x; }
5292 
5293  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5294  _V&
5295  operator&=(_V& __lhs, const _V& __x)
5296  { return __lhs = __lhs & __x; }
5297 
5298  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5299  _V&
5300  operator|=(_V& __lhs, const _V& __x)
5301  { return __lhs = __lhs | __x; }
5302 
5303  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5304  _V&
5305  operator^=(_V& __lhs, const _V& __x)
5306  { return __lhs = __lhs ^ __x; }
5307 
5308  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5309  _V&
5310  operator<<=(_V& __lhs, const _V& __x)
5311  { return __lhs = __lhs << __x; }
5312 
5313  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5314  _V&
5315  operator>>=(_V& __lhs, const _V& __x)
5316  { return __lhs = __lhs >> __x; }
5317 
5318  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5319  _V&
5320  operator<<=(_V& __lhs, int __x)
5321  { return __lhs = __lhs << __x; }
5322 
5323  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5324  _V&
5325  operator>>=(_V& __lhs, int __x)
5326  { return __lhs = __lhs >> __x; }
5327 
5328  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5329  _V
5330  operator%(const _V& __x, const _V& __y)
5331  {
5332  return _SimdIntOperators::_S_make_derived(
5333  _Impl::_S_modulus(__data(__x), __data(__y)));
5334  }
5335 
5336  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5337  _V
5338  operator&(const _V& __x, const _V& __y)
5339  {
5340  return _SimdIntOperators::_S_make_derived(
5341  _Impl::_S_bit_and(__data(__x), __data(__y)));
5342  }
5343 
5344  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5345  _V
5346  operator|(const _V& __x, const _V& __y)
5347  {
5348  return _SimdIntOperators::_S_make_derived(
5349  _Impl::_S_bit_or(__data(__x), __data(__y)));
5350  }
5351 
5352  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5353  _V
5354  operator^(const _V& __x, const _V& __y)
5355  {
5356  return _SimdIntOperators::_S_make_derived(
5357  _Impl::_S_bit_xor(__data(__x), __data(__y)));
5358  }
5359 
5360  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5361  _V
5362  operator<<(const _V& __x, const _V& __y)
5363  {
5364  return _SimdIntOperators::_S_make_derived(
5365  _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5366  }
5367 
5368  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5369  _V
5370  operator>>(const _V& __x, const _V& __y)
5371  {
5372  return _SimdIntOperators::_S_make_derived(
5373  _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5374  }
5375 
5376  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5377  _V
5378  operator<<(const _V& __x, int __y)
5379  {
5380  if (__y < 0)
5381  __invoke_ub("The behavior is undefined if the right operand of a "
5382  "shift operation is negative. [expr.shift]\nA shift by "
5383  "%d was requested",
5384  __y);
5385  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5386  __invoke_ub(
5387  "The behavior is undefined if the right operand of a "
5388  "shift operation is greater than or equal to the width of the "
5389  "promoted left operand. [expr.shift]\nA shift by %d was requested",
5390  __y);
5391  return _SimdIntOperators::_S_make_derived(
5392  _Impl::_S_bit_shift_left(__data(__x), __y));
5393  }
5394 
5395  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5396  _V
5397  operator>>(const _V& __x, int __y)
5398  {
5399  if (__y < 0)
5400  __invoke_ub(
5401  "The behavior is undefined if the right operand of a shift "
5402  "operation is negative. [expr.shift]\nA shift by %d was requested",
5403  __y);
5404  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5405  __invoke_ub(
5406  "The behavior is undefined if the right operand of a shift "
5407  "operation is greater than or equal to the width of the promoted "
5408  "left operand. [expr.shift]\nA shift by %d was requested",
5409  __y);
5410  return _SimdIntOperators::_S_make_derived(
5411  _Impl::_S_bit_shift_right(__data(__x), __y));
5412  }
5413 
5414  // unary operators (for integral _Tp)
5415  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5416  _V
5417  operator~() const
5418  { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5419  };
5420 
5421 //}}}1
5422 /// @endcond
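
// Example (editor's illustration, stdx as above): the operators defined by
// _SimdIntOperators are available only when value_type is integral.
//
//   stdx::native_simd<int> a, b;  // assume both initialized, b nonzero
//   a %= b;                       // modulus, integral-only
//   a = (a << 3) ^ b;             // shift by int or by simd; negative or
//                                 // too-large counts are UB (checked above)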
5423 
5424 // simd {{{
5425 template <typename _Tp, typename _Abi>
5426  class simd : public _SimdIntOperators<
5427  simd<_Tp, _Abi>, _Tp, _Abi,
5428  conjunction<is_integral<_Tp>,
5429  typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5430  public _SimdTraits<_Tp, _Abi>::_SimdBase
5431  {
5432  using _Traits = _SimdTraits<_Tp, _Abi>;
5433  using _MemberType = typename _Traits::_SimdMember;
5434  using _CastType = typename _Traits::_SimdCastType;
5435  static constexpr _Tp* _S_type_tag = nullptr;
5436  friend typename _Traits::_SimdBase;
5437 
5438  public:
5439  using _Impl = typename _Traits::_SimdImpl;
5440  friend _Impl;
5441  friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5442 
5443  using value_type = _Tp;
5444  using reference = _SmartReference<_MemberType, _Impl, value_type>;
5445  using mask_type = simd_mask<_Tp, _Abi>;
5446  using abi_type = _Abi;
5447 
5448  static constexpr size_t size()
5449  { return __size_or_zero_v<_Tp, _Abi>; }
5450 
5451  _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5452  _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5453  _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5454  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5455  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5456 
5457  // implicit broadcast constructor
5458  template <typename _Up,
5459  typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5460  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5461  simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5462  : _M_data(
5463  _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5464  {}
5465 
5466  // implicit type conversion constructor (convert from fixed_size to
5467  // fixed_size)
5468  template <typename _Up>
5469  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5470  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5471  enable_if_t<
5472  conjunction<
5473  is_same<simd_abi::fixed_size<size()>, abi_type>,
5474  negation<__is_narrowing_conversion<_Up, value_type>>,
5475  __converts_to_higher_integer_rank<_Up, value_type>>::value,
5476  void*> = nullptr)
5477  : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5478 
5479  // explicit type conversion constructor
5480 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5481  template <typename _Up, typename _A2,
5482  typename = decltype(static_simd_cast<simd>(
5483  declval<const simd<_Up, _A2>&>()))>
5484  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5485  simd(const simd<_Up, _A2>& __x)
5486  : simd(static_simd_cast<simd>(__x)) {}
5487 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5488 
5489  // generator constructor
5490  template <typename _Fp>
5491  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5492  simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5493  declval<_SizeConstant<0>&>())),
5494  value_type>* = nullptr)
5495  : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
5496 
5497  // load constructor
5498  template <typename _Up, typename _Flags>
5499  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5500  simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
5501  : _M_data(
5502  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5503  {}
5504 
5505  // loads [simd.load]
5506  template <typename _Up, typename _Flags>
5507  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5508  copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
5509  {
5510  _M_data = static_cast<decltype(_M_data)>(
5511  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5512  }
5513 
5514  // stores [simd.store]
5515  template <typename _Up, typename _Flags>
5516  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5517  copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
5518  {
5519  _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5520  _S_type_tag);
5521  }
5522 
5523  // scalar access
5524  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5525  operator[](size_t __i)
5526  { return {_M_data, int(__i)}; }
5527 
5528  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5529  operator[]([[maybe_unused]] size_t __i) const
5530  {
5531  if constexpr (__is_scalar_abi<_Abi>())
5532  {
5533  _GLIBCXX_DEBUG_ASSERT(__i == 0);
5534  return _M_data;
5535  }
5536  else
5537  return _M_data[__i];
5538  }
5539 
5540  // increment and decrement:
5541  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5542  operator++()
5543  {
5544  _Impl::_S_increment(_M_data);
5545  return *this;
5546  }
5547 
5548  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5549  operator++(int)
5550  {
5551  simd __r = *this;
5552  _Impl::_S_increment(_M_data);
5553  return __r;
5554  }
5555 
5556  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5557  operator--()
5558  {
5559  _Impl::_S_decrement(_M_data);
5560  return *this;
5561  }
5562 
5563  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5564  operator--(int)
5565  {
5566  simd __r = *this;
5567  _Impl::_S_decrement(_M_data);
5568  return __r;
5569  }
5570 
5571  // unary operators (for any _Tp)
5572  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5573  operator!() const
5574  { return {__private_init, _Impl::_S_negate(_M_data)}; }
5575 
5576  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5577  operator+() const
5578  { return *this; }
5579 
5580  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5581  operator-() const
5582  { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5583 
5584  // access to internal representation (suggested extension)
5585  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5586  simd(_CastType __init) : _M_data(__init) {}
5587 
5588  // compound assignment [simd.cassign]
5589  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5590  operator+=(simd& __lhs, const simd& __x)
5591  { return __lhs = __lhs + __x; }
5592 
5593  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5594  operator-=(simd& __lhs, const simd& __x)
5595  { return __lhs = __lhs - __x; }
5596 
5597  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5598  operator*=(simd& __lhs, const simd& __x)
5599  { return __lhs = __lhs * __x; }
5600 
5601  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5602  operator/=(simd& __lhs, const simd& __x)
5603  { return __lhs = __lhs / __x; }
5604 
5605  // binary operators [simd.binary]
5606  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5607  operator+(const simd& __x, const simd& __y)
5608  { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5609 
5610  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5611  operator-(const simd& __x, const simd& __y)
5612  { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5613 
5614  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5615  operator*(const simd& __x, const simd& __y)
5616  { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5617 
5618  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5619  operator/(const simd& __x, const simd& __y)
5620  { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5621 
5622  // compares [simd.comparison]
5623  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5624  operator==(const simd& __x, const simd& __y)
5625  { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5626 
5627  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5628  operator!=(const simd& __x, const simd& __y)
5629  {
5630  return simd::_S_make_mask(
5631  _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5632  }
5633 
5634  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5635  operator<(const simd& __x, const simd& __y)
5636  { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5637 
5638  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5639  operator<=(const simd& __x, const simd& __y)
5640  {
5641  return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5642  }
5643 
5644  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5645  operator>(const simd& __x, const simd& __y)
5646  { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5647 
5648  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5649  operator>=(const simd& __x, const simd& __y)
5650  {
5651  return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5652  }
5653 
5654  // operator?: overloads (suggested extension) {{{
5655 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5656  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5657  operator?:(const mask_type& __k, const simd& __where_true,
5658  const simd& __where_false)
5659  {
5660  auto __ret = __where_false;
5661  _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5662  return __ret;
5663  }
5664 
5665 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5666  // }}}
5667 
5668  // "private" because of the first argument's namespace
5669  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5670  simd(_PrivateInit, const _MemberType& __init)
5671  : _M_data(__init) {}
5672 
5673  // "private" because of the first argument's namespace
5674  _GLIBCXX_SIMD_INTRINSIC
5675  simd(_BitsetInit, bitset<size()> __init) : _M_data()
5676  { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5677 
5678  _GLIBCXX_SIMD_INTRINSIC constexpr bool
5679  _M_is_constprop() const
5680  {
5681  if constexpr (__is_scalar_abi<_Abi>())
5682  return __builtin_constant_p(_M_data);
5683  else
5684  return _M_data._M_is_constprop();
5685  }
5686 
5687  private:
5688  _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
5689  _S_make_mask(typename mask_type::_MemberType __k)
5690  { return {__private_init, __k}; }
5691 
5692  friend const auto& __data<value_type, abi_type>(const simd&);
5693  friend auto& __data<value_type, abi_type>(simd&);
5694  alignas(_Traits::_S_simd_align) _MemberType _M_data;
5695  };
5696 
5697 // }}}
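
// Example (editor's sketch of the simd class above in user code, stdx as
// above):
//
//   stdx::native_simd<float> x = 1.5f;                   // broadcast ctor
//   stdx::native_simd<float> iota(
//     [](int i) { return float(i); });                   // generator ctor
//   float buf[stdx::native_simd<float>::size()];
//   (x + iota).copy_to(buf, stdx::element_aligned);      // store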
5698 /// @cond undocumented
5699 // __data {{{
5700 template <typename _Tp, typename _Ap>
5701  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5702  __data(const simd<_Tp, _Ap>& __x)
5703  { return __x._M_data; }
5704 
5705 template <typename _Tp, typename _Ap>
5706  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5707  __data(simd<_Tp, _Ap>& __x)
5708  { return __x._M_data; }
5709 
5710 // }}}
5711 namespace __float_bitwise_operators { //{{{
5712 template <typename _Tp, typename _Ap>
5713  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5714  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5715  { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }
5716 
5717 template <typename _Tp, typename _Ap>
5718  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5719  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5720  { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }
5721 
5722 template <typename _Tp, typename _Ap>
5723  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5724  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5725  { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
5726 
5727 template <typename _Tp, typename _Ap>
5728  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5729  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5730  operator~(const simd<_Tp, _Ap>& __a)
5731  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5732 } // namespace __float_bitwise_operators }}}
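
// Example (editor's illustration; this namespace is internal, so user code
// should not rely on it): bitwise operators on floating-point simd enable
// branch-free sign-bit manipulation, e.g. an abs sketch:
//
//   using namespace stdx::__float_bitwise_operators;
//   stdx::native_simd<float> x = -2.f;
//   stdx::native_simd<float> signmask = -0.f;  // only the sign bit set
//   auto absx = x & ~signmask;                 // every element == 2.f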
5733 /// @endcond
5734 
5735 /// @}
5736 _GLIBCXX_SIMD_END_NAMESPACE
5737 
5738 #endif // __cplusplus >= 201703L
5739 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5740 
5741 // vim: foldmethod=marker foldmarker={{{,}}}