// Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <functional>
#include <iosfwd>
#include <utility>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#include <arm_neon.h>
#endif

/* There are several closely related types, with the following naming
 * convention:
 * _Tp: vectorizable (arithmetic) type (or any type)
 * _TV: __vector_type_t<_Tp, _Np>
 * _TW: _SimdWrapper<_Tp, _Np>
 * _TI: __intrinsic_type_t<_Tp, _Np>
 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
 *
 * More naming conventions:
 * _Ap or _Abi: An ABI tag from the simd_abi namespace
 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
 * _IV, _IW as for _TV, _TW
 * _Np: number of elements (not bytes)
 * _Bytes: number of bytes
 *
 * Variable names:
 * __k: mask object (vector- or bitmask)
 */
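// Example (editor's illustration, not part of the original header): for
// _Tp = float and _Np = 4 on an SSE target, the related types would be
//
//   using _TV  = __vector_type_t<float, 4>;     // [[gnu::vector_size(16)]] float
//   using _TW  = _SimdWrapper<float, 4>;        // wrapper holding a _TV
//   using _TI  = __intrinsic_type_t<float, 4>;  // __m128 on x86
//   using _TVT = _VectorTraits<_TV>;            // value_type, _S_full_size, ...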
_GLIBCXX_SIMD_BEGIN_NAMESPACE

#if !_GLIBCXX_SIMD_X86INTRIN
using __m128 [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256 [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512 [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N-byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the
//    corresponding value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
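// Example (editor's illustration, not part of the original header): on an
// x86-64 target the tags map to concrete register widths, e.g.
//
//   simd<float, simd_abi::_VecBuiltin<16>>   // 4 floats in an XMM register,
//                                            // vector (all-0/all-1) masks
//   simd<float, simd_abi::_VecBltnBtmsk<64>> // 16 floats in a ZMM register,
//                                            // __mmask16 bitmask
//   simd<float, simd_abi::fixed_size<7>>     // exactly 7 elements, storage
//                                            // equivalent to array<float, 7>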
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    {
      return static_cast<_Up*>(
        __builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>));
    }
};

template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};

// }}}
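// Example (editor's illustration, not part of the original header): the tags
// select the alignment contract for simd loads and stores, e.g.
//
//   alignas(memory_alignment_v<simd<float>>) float __buf[simd<float>::size()];
//   simd<float> __v;
//   __v.copy_from(__buf, vector_aligned);  // promises vector alignment
//   __v.copy_to(__buf, element_aligned);   // only alignof(float) assumed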
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;

namespace __detail
{
  struct _Minimum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::min;
        return min(__a, __b);
      }
  };

  struct _Maximum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::max;
        return max(__a, __b);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
template <typename _Fp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
                                make_index_sequence<_Np>{});
  }

// }}}
// __generate_from_n_evaluations{{{
template <typename _R, typename _Fp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
      static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }

// }}}
// __call_with_n_evaluations{{{
template <size_t... _I, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
                                     static_cast<_F0&&>(__f0),
                                     static_cast<_FArgs&&>(__fargs));
  }

// }}}
// __call_with_subscripts{{{
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
                                          make_index_sequence<_Np>(),
                                          static_cast<_Fp&&>(__fun));
  }

// }}}
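// Example (editor's illustration, not part of the original header): these
// helpers expand a callable over a compile-time index pack, e.g.
//
//   int __a[4] = {};
//   __execute_n_times<4>([&](auto __i) { __a[__i] = __i; });    // 0 1 2 3
//   auto __b = __generate_from_n_evaluations<4, array<int, 4>>(
//     [](auto __i) { return __i * __i; });                      // 0 1 4 9
//   int __sum = __call_with_subscripts<4>(
//     __a, [](auto... __vs) { return (__vs + ...); });          // 6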

// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));

// }}}
// __is_vectorizable {{{
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
          typename = enable_if_t<
            __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
template <typename _Tp>
  struct __is_bitmask<_Tp,
    void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    if constexpr (_Bytes == sizeof(int))
      return int();
#ifdef __clang__
    else if constexpr (_Bytes == sizeof(char))
      return char();
#else
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
#endif
    else if constexpr (_Bytes == sizeof(short))
      return short();
#ifndef __clang__
    else if constexpr (_Bytes == sizeof(long))
      return long();
#endif
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
#ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
#endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
        constexpr size_t _Np = _Bytes / sizeof(int);
        struct _Ip
        {
          int _M_data[_Np];

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator&(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator|(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator^(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator~() const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return ~_M_data[__i]; });
          }
        };
        return _Ip{};
      }
    else
      static_assert(_Bytes != _Bytes, "this should be unreachable");
  }
#pragma GCC diagnostic pop

template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());

// }}}
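// Example (editor's illustration, not part of the original header): on a
// typical LP64 target,
//
//   static_assert(is_same_v<__int_with_sizeof_t<4>, int>);
//   static_assert(is_same_v<__int_for_sizeof_t<double>, long>);
//   // A 32-byte request yields the synthesized struct of 8 ints with
//   // element-wise &, |, ^ and ~.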
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// constexpr feature detection{{{
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
           && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
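// Example (editor's illustration, not part of the original header): the
// predicates follow directly from the definitions above, e.g.
//
//   static_assert(__abi_bytes_v<simd_abi::_Sse<>> == 16);
//   static_assert(__is_sse_abi<simd_abi::_VecBuiltin<16>>());
//   static_assert(__is_avx512_abi<simd_abi::_Avx512<64>>());
//   static_assert(!__is_scalar_abi<simd_abi::_VecBuiltin<16>>());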
// __make_dependent_t {{{
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg,
              [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
    = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return (__a + __b - 1) / __b; }

// }}}
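// Example (editor's illustration): integer division rounding up, used below
// to size _BitMask storage; 17 bits need 3 bytes:
//
//   static_assert(__div_roundup(17, __CHAR_BIT__) == 3);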
// _ExactBool{{{
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr operator bool() const { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;

// }}}
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
          bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
      || __finite_max_v<_From> > __finite_max_v<_To>
      || __finite_min_v<_From> < __finite_min_v<_To>
      || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

// }}}
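// Example (editor's illustration, not part of the original header):
//
//   static_assert(__is_narrowing_conversion<int, short>::value);
//   static_assert(__is_narrowing_conversion<unsigned, int>::value);
//   static_assert(!__is_narrowing_conversion<short, int>::value);
//   static_assert(!__is_narrowing_conversion<float, double>::value);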
// __converts_to_higher_integer_rank{{{
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : public true_type {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};

// }}}
// __data(simd/simd_mask) {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
          typename = void>
  struct _SimdConverter;

template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require storage
//    equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

// }}}
// __is_simd_wrapper {{{
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
// _BitOps {{{
struct _BitOps
{
  // _S_bit_iteration {{{
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
        __k = __mask;
      else
        __k = __mask.to_ullong();
      while (__k)
        {
          __f(std::__countr_zero(__k));
          __k &= (__k - 1);
        }
    }

  //}}}
};

//}}}
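// Example (editor's illustration): iterate over the set bits of a mask,
// lowest to highest; 0b1010 invokes the callback with 1, then 3:
//
//   _BitOps::_S_bit_iteration(0b1010u, [](auto __bit) {
//     // __bit is the index of a set bit
//   });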
// __increment, __decrement {{{
template <typename _Tp = void>
  struct __increment
  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };

template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return ++__a; }
  };

template <typename _Tp = void>
  struct __decrement
  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };

template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return --__a; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
template <typename _From, typename _To,
          typename = enable_if_t<negation<
            __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

template <typename _From, typename _To,
          typename _DecayedFrom = __remove_cvref_t<_From>,
          typename = enable_if_t<conjunction<
            is_convertible<_From, _To>,
            disjunction<
              is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
              conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
              negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    using _Tp = conditional_t<_Np == 1, bool,
                              make_unsigned_t<__int_with_sizeof_t<std::min(
                                sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;

    constexpr _BitMask() noexcept = default;

    constexpr _BitMask(unsigned long long __x) noexcept
    : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    template <bool _RhsSanitized, typename = enable_if_t<
                _RhsSanitized == false && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
      : _BitMask(__rhs._M_sanitized()) {}

    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
        return *this;
      else if constexpr (_Np == 1)
        return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
        {
          _SanitizedBitMask<_Np> __r = {};
          for (int __i = 0; __i < _S_array_size; ++__i)
            __r._M_bits[__i] = _M_bits[__i];
          if constexpr (_S_unused_bits > 0)
            __r._M_bits[_S_array_size - 1] &= _S_bitmask;
          return __r;
        }
    }

    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
        constexpr size_t _RN = _Np + _Mp;
        using _Rp = _BitMask<_RN, _Sanitized>;
        if constexpr (_Rp::_S_array_size == 1)
          {
            _Rp __r{{_M_bits[0]}};
            __r._M_bits[0] <<= _Mp;
            __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
            return __r;
          }
        else
          __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized
    // bitmask, the result type will have _Sanitized set.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
        static_assert(_Np > _DropLsb);
        static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
                      "not implemented for bitmasks larger than one ullong");
        if constexpr (_NewSize == 1)
          // must sanitize because the return _Tp is bool
          return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
        else
          return _BitMask<_NewSize,
                          ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
                            && _NewSize + _DropLsb <= _Np)
                           || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
                               && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
                                                                >> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().all();
      else
        {
          constexpr _Tp __allbits = ~_Tp();
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != __allbits)
              return false;
          return _M_bits[_S_array_size - 1] == _S_bitmask;
        }
    }

    // True if at least one bit is set. Implicitly sanitizes if
    // _Sanitized == false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().any();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return true;
          return _M_bits[_S_array_size - 1] != 0;
        }
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().none();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return false;
          return _M_bits[_S_array_size - 1] == 0;
        }
    }

    // Returns the number of set bits. Implicitly sanitizes if
    // _Sanitized == false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().count();
      else
        {
          int __result = __builtin_popcountll(_M_bits[0]);
          for (int __i = 1; __i < _S_array_size; ++__i)
            __result += __builtin_popcountll(_M_bits[__i]);
          return __result;
        }
    }

    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (_S_array_size == 1)
        return (_M_bits[0] >> __i) & 1;
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          return (_M_bits[__j] >> __shift) & 1;
        }
    }

    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
        static_assert(__i < _Np);
        constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
        constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
        return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
        _M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
        {
          _M_bits[0] &= ~_Tp(_Tp(1) << __i);
          _M_bits[0] |= _Tp(_Tp(__x) << __i);
        }
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
          _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
        }
    }

    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
        static_assert(__i < _Np);
        if constexpr (_Np == 1)
          _M_bits[0] = __x;
        else
          {
            constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
            constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
            constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
            _M_bits[__j] &= __mask;
            _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
          }
      }

    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else
        {
          _BitMask __result{};
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            __result._M_bits[__i] = ~_M_bits[__i];
          if constexpr (_Sanitized)
            __result._M_bits[_S_array_size - 1]
              = _M_bits[_S_array_size - 1] ^ _S_bitmask;
          else
            __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
          return __result;
        }
    }

    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      if constexpr (_S_array_size == 1)
        return __builtin_constant_p(_M_bits[0]);
      else
        {
          for (int __i = 0; __i < _S_array_size; ++__i)
            if (!__builtin_constant_p(_M_bits[__i]))
              return false;
          return true;
        }
    }
  };

// }}}
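// Example (editor's illustration): a _BitMask<5> stores its 5 bits in the
// low end of a single integer; sanitizing clears the 3 unused high bits:
//
//   _BitMask<5> __k(0b11111111);          // unused bits may be set
//   __k._M_sanitized().to_ullong();       // == 0b11111
//   __k._M_sanitized()._M_extract<2>();   // _BitMask<3> holding 0b111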

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif

// }}}
// __vector_type {{{
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np,
                         enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls
      // EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
                                      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;

// }}}
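// Example (editor's illustration, assuming an x86-64 target): the byte size
// is __bit_ceil of the payload, but at least __min_vector_size<_Tp>
// (2 * sizeof(_Tp)):
//
//   static_assert(sizeof(__vector_type_t<float, 4>) == 16);
//   static_assert(sizeof(__vector_type_t<float, 3>) == 16); // padded
//   static_assert(sizeof(__vector_type_t<float, 2>) == 8);
//   static_assert(is_same_v<__vector_type_t<float, 1>, float>);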
// __is_vector_type {{{
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
                                            || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
                           void_t<__vector_type_t<_Tp, _Np>>>
  {
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    static constexpr bool _S_is_partial = (_Np < _S_full_size);
    static constexpr int _S_partial_width = _Np;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

// }}}
// __as_vector{{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __x;
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      return __data(__x)._M_data;
    else if constexpr (__is_vectorizable_v<_V>)
      return __vector_type_t<_V, 2>{__x};
    else
      return __x._M_data;
  }

// }}}
// __as_wrapper{{{
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
                          (_Np > 0 ? _Np
                                   : _VectorTraits<_V>::_S_full_size)>(__x);
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
        static_assert(_V::size() == _Np);
        return __data(__x);
      }
    else
      {
        static_assert(_V::_S_size == _Np);
        return __x;
      }
  }

// }}}
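// Example (editor's illustration, assuming the _SimdWrapper definition later
// in this file): both helpers normalize between the builtin-vector and
// _SimdWrapper representations:
//
//   _SimdWrapper<float, 4> __w{};
//   auto __v  = __as_vector(__w);       // __vector_type_t<float, 4>
//   auto __w2 = __as_wrapper<4>(__v);   // back to _SimdWrapper<float, 4>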
// __intrin_bitcast{{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
                  && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      if constexpr (sizeof(_To) >= 16)
        return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
        {
          _To __r;
          __builtin_memcpy(&__r, &__v, sizeof(_To));
          return __r;
        }
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
        reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
                       && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
        reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
                       && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
        reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (sizeof(__v) <= 8)
      return reinterpret_cast<_To>(
        __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
          reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
        static_assert(sizeof(_To) > sizeof(_From));
        _To __r = {};
        __builtin_memcpy(&__r, &__v, sizeof(_From));
        return __r;
      }
  }

// }}}
// __vector_bitcast{{{
template <typename _To, size_t _NN = 0, typename _From,
          typename _FromVT = _VectorTraits<_From>,
          size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(_From __x)
  {
    using _R = __vector_type_t<_To, _Np>;
    return __intrin_bitcast<_R>(__x);
  }

template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
          size_t _Np
            = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  {
    static_assert(_Np > 1);
    return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
  }

// }}}
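// Example (editor's illustration): reinterpret the bytes of a vector as a
// different element type without changing its size:
//
//   __vector_type_t<float, 4> __f = {1.f, 2.f, 3.f, 4.f};
//   auto __bits = __vector_bitcast<_UInt>(__f); // __vector_type_t<_UInt, 4>
//   // __bits[0] == 0x3f800000 (IEEE-754 representation of 1.f)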
// __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048

//}}}
// __bit_cast {{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __bit_cast(const _From __x)
  {
    // TODO: implement with / replace by __builtin_bit_cast ASAP
    static_assert(sizeof(_To) == sizeof(_From));
    constexpr bool __to_is_vectorizable
      = is_arithmetic_v<_To> || is_enum_v<_To>;
    constexpr bool __from_is_vectorizable
      = is_arithmetic_v<_From> || is_enum_v<_From>;
    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
      return reinterpret_cast<_To>(__x);
    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
      {
        using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
        return reinterpret_cast<_To>(_FV{__x});
      }
    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
      {
        using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
        using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
        return reinterpret_cast<_TV>(_FV{__x})[0];
      }
    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
      {
        using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
        return reinterpret_cast<_TV>(__x)[0];
      }
    else
      {
        _To __r;
        __builtin_memcpy(reinterpret_cast<char*>(&__r),
                         reinterpret_cast<const char*>(&__x), sizeof(_To));
        return __r;
      }
  }

// }}}
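// Example (editor's illustration): scalar use of __bit_cast, matching what
// std::bit_cast would do:
//
//   const _UInt __one = __bit_cast<_UInt>(1.f); // 0x3f800000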
// __to_intrin {{{
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename _R
            = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_intrin(_Tp __x)
  {
    static_assert(sizeof(__x) <= sizeof(_R),
                  "__to_intrin may never drop values off the end");
    if constexpr (sizeof(__x) == sizeof(_R))
      return reinterpret_cast<_R>(__as_vector(__x));
    else
      {
        using _Up = __int_for_sizeof_t<_Tp>;
        return reinterpret_cast<_R>(
          __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
      }
  }

// }}}
// __make_vector{{{
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
  __make_vector(const _Args&... __args)
  {
    return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...};
  }

// }}}
// __vector_broadcast{{{
template <size_t _Np, typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast(_Tp __x)
  {
    return __call_with_n_evaluations<_Np>(
      [](auto... __xx) { return __vector_type_t<_Tp, _Np>{__xx...}; },
      [&__x](int) { return __x; });
  }

// }}}
// __generate_vector{{{
template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
  {
    return __vector_type_t<_Tp, _Np>{
      static_cast<_Tp>(__gen(_SizeConstant<_I>()))...};
  }

template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _V
  __generate_vector(_Gp&& __gen)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __generate_vector_impl<typename _VVT::value_type,
                                    _VVT::_S_full_size>(
        static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
    else
      return __generate_vector_impl<typename _VVT::value_type,
                                    _VVT::_S_partial_width>(
        static_cast<_Gp&&>(__gen),
        make_index_sequence<_VVT::_S_partial_width>());
  }

template <typename _Tp, size_t _Np, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector(_Gp&& __gen)
  {
    return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
                                            make_index_sequence<_Np>());
  }

// }}}
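// Example (editor's illustration, not part of the original header):
//
//   auto __iota = __generate_vector<float, 4>([](auto __i) { return __i; });
//   // {0.f, 1.f, 2.f, 3.f}
//   auto __ones = __vector_broadcast<4>(1.f);    // {1.f, 1.f, 1.f, 1.f}
//   auto __v    = __make_vector<float>(1, 2, 3); // 3 elements, padded storage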
1708 // __xor{{{
1709 template <typename _TW>
1710  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1711  __xor(_TW __a, _TW __b) noexcept
1712  {
1713  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1714  {
1715  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1716  _VectorTraitsImpl<_TW>>::value_type;
1717  if constexpr (is_floating_point_v<_Tp>)
1718  {
1719  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1720  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1721  ^ __vector_bitcast<_Ip>(__b));
1722  }
1723  else if constexpr (__is_vector_type_v<_TW>)
1724  return __a ^ __b;
1725  else
1726  return __a._M_data ^ __b._M_data;
1727  }
1728  else
1729  return __a ^ __b;
1730  }
1731 
1732 // }}}
1733 // __or{{{
1734 template <typename _TW>
1735  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1736  __or(_TW __a, _TW __b) noexcept
1737  {
1738  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1739  {
1740  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1741  _VectorTraitsImpl<_TW>>::value_type;
1742  if constexpr (is_floating_point_v<_Tp>)
1743  {
1744  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1745  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1746  | __vector_bitcast<_Ip>(__b));
1747  }
1748  else if constexpr (__is_vector_type_v<_TW>)
1749  return __a | __b;
1750  else
1751  return __a._M_data | __b._M_data;
1752  }
1753  else
1754  return __a | __b;
1755  }
1756 
1757 // }}}
1758 // __and{{{
1759 template <typename _TW>
1760  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1761  __and(_TW __a, _TW __b) noexcept
1762  {
1763  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1764  {
1765  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1766  _VectorTraitsImpl<_TW>>::value_type;
1767  if constexpr (is_floating_point_v<_Tp>)
1768  {
1769  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1770  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1771  & __vector_bitcast<_Ip>(__b));
1772  }
1773  else if constexpr (__is_vector_type_v<_TW>)
1774  return __a & __b;
1775  else
1776  return __a._M_data & __b._M_data;
1777  }
1778  else
1779  return __a & __b;
1780  }
1781 
1782 // }}}
1783 // __andnot{{{
1784 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1785 static constexpr struct
1786 {
1787  _GLIBCXX_SIMD_INTRINSIC __v4sf
1788  operator()(__v4sf __a, __v4sf __b) const noexcept
1789  { return __builtin_ia32_andnps(__a, __b); }
1790 
1791  _GLIBCXX_SIMD_INTRINSIC __v2df
1792  operator()(__v2df __a, __v2df __b) const noexcept
1793  { return __builtin_ia32_andnpd(__a, __b); }
1794 
1795  _GLIBCXX_SIMD_INTRINSIC __v2di
1796  operator()(__v2di __a, __v2di __b) const noexcept
1797  { return __builtin_ia32_pandn128(__a, __b); }
1798 
1799  _GLIBCXX_SIMD_INTRINSIC __v8sf
1800  operator()(__v8sf __a, __v8sf __b) const noexcept
1801  { return __builtin_ia32_andnps256(__a, __b); }
1802 
1803  _GLIBCXX_SIMD_INTRINSIC __v4df
1804  operator()(__v4df __a, __v4df __b) const noexcept
1805  { return __builtin_ia32_andnpd256(__a, __b); }
1806 
1807  _GLIBCXX_SIMD_INTRINSIC __v4di
1808  operator()(__v4di __a, __v4di __b) const noexcept
1809  {
1810  if constexpr (__have_avx2)
1811  return __builtin_ia32_andnotsi256(__a, __b);
1812  else
1813  return reinterpret_cast<__v4di>(
1814  __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1815  reinterpret_cast<__v4df>(__b)));
1816  }
1817 
1818  _GLIBCXX_SIMD_INTRINSIC __v16sf
1819  operator()(__v16sf __a, __v16sf __b) const noexcept
1820  {
1821  if constexpr (__have_avx512dq)
1822  return _mm512_andnot_ps(__a, __b);
1823  else
1824  return reinterpret_cast<__v16sf>(
1825  _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1826  reinterpret_cast<__v8di>(__b)));
1827  }
1828 
1829  _GLIBCXX_SIMD_INTRINSIC __v8df
1830  operator()(__v8df __a, __v8df __b) const noexcept
1831  {
1832  if constexpr (__have_avx512dq)
1833  return _mm512_andnot_pd(__a, __b);
1834  else
1835  return reinterpret_cast<__v8df>(
1836  _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1837  reinterpret_cast<__v8di>(__b)));
1838  }
1839 
1840  _GLIBCXX_SIMD_INTRINSIC __v8di
1841  operator()(__v8di __a, __v8di __b) const noexcept
1842  { return _mm512_andnot_si512(__a, __b); }
1843 } _S_x86_andnot;
1844 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1845 
1846 template <typename _TW>
1847  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1848  __andnot(_TW __a, _TW __b) noexcept
1849  {
1850  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1851  {
1852  using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1853  _VectorTraitsImpl<_TW>>;
1854  using _Tp = typename _TVT::value_type;
1855 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1856  if constexpr (sizeof(_TW) >= 16)
1857  {
1858  const auto __ai = __to_intrin(__a);
1859  const auto __bi = __to_intrin(__b);
1860  if (!__builtin_is_constant_evaluated()
1861  && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
1862  {
1863  const auto __r = _S_x86_andnot(__ai, __bi);
1864  if constexpr (is_convertible_v<decltype(__r), _TW>)
1865  return __r;
1866  else
1867  return reinterpret_cast<typename _TVT::type>(__r);
1868  }
1869  }
#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1871  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1872  return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
1873  & __vector_bitcast<_Ip>(__b));
1874  }
1875  else
1876  return ~__a & __b;
1877  }
1878 
1879 // }}}
1880 // __not{{{
1881 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1882  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
1883  __not(_Tp __a) noexcept
1884  {
1885  if constexpr (is_floating_point_v<typename _TVT::value_type>)
1886  return reinterpret_cast<typename _TVT::type>(
1887  ~__vector_bitcast<unsigned>(__a));
1888  else
1889  return ~__a;
1890  }
1891 
1892 // }}}
1893 // __concat{{{
1894 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1895  typename _R = __vector_type_t<typename _TVT::value_type,
1896  _TVT::_S_full_size * 2>>
1897  constexpr _R
  __concat(_Tp __x, _Tp __y)
1899  {
1900 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
1901  using _W
1902  = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
1903  conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
1904  long long, typename _TVT::value_type>>;
    constexpr int __input_width = sizeof(_Tp) / sizeof(_W);
    const auto __a = __vector_bitcast<_W>(__x);
    const auto __b = __vector_bitcast<_W>(__y);
1908  using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
1909 #else
    constexpr int __input_width = _TVT::_S_full_size;
    const _Tp& __a = __x;
    const _Tp& __b = __y;
1913  using _Up = _R;
1914 #endif
    if constexpr (__input_width == 2)
1916  return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
    else if constexpr (__input_width == 4)
1918  return reinterpret_cast<_R>(
1919  _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
    else if constexpr (__input_width == 8)
1921  return reinterpret_cast<_R>(
1922  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
1923  __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
    else if constexpr (__input_width == 16)
1925  return reinterpret_cast<_R>(
1926  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
1927  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
1928  __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
1929  __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
1930  __b[12], __b[13], __b[14], __b[15]});
    else if constexpr (__input_width == 32)
1932  return reinterpret_cast<_R>(
1933  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
1934  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
1935  __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
1936  __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
1937  __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
1938  __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
1939  __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
1940  __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
1941  __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
1942  __b[31]});
1943  }
1944 
1945 // }}}
1946 // __zero_extend {{{
1947 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1948  struct _ZeroExtendProxy
1949  {
1950  using value_type = typename _TVT::value_type;
1951  static constexpr size_t _Np = _TVT::_S_full_size;
1952  const _Tp __x;
1953 
1954  template <typename _To, typename _ToVT = _VectorTraits<_To>,
1955  typename
1956  = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
1957  _GLIBCXX_SIMD_INTRINSIC operator _To() const
1958  {
1959  constexpr size_t _ToN = _ToVT::_S_full_size;
1960  if constexpr (_ToN == _Np)
1961  return __x;
1962  else if constexpr (_ToN == 2 * _Np)
1963  {
1964 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
1965  if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
1966  return __vector_bitcast<value_type>(
1967  _mm256_insertf128_ps(__m256(), __x, 0));
1968  else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
1969  return __vector_bitcast<value_type>(
1970  _mm256_insertf128_pd(__m256d(), __x, 0));
1971  else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
1972  return __vector_bitcast<value_type>(
1973  _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
1974  else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
1975  {
1976  if constexpr (__have_avx512dq)
1977  return __vector_bitcast<value_type>(
1978  _mm512_insertf32x8(__m512(), __x, 0));
1979  else
1980  return reinterpret_cast<__m512>(
1981  _mm512_insertf64x4(__m512d(),
1982  reinterpret_cast<__m256d>(__x), 0));
1983  }
1984  else if constexpr (__have_avx512f
1985  && _TVT::template _S_is<double, 4>)
1986  return __vector_bitcast<value_type>(
1987  _mm512_insertf64x4(__m512d(), __x, 0));
1988  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
1989  return __vector_bitcast<value_type>(
1990  _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
1991 #endif
1992  return __concat(__x, _Tp());
1993  }
1994  else if constexpr (_ToN == 4 * _Np)
1995  {
1996 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
1997  if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
1998  {
1999  return __vector_bitcast<value_type>(
2000  _mm512_insertf64x2(__m512d(), __x, 0));
2001  }
2002  else if constexpr (__have_avx512f
2003  && is_floating_point_v<value_type>)
2004  {
2005  return __vector_bitcast<value_type>(
2006  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2007  0));
2008  }
2009  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2010  {
2011  return __vector_bitcast<value_type>(
2012  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2013  }
2014 #endif
2015  return __concat(__concat(__x, _Tp()),
2016  __vector_type_t<value_type, _Np * 2>());
2017  }
2018  else if constexpr (_ToN == 8 * _Np)
2019  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2020  __vector_type_t<value_type, _Np * 4>());
2021  else if constexpr (_ToN == 16 * _Np)
2022  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2023  __vector_type_t<value_type, _Np * 8>());
2024  else
2025  __assert_unreachable<_Tp>();
2026  }
2027  };
2028 
2029 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2030  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2031  __zero_extend(_Tp __x)
2032  { return {__x}; }
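// Example (illustrative; __x4 is a placeholder 4-float vector): the proxy
// converts to any wider vector type with the same value_type, zero-filling the
// new upper elements:
//   __vector_type_t<float, 8> __x8 = __zero_extend(__x4); // upper 4 are 0.f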
2033 
2034 // }}}
2035 // __extract<_Np, By>{{{
2036 template <int _Offset,
2037  int _SplitBy,
2038  typename _Tp,
2039  typename _TVT = _VectorTraits<_Tp>,
2040  typename _R = __vector_type_t<typename _TVT::value_type,
2041  _TVT::_S_full_size / _SplitBy>>
2042  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2043  __extract(_Tp __in)
2044  {
2045  using value_type = typename _TVT::value_type;
2046 #if _GLIBCXX_SIMD_X86INTRIN // {{{
2047  if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2048  {
2049  if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2050  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2051  else if constexpr (is_floating_point_v<value_type>)
2052  return __vector_bitcast<value_type>(
2053  _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2054  else
2055  return reinterpret_cast<_R>(
2056  _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2057  _Offset));
2058  }
2059  else
2060 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
2061  {
2062 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2063  using _W = conditional_t<
2064  is_floating_point_v<value_type>, double,
2065  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2066  static_assert(sizeof(_R) % sizeof(_W) == 0);
2067  constexpr int __return_width = sizeof(_R) / sizeof(_W);
2068  using _Up = __vector_type_t<_W, __return_width>;
2069  const auto __x = __vector_bitcast<_W>(__in);
2070 #else
2071  constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2072  using _Up = _R;
2073  const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2074  = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2075 #endif
2076  constexpr int _O = _Offset * __return_width;
2077  return __call_with_subscripts<__return_width, _O>(
2078  __x, [](auto... __entries) {
2079  return reinterpret_cast<_R>(_Up{__entries...});
2080  });
2081  }
2082  }
2083 
2084 // }}}
2085 // __lo/__hi64[z]{{{
2086 template <typename _Tp,
2087  typename _R
2088  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2089  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2090  __lo64(_Tp __x)
2091  {
2092  _R __r{};
2093  __builtin_memcpy(&__r, &__x, 8);
2094  return __r;
2095  }
2096 
2097 template <typename _Tp,
2098  typename _R
2099  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2100  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2101  __hi64(_Tp __x)
2102  {
2103  static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2104  _R __r{};
2105  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2106  return __r;
2107  }
2108 
2109 template <typename _Tp,
2110  typename _R
2111  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2112  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2113  __hi64z([[maybe_unused]] _Tp __x)
2114  {
2115  _R __r{};
2116  if constexpr (sizeof(_Tp) == 16)
2117  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2118  return __r;
2119  }
2120 
2121 // }}}
2122 // __lo/__hi128{{{
2123 template <typename _Tp>
2124  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2125  __lo128(_Tp __x)
2126  { return __extract<0, sizeof(_Tp) / 16>(__x); }
2127 
2128 template <typename _Tp>
2129  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2130  __hi128(_Tp __x)
2131  {
2132  static_assert(sizeof(__x) == 32);
2133  return __extract<1, 2>(__x);
2134  }
2135 
2136 // }}}
2137 // __lo/__hi256{{{
2138 template <typename _Tp>
2139  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2140  __lo256(_Tp __x)
2141  {
2142  static_assert(sizeof(__x) == 64);
2143  return __extract<0, 2>(__x);
2144  }
2145 
2146 template <typename _Tp>
2147  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2148  __hi256(_Tp __x)
2149  {
2150  static_assert(sizeof(__x) == 64);
2151  return __extract<1, 2>(__x);
2152  }
2153 
2154 // }}}
2155 // __auto_bitcast{{{
2156 template <typename _Tp>
2157  struct _AutoCast
2158  {
2159  static_assert(__is_vector_type_v<_Tp>);
2160 
2161  const _Tp __x;
2162 
2163  template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2164  _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2165  { return __intrin_bitcast<typename _UVT::type>(__x); }
2166  };
2167 
2168 template <typename _Tp>
2169  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2170  __auto_bitcast(const _Tp& __x)
2171  { return {__x}; }
2172 
2173 template <typename _Tp, size_t _Np>
2174  _GLIBCXX_SIMD_INTRINSIC constexpr
2175  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2176  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2177  { return {__x._M_data}; }
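// Example (illustrative; __v4f is a placeholder 4-float vector): _AutoCast
// defers the choice of target type to the context and reinterprets the bytes:
//   __vector_type_t<int, 4> __bits = __auto_bitcast(__v4f); // same 16 bytes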
2178 
2179 // }}}
2180 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2181 
2182 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2183 // __bool_storage_member_type{{{
2184 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2185 template <size_t _Size>
2186  struct __bool_storage_member_type
2187  {
2188  static_assert((_Size & (_Size - 1)) != 0,
2189  "This trait may only be used for non-power-of-2 sizes. "
2190  "Power-of-2 sizes must be specialized.");
2191  using type =
2192  typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2193  };
2194 
2195 template <>
2196  struct __bool_storage_member_type<1> { using type = bool; };
2197 
2198 template <>
2199  struct __bool_storage_member_type<2> { using type = __mmask8; };
2200 
2201 template <>
2202  struct __bool_storage_member_type<4> { using type = __mmask8; };
2203 
2204 template <>
2205  struct __bool_storage_member_type<8> { using type = __mmask8; };
2206 
2207 template <>
2208  struct __bool_storage_member_type<16> { using type = __mmask16; };
2209 
2210 template <>
2211  struct __bool_storage_member_type<32> { using type = __mmask32; };
2212 
2213 template <>
2214  struct __bool_storage_member_type<64> { using type = __mmask64; };
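// Example (illustrative): non-power-of-2 sizes delegate via __bit_ceil, so
// __bool_storage_member_type<7>::type is __mmask8 (7 rounds up to 8).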
#endif // _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2216 
2217 // }}}
2218 // __intrinsic_type (x86){{{
2219 // the following excludes bool via __is_vectorizable
2220 #if _GLIBCXX_SIMD_HAVE_SSE
2221 template <typename _Tp, size_t _Bytes>
2222  struct __intrinsic_type<_Tp, _Bytes,
2223  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2224  {
2225  static_assert(!is_same_v<_Tp, long double>,
2226  "no __intrinsic_type support for long double on x86");
2227 
2228  static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16
2229  : _Bytes <= 32 ? 32
2230  : 64;
2231 
2232  using type [[__gnu__::__vector_size__(_S_VBytes)]]
2233  = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2234  };
2235 #endif // _GLIBCXX_SIMD_HAVE_SSE
2236 
2237 // }}}
2238 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2239 // __intrinsic_type (ARM){{{
2240 #if _GLIBCXX_SIMD_HAVE_NEON
2241 template <>
2242  struct __intrinsic_type<float, 8, void>
2243  { using type = float32x2_t; };
2244 
2245 template <>
2246  struct __intrinsic_type<float, 16, void>
2247  { using type = float32x4_t; };
2248 
2249 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2250 template <>
2251  struct __intrinsic_type<double, 8, void>
2252  { using type = float64x1_t; };
2253 
2254 template <>
2255  struct __intrinsic_type<double, 16, void>
2256  { using type = float64x2_t; };
2257 #endif
2258 
2259 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2260 template <> \
2261  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2262  _Np * _Bits / 8, void> \
2263  { using type = int##_Bits##x##_Np##_t; }; \
2264 template <> \
2265  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2266  _Np * _Bits / 8, void> \
2267  { using type = uint##_Bits##x##_Np##_t; }
2268 _GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2269 _GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2270 _GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2271 _GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2272 _GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2273 _GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2274 _GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2275 _GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2276 #undef _GLIBCXX_SIMD_ARM_INTRIN
2277 
2278 template <typename _Tp, size_t _Bytes>
2279  struct __intrinsic_type<_Tp, _Bytes,
2280  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2281  {
2282  static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2283  using _Ip = __int_for_sizeof_t<_Tp>;
2284  using _Up = conditional_t<
2285  is_floating_point_v<_Tp>, _Tp,
2286  conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2287  static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2288  "should use explicit specialization above");
2289  using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2290  };
2291 #endif // _GLIBCXX_SIMD_HAVE_NEON
2292 
2293 // }}}
2294 // __intrinsic_type (PPC){{{
2295 #ifdef __ALTIVEC__
2296 template <typename _Tp>
2297  struct __intrinsic_type_impl;
2298 
2299 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2300  template <> \
2301  struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2302 _GLIBCXX_SIMD_PPC_INTRIN(float);
2303 _GLIBCXX_SIMD_PPC_INTRIN(double);
2304 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
2305 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2306 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
2307 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2308 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
2309 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2310 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
2311 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2312 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2313 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2314 #undef _GLIBCXX_SIMD_PPC_INTRIN
2315 
2316 template <typename _Tp, size_t _Bytes>
2317  struct __intrinsic_type<_Tp, _Bytes,
2318  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2319  {
2320  static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2321  // allow _Tp == long double with -mlong-double-64
2322  static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2323  "no __intrinsic_type support for long double on PPC");
2324 #ifndef __VSX__
2325  static_assert(!is_same_v<_Tp, double>,
2326  "no __intrinsic_type support for double on PPC w/o VSX");
2327 #endif
2328  using type =
2329  typename __intrinsic_type_impl<
2330  conditional_t<is_floating_point_v<_Tp>,
2331  conditional_t<_S_is_ldouble, double, _Tp>,
2332  __int_for_sizeof_t<_Tp>>>::type;
2333  };
2334 #endif // __ALTIVEC__
2335 
2336 // }}}
2337 // _SimdWrapper<bool>{{{1
2338 template <size_t _Width>
2339  struct _SimdWrapper<bool, _Width,
2340  void_t<typename __bool_storage_member_type<_Width>::type>>
2341  {
2342  using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2343  using value_type = bool;
2344 
2345  static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2346 
2347  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2348  __as_full_vector() const { return _M_data; }
2349 
2350  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_BuiltinType __k)
  : _M_data(__k) {}
2353 
2354  _GLIBCXX_SIMD_INTRINSIC operator const _BuiltinType&() const
2355  { return _M_data; }
2356 
2357  _GLIBCXX_SIMD_INTRINSIC operator _BuiltinType&()
2358  { return _M_data; }
2359 
2360  _GLIBCXX_SIMD_INTRINSIC _BuiltinType __intrin() const
2361  { return _M_data; }
2362 
2363  _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator[](size_t __i) const
2364  { return _M_data & (_BuiltinType(1) << __i); }
2365 
2366  template <size_t __i>
2367  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2368  operator[](_SizeConstant<__i>) const
2369  { return _M_data & (_BuiltinType(1) << __i); }
2370 
2371  _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, value_type __x)
2372  {
2373  if (__x)
2374  _M_data |= (_BuiltinType(1) << __i);
2375  else
2376  _M_data &= ~(_BuiltinType(1) << __i);
2377  }
2378 
2379  _GLIBCXX_SIMD_INTRINSIC
2380  constexpr bool _M_is_constprop() const
2381  { return __builtin_constant_p(_M_data); }
2382 
2383  _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2384  {
2385  if (__builtin_constant_p(_M_data))
2386  {
2387  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2388  constexpr _BuiltinType __active_mask
2389  = ~_BuiltinType() >> (__nbits - _Width);
2390  return (_M_data & __active_mask) == 0;
2391  }
2392  return false;
2393  }
2394 
2395  _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2396  {
2397  if (__builtin_constant_p(_M_data))
2398  {
2399  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2400  constexpr _BuiltinType __active_mask
2401  = ~_BuiltinType() >> (__nbits - _Width);
2402  return (_M_data & __active_mask) == __active_mask;
2403  }
2404  return false;
2405  }
2406 
2407  _BuiltinType _M_data;
2408  };
2409 
2410 // _SimdWrapperBase{{{1
2411 template <bool _MustZeroInitPadding, typename _BuiltinType>
2412  struct _SimdWrapperBase;
2413 
2414 template <typename _BuiltinType>
2415  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2416  {
2417  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() = default;
2418  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2419  : _M_data(__init)
2420  {}
2421 
2422  _BuiltinType _M_data;
2423  };
2424 
2425 template <typename _BuiltinType>
2426  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2427  // never become SNaN
2428  {
2429  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() : _M_data() {}
2430  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2431  : _M_data(__init)
2432  {}
2433 
2434  _BuiltinType _M_data;
2435  };
2436 
2437 // }}}
2438 // _SimdWrapper{{{
2439 template <typename _Tp, size_t _Width>
2440  struct _SimdWrapper<
2441  _Tp, _Width,
2442  void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2443  : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2444  && sizeof(_Tp) * _Width
2445  == sizeof(__vector_type_t<_Tp, _Width>),
2446  __vector_type_t<_Tp, _Width>>
2447  {
2448  using _Base
2449  = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2450  && sizeof(_Tp) * _Width
2451  == sizeof(__vector_type_t<_Tp, _Width>),
2452  __vector_type_t<_Tp, _Width>>;
2453 
2454  static_assert(__is_vectorizable_v<_Tp>);
2455  static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
2456 
2457  using _BuiltinType = __vector_type_t<_Tp, _Width>;
2458  using value_type = _Tp;
2459 
2460  static inline constexpr size_t _S_full_size
2461  = sizeof(_BuiltinType) / sizeof(value_type);
2462  static inline constexpr int _S_size = _Width;
2463  static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2464 
2465  using _Base::_M_data;
2466 
2467  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2468  __as_full_vector() const
2469  { return _M_data; }
2470 
2471  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init)
2472  : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2473  [&](auto __i) { return __init.begin()[__i.value]; })) {}
2474 
2475  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2476  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(const _SimdWrapper&)
2477  = default;
2478  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_SimdWrapper&&) = default;
2479 
2480  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2481  operator=(const _SimdWrapper&) = default;
2482  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2483  operator=(_SimdWrapper&&) = default;
2484 
2485  template <typename _V, typename = enable_if_t<disjunction_v<
2486  is_same<_V, __vector_type_t<_Tp, _Width>>,
2487  is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2488  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_V __x)
2489  // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2490  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2491 
2492  template <typename... _As,
2493  typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2494  && sizeof...(_As) <= _Width)>>
2495  _GLIBCXX_SIMD_INTRINSIC constexpr
2496  operator _SimdTuple<_Tp, _As...>() const
2497  {
      const auto& __dd = _M_data; // workaround for GCC7 ICE
      return __generate_from_n_evaluations<sizeof...(_As),
                                           _SimdTuple<_Tp, _As...>>([&](
        auto __i) constexpr { return __dd[int(__i)]; });
2502  }
2503 
2504  _GLIBCXX_SIMD_INTRINSIC constexpr operator const _BuiltinType&() const
2505  { return _M_data; }
2506 
2507  _GLIBCXX_SIMD_INTRINSIC constexpr operator _BuiltinType&()
2508  { return _M_data; }
2509 
2510  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](size_t __i) const
2511  { return _M_data[__i]; }
2512 
2513  template <size_t __i>
2514  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](_SizeConstant<__i>) const
2515  { return _M_data[__i]; }
2516 
2517  _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, _Tp __x)
2518  { _M_data[__i] = __x; }
2519 
2520  _GLIBCXX_SIMD_INTRINSIC
2521  constexpr bool _M_is_constprop() const
2522  { return __builtin_constant_p(_M_data); }
2523 
2524  _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2525  {
2526  if (__builtin_constant_p(_M_data))
2527  {
2528  bool __r = true;
2529  if constexpr (is_floating_point_v<_Tp>)
2530  {
2531  using _Ip = __int_for_sizeof_t<_Tp>;
2532  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2533  __execute_n_times<_Width>(
2534  [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2535  }
2536  else
2537  __execute_n_times<_Width>(
2538  [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2539  return __r;
2540  }
2541  return false;
2542  }
2543 
2544  _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2545  {
2546  if (__builtin_constant_p(_M_data))
2547  {
2548  bool __r = true;
2549  if constexpr (is_floating_point_v<_Tp>)
2550  {
2551  using _Ip = __int_for_sizeof_t<_Tp>;
2552  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2553  __execute_n_times<_Width>(
2554  [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2555  }
2556  else
2557  __execute_n_times<_Width>(
2558  [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2559  return __r;
2560  }
2561  return false;
2562  }
2563  };
2564 
2565 // }}}
2566 
2567 // __vectorized_sizeof {{{
2568 template <typename _Tp>
2569  constexpr size_t
2570  __vectorized_sizeof()
2571  {
2572  if constexpr (!__is_vectorizable_v<_Tp>)
2573  return 0;
2574 
2575  if constexpr (sizeof(_Tp) <= 8)
2576  {
2577  // X86:
2578  if constexpr (__have_avx512bw)
2579  return 64;
2580  if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2581  return 64;
2582  if constexpr (__have_avx2)
2583  return 32;
2584  if constexpr (__have_avx && is_floating_point_v<_Tp>)
2585  return 32;
2586  if constexpr (__have_sse2)
2587  return 16;
2588  if constexpr (__have_sse && is_same_v<_Tp, float>)
2589  return 16;
2590  /* The following is too much trouble because of mixed MMX and x87 code.
       * While nothing here explicitly uses MMX instructions or registers,
       * they are still emitted, but no EMMS cleanup is done.
2593  if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2594  return 8;
2595  */
2596 
2597  // PowerPC:
2598  if constexpr (__have_power8vec
2599  || (__have_power_vmx && (sizeof(_Tp) < 8))
2600  || (__have_power_vsx && is_floating_point_v<_Tp>) )
2601  return 16;
2602 
2603  // ARM:
2604  if constexpr (__have_neon_a64
2605  || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2606  return 16;
2607  if constexpr (__have_neon
2608  && sizeof(_Tp) < 8
                 // Only allow fp if the user allows non-IEC559 fp (e.g.
                 // via -ffast-math). ARMv7 NEON fp does not conform to
                 // IEC559.
2612  && (__support_neon_float || !is_floating_point_v<_Tp>))
2613  return 16;
2614  }
2615 
2616  return sizeof(_Tp);
2617  }
2618 
2619 // }}}
2620 namespace simd_abi {
2621 // most of simd_abi is defined in simd_detail.h
2622 template <typename _Tp>
2623  inline constexpr int max_fixed_size
2624  = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2625 
2626 // compatible {{{
2627 #if defined __x86_64__ || defined __aarch64__
2628 template <typename _Tp>
2629  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2630 #elif defined __ARM_NEON
2631 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2632 // ABI?)
2633 template <typename _Tp>
2634  using compatible
2635  = conditional_t<(sizeof(_Tp) < 8
2636  && (__support_neon_float || !is_floating_point_v<_Tp>)),
2637  _VecBuiltin<16>, scalar>;
2638 #else
2639 template <typename>
2640  using compatible = scalar;
2641 #endif
2642 
2643 // }}}
2644 // native {{{
2645 template <typename _Tp>
2646  constexpr auto
2647  __determine_native_abi()
2648  {
2649  constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2650  if constexpr (__bytes == sizeof(_Tp))
2651  return static_cast<scalar*>(nullptr);
2652  else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2653  return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2654  else
2655  return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2656  }
2657 
2658 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2659  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2660 
2661 // }}}
2662 // __default_abi {{{
2663 #if defined _GLIBCXX_SIMD_DEFAULT_ABI
2664 template <typename _Tp>
2665  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2666 #else
2667 template <typename _Tp>
2668  using __default_abi = compatible<_Tp>;
2669 #endif
2670 
2671 // }}}
2672 } // namespace simd_abi
2673 
2674 // traits {{{1
2675 // is_abi_tag {{{2
2676 template <typename _Tp, typename = void_t<>>
2677  struct is_abi_tag : false_type {};
2678 
2679 template <typename _Tp>
2680  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
2681  : public _Tp::_IsValidAbiTag {};
2682 
2683 template <typename _Tp>
2684  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2685 
2686 // is_simd(_mask) {{{2
2687 template <typename _Tp>
2688  struct is_simd : public false_type {};
2689 
2690 template <typename _Tp>
2691  inline constexpr bool is_simd_v = is_simd<_Tp>::value;
2692 
2693 template <typename _Tp>
2694  struct is_simd_mask : public false_type {};
2695 
2696 template <typename _Tp>
2697 inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2698 
2699 // simd_size {{{2
2700 template <typename _Tp, typename _Abi, typename = void>
2701  struct __simd_size_impl {};
2702 
2703 template <typename _Tp, typename _Abi>
2704  struct __simd_size_impl<
2705  _Tp, _Abi,
2706  enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2707  : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2708 
2709 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2710  struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2711 
2712 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2713  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2714 
2715 // simd_abi::deduce {{{2
2716 template <typename _Tp, size_t _Np, typename = void>
2717  struct __deduce_impl;
2718 
2719 namespace simd_abi {
2720 /**
2721  * @tparam _Tp The requested `value_type` for the elements.
2722  * @tparam _Np The requested number of elements.
2723  * @tparam _Abis This parameter is ignored, since this implementation cannot
 * make any use of it. Either a good native ABI is matched and used as the
 * `type` alias, or the `fixed_size<_Np>` ABI is used, which internally is
 * built from the best matching native ABIs.
2727  */
2728 template <typename _Tp, size_t _Np, typename...>
2729  struct deduce : __deduce_impl<_Tp, _Np> {};
2730 
2731 template <typename _Tp, size_t _Np, typename... _Abis>
2732  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
2733 } // namespace simd_abi
2734 
2735 // }}}2
2736 // rebind_simd {{{2
2737 template <typename _Tp, typename _V, typename = void>
2738  struct rebind_simd;
2739 
2740 template <typename _Tp, typename _Up, typename _Abi>
2741  struct rebind_simd<
2742  _Tp, simd<_Up, _Abi>,
2743  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2744  {
2745  using type
2746  = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2747  };
2748 
2749 template <typename _Tp, typename _Up, typename _Abi>
2750  struct rebind_simd<
2751  _Tp, simd_mask<_Up, _Abi>,
2752  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2753  {
2754  using type
2755  = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2756  };
2757 
2758 template <typename _Tp, typename _V>
2759  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
2760 
2761 // resize_simd {{{2
2762 template <int _Np, typename _V, typename = void>
2763  struct resize_simd;
2764 
2765 template <int _Np, typename _Tp, typename _Abi>
2766  struct resize_simd<_Np, simd<_Tp, _Abi>,
2767  void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2768  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2769 
2770 template <int _Np, typename _Tp, typename _Abi>
2771  struct resize_simd<_Np, simd_mask<_Tp, _Abi>,
2772  void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2773  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2774 
2775 template <int _Np, typename _V>
2776  using resize_simd_t = typename resize_simd<_Np, _V>::type;
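// Example (illustrative):
//   rebind_simd_t<int, native_simd<float>> // same element count, int elements
//   resize_simd_t<4, native_simd<float>>   // 4 floats, ABI via simd_abi::deduce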
2777 
2778 // }}}2
2779 // memory_alignment {{{2
2780 template <typename _Tp, typename _Up = typename _Tp::value_type>
2781  struct memory_alignment
2782  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
2783 
2784 template <typename _Tp, typename _Up = typename _Tp::value_type>
2785  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
2786 
2787 // class template simd [simd] {{{1
2788 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2789  class simd;
2790 
2791 template <typename _Tp, typename _Abi>
2792  struct is_simd<simd<_Tp, _Abi>> : public true_type {};
2793 
2794 template <typename _Tp>
2795  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
2796 
2797 template <typename _Tp, int _Np>
2798  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
2799 
2800 template <typename _Tp, size_t _Np>
2801  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2802 
2803 // class template simd_mask [simd_mask] {{{1
2804 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2805  class simd_mask;
2806 
2807 template <typename _Tp, typename _Abi>
2808  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
2809 
2810 template <typename _Tp>
2811  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
2812 
2813 template <typename _Tp, int _Np>
2814  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
2815 
2816 template <typename _Tp, size_t _Np>
2817  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2818 
2819 // casts [simd.casts] {{{1
2820 // static_simd_cast {{{2
2821 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>,
2822  typename = void>
2823  struct __static_simd_cast_return_type;
2824 
2825 template <typename _Tp, typename _A0, typename _Up, typename _Ap>
2826  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false,
2827  void>
2828  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
2829 
2830 template <typename _Tp, typename _Up, typename _Ap>
2831  struct __static_simd_cast_return_type<
2832  _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
2833  { using type = _Tp; };
2834 
2835 template <typename _Tp, typename _Ap>
2836  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
2837 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2838  enable_if_t<__is_vectorizable_v<_Tp>>
2839 #else
2840  void
2841 #endif
2842  >
2843  { using type = simd<_Tp, _Ap>; };
2844 
2845 template <typename _Tp, typename = void>
  struct __safe_make_signed { using type = _Tp; };
2847 
2848 template <typename _Tp>
2849  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
2850  {
2851  // the extra make_unsigned_t is because of PR85951
2852  using type = make_signed_t<make_unsigned_t<_Tp>>;
2853  };
2854 
template <typename _Tp>
  using __safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
2857 
2858 template <typename _Tp, typename _Up, typename _Ap>
2859  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
2860 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2861  enable_if_t<__is_vectorizable_v<_Tp>>
2862 #else
2863  void
2864 #endif
2865  >
2866  {
2867  using type = conditional_t<
2868  (is_integral_v<_Up> && is_integral_v<_Tp> &&
2869 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
2870  is_signed_v<_Up> != is_signed_v<_Tp> &&
2871 #endif
       is_same_v<__safe_make_signed_t<_Up>, __safe_make_signed_t<_Tp>>),
2873  simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
2874  };
2875 
2876 template <typename _Tp, typename _Up, typename _Ap,
2877  typename _R
2878  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
2879  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
2880  static_simd_cast(const simd<_Up, _Ap>& __x)
2881  {
2882  if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
2883  return __x;
2884  else
2885  {
2886  _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
2887  __c;
2888  return _R(__private_init, __c(__data(__x)));
2889  }
2890  }
2891 
2892 namespace __proposed {
2893 template <typename _Tp, typename _Up, typename _Ap,
2894  typename _R
2895  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
2896  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
2897  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
2898  {
2899  using _RM = typename _R::mask_type;
2900  return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
2901  typename _RM::simd_type::value_type>(__x)};
2902  }
2903 } // namespace __proposed
2904 
2905 // simd_cast {{{2
2906 template <typename _Tp, typename _Up, typename _Ap,
2907  typename _To = __value_type_or_identity_t<_Tp>>
2908  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
2909  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
2910  -> decltype(static_simd_cast<_Tp>(__x))
2911  { return static_simd_cast<_Tp>(__x); }
2912 
2913 namespace __proposed {
2914 template <typename _Tp, typename _Up, typename _Ap,
2915  typename _To = __value_type_or_identity_t<_Tp>>
2916  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
2917  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
2918  -> decltype(static_simd_cast<_Tp>(__x))
2919  { return static_simd_cast<_Tp>(__x); }
2920 } // namespace __proposed
2921 
2922 // }}}2
2923 // resizing_simd_cast {{{
2924 namespace __proposed {
2925 /* Proposed spec:
2926 
2927 template <class T, class U, class Abi>
2928 T resizing_simd_cast(const simd<U, Abi>& x)
2929 
2930 p1 Constraints:
2931  - is_simd_v<T> is true and
2932  - T::value_type is the same type as U
2933 
2934 p2 Returns:
2935  A simd object with the i^th element initialized to x[i] for all i in the
2936  range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
2937  than simd_size_v<U, Abi>, the remaining elements are value-initialized.
2938 
2939 template <class T, class U, class Abi>
2940 T resizing_simd_cast(const simd_mask<U, Abi>& x)
2941 
2942 p1 Constraints: is_simd_mask_v<T> is true
2943 
2944 p2 Returns:
 A simd_mask object with the i^th element initialized to x[i] for all i in
 the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
 than simd_size_v<U, Abi>, the remaining elements are initialized to false.
2948 
2949  */
2950 
2951 template <typename _Tp, typename _Up, typename _Ap>
2952  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
2953  conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
2954  resizing_simd_cast(const simd<_Up, _Ap>& __x)
2955  {
2956  if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
2957  return __x;
2958  else if constexpr (simd_size_v<_Up, _Ap> == 1)
2959  {
2960  _Tp __r{};
2961  __r[0] = __x[0];
2962  return __r;
2963  }
2964  else if constexpr (_Tp::size() == 1)
2965  return __x[0];
2966  else if constexpr (sizeof(_Tp) == sizeof(__x)
2967  && !__is_fixed_size_abi_v<_Ap>)
2968  return {__private_init,
2969  __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
2970  _Ap::_S_masked(__data(__x))._M_data)};
2971  else
2972  {
2973  _Tp __r{};
2974  __builtin_memcpy(&__data(__r), &__data(__x),
2975  sizeof(_Up)
2976  * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
2977  return __r;
2978  }
2979  }
2980 
2981 template <typename _Tp, typename _Up, typename _Ap>
2982  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
2983  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
2984  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
2985  {
2986  return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
2987  typename _Tp::simd_type::value_type>(__x)};
2988  }
2989 } // namespace __proposed
2990 
2991 // }}}
2992 // to_fixed_size {{{2
2993 template <typename _Tp, int _Np>
2994  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
2995  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
2996  { return __x; }
2997 
2998 template <typename _Tp, int _Np>
2999  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3000  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3001  { return __x; }
3002 
3003 template <typename _Tp, typename _Ap>
3004  _GLIBCXX_SIMD_INTRINSIC auto
3005  to_fixed_size(const simd<_Tp, _Ap>& __x)
3006  {
3007  return simd<_Tp, simd_abi::fixed_size<simd_size_v<_Tp, _Ap>>>([&__x](
3008  auto __i) constexpr { return __x[__i]; });
3009  }
3010 
3011 template <typename _Tp, typename _Ap>
3012  _GLIBCXX_SIMD_INTRINSIC auto
3013  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3014  {
3015  constexpr int _Np = simd_mask<_Tp, _Ap>::size();
3016  fixed_size_simd_mask<_Tp, _Np> __r;
3017  __execute_n_times<_Np>([&](auto __i) constexpr { __r[__i] = __x[__i]; });
3018  return __r;
3019  }
3020 
3021 // to_native {{{2
3022 template <typename _Tp, int _Np>
3023  _GLIBCXX_SIMD_INTRINSIC
3024  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3025  to_native(const fixed_size_simd<_Tp, _Np>& __x)
3026  {
3027  alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3028  __x.copy_to(__mem, vector_aligned);
3029  return {__mem, vector_aligned};
3030  }
3031 
3032 template <typename _Tp, size_t _Np>
3033  _GLIBCXX_SIMD_INTRINSIC
3034  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3035  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3036  {
3037  return native_simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; });
3038  }
3039 
3040 // to_compatible {{{2
3041 template <typename _Tp, size_t _Np>
3042  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3043  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3044  {
3045  alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3046  __x.copy_to(__mem, vector_aligned);
3047  return {__mem, vector_aligned};
3048  }
3049 
3050 template <typename _Tp, size_t _Np>
3051  _GLIBCXX_SIMD_INTRINSIC
3052  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3053  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3054  { return simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; }); }
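// Example (illustrative; __x is a placeholder native_simd<float>): the
// conversions above round-trip as long as the sizes match:
//   auto __f = to_fixed_size(__x); // fixed_size_simd<float, __x.size()>
//   auto __n = to_native(__f);     // native_simd<float> again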
3055 
3056 // masked assignment [simd_mask.where] {{{1
3057 
3058 // where_expression {{{1
3059 // const_where_expression<M, T> {{{2
3060 template <typename _M, typename _Tp>
3061  class const_where_expression
3062  {
3063  using _V = _Tp;
3064  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3065 
3066  struct _Wrapper { using value_type = _V; };
3067 
3068  protected:
3069  using _Impl = typename _V::_Impl;
3070 
3071  using value_type =
3072  typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3073 
3074  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3075  __get_mask(const const_where_expression& __x)
3076  { return __x._M_k; }
3077 
3078  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3079  __get_lvalue(const const_where_expression& __x)
3080  { return __x._M_value; }
3081 
3082  const _M& _M_k;
3083  _Tp& _M_value;
3084 
3085  public:
3086  const_where_expression(const const_where_expression&) = delete;
3087  const_where_expression& operator=(const const_where_expression&) = delete;
3088 
  _GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& __kk, const _Tp& __dd)
  : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}
3091 
3092  _GLIBCXX_SIMD_INTRINSIC _V
3093  operator-() const&&
3094  {
3095  return {__private_init,
3096  _Impl::template _S_masked_unary<negate>(__data(_M_k),
3097  __data(_M_value))};
3098  }
3099 
3100  template <typename _Up, typename _Flags>
3101  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3102  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3103  {
3104  return {__private_init,
3105  _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3106  _Flags::template _S_apply<_V>(__mem))};
3107  }
3108 
3109  template <typename _Up, typename _Flags>
3110  _GLIBCXX_SIMD_INTRINSIC void
3111  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3112  {
3113  _Impl::_S_masked_store(__data(_M_value),
3114  _Flags::template _S_apply<_V>(__mem),
3115  __data(_M_k));
3116  }
3117  };
3118 
3119 // const_where_expression<bool, T> {{{2
3120 template <typename _Tp>
3121  class const_where_expression<bool, _Tp>
3122  {
3123  using _M = bool;
3124  using _V = _Tp;
3125 
3126  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3127 
3128  struct _Wrapper { using value_type = _V; };
3129 
3130  protected:
3131  using value_type =
3132  typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3133 
3134  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3135  __get_mask(const const_where_expression& __x)
3136  { return __x._M_k; }
3137 
3138  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3139  __get_lvalue(const const_where_expression& __x)
3140  { return __x._M_value; }
3141 
3142  const bool _M_k;
3143  _Tp& _M_value;
3144 
3145  public:
3146  const_where_expression(const const_where_expression&) = delete;
3147  const_where_expression& operator=(const const_where_expression&) = delete;
3148 
  _GLIBCXX_SIMD_INTRINSIC const_where_expression(const bool __kk, const _Tp& __dd)
  : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}
3151 
3152  _GLIBCXX_SIMD_INTRINSIC _V operator-() const&&
3153  { return _M_k ? -_M_value : _M_value; }
3154 
3155  template <typename _Up, typename _Flags>
3156  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3157  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3158  { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3159 
3160  template <typename _Up, typename _Flags>
3161  _GLIBCXX_SIMD_INTRINSIC void
3162  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3163  {
3164  if (_M_k)
3165  __mem[0] = _M_value;
3166  }
3167  };
3168 
3169 // where_expression<M, T> {{{2
3170 template <typename _M, typename _Tp>
3171  class where_expression : public const_where_expression<_M, _Tp>
3172  {
3173  using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3174 
  static_assert(!is_const<_Tp>::value,
                "where_expression may only be instantiated with a non-const "
                "_Tp parameter");
3178 
3179  using typename const_where_expression<_M, _Tp>::value_type;
3180  using const_where_expression<_M, _Tp>::_M_k;
3181  using const_where_expression<_M, _Tp>::_M_value;
3182 
  static_assert(
    is_same<typename _M::abi_type, typename _Tp::abi_type>::value);
  static_assert(_M::size() == _Tp::size());
3186 
3187  _GLIBCXX_SIMD_INTRINSIC friend _Tp& __get_lvalue(where_expression& __x)
3188  { return __x._M_value; }
3189 
3190  public:
3191  where_expression(const where_expression&) = delete;
3192  where_expression& operator=(const where_expression&) = delete;
3193 
  _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& __dd)
  : const_where_expression<_M, _Tp>(__kk, __dd) {}
3196 
3197  template <typename _Up>
3198  _GLIBCXX_SIMD_INTRINSIC void operator=(_Up&& __x) &&
3199  {
3200  _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3201  __to_value_type_or_member_type<_Tp>(
3202  static_cast<_Up&&>(__x)));
3203  }
3204 
3205 #define _GLIBCXX_SIMD_OP_(__op, __name) \
3206  template <typename _Up> \
3207  _GLIBCXX_SIMD_INTRINSIC void operator __op##=(_Up&& __x)&& \
3208  { \
3209  _Impl::template _S_masked_cassign( \
3210  __data(_M_k), __data(_M_value), \
3211  __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3212  [](auto __impl, auto __lhs, auto __rhs) constexpr { \
3213  return __impl.__name(__lhs, __rhs); \
3214  }); \
3215  } \
3216  static_assert(true)
3217  _GLIBCXX_SIMD_OP_(+, _S_plus);
3218  _GLIBCXX_SIMD_OP_(-, _S_minus);
3219  _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3220  _GLIBCXX_SIMD_OP_(/, _S_divides);
3221  _GLIBCXX_SIMD_OP_(%, _S_modulus);
3222  _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3223  _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3224  _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3225  _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3226  _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3227 #undef _GLIBCXX_SIMD_OP_
3228 
3229  _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3230  {
3231  __data(_M_value)
3232  = _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3233  __data(_M_value));
3234  }
3235 
3236  _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3237  {
3238  __data(_M_value)
3239  = _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3240  __data(_M_value));
3241  }
3242 
3243  _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3244  {
3245  __data(_M_value)
3246  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3247  __data(_M_value));
3248  }
3249 
3250  _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3251  {
3252  __data(_M_value)
3253  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3254  __data(_M_value));
3255  }
3256 
3257  // intentionally hides const_where_expression::copy_from
3258  template <typename _Up, typename _Flags>
3259  _GLIBCXX_SIMD_INTRINSIC void
3260  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3261  {
3262  __data(_M_value)
3263  = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3264  _Flags::template _S_apply<_Tp>(__mem));
3265  }
3266  };
3267 
3268 // where_expression<bool, T> {{{2
3269 template <typename _Tp>
3270  class where_expression<bool, _Tp> : public const_where_expression<bool, _Tp>
3271  {
3272  using _M = bool;
3273  using typename const_where_expression<_M, _Tp>::value_type;
3274  using const_where_expression<_M, _Tp>::_M_k;
3275  using const_where_expression<_M, _Tp>::_M_value;
3276 
3277  public:
3278  where_expression(const where_expression&) = delete;
3279  where_expression& operator=(const where_expression&) = delete;
3280 
  _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& __dd)
  : const_where_expression<_M, _Tp>(__kk, __dd) {}
3283 
3284 #define _GLIBCXX_SIMD_OP_(__op) \
3285  template <typename _Up> \
3286  _GLIBCXX_SIMD_INTRINSIC void operator __op(_Up&& __x)&& \
3287  { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3288 
3289  _GLIBCXX_SIMD_OP_(=)
3290  _GLIBCXX_SIMD_OP_(+=)
3291  _GLIBCXX_SIMD_OP_(-=)
3292  _GLIBCXX_SIMD_OP_(*=)
3293  _GLIBCXX_SIMD_OP_(/=)
3294  _GLIBCXX_SIMD_OP_(%=)
3295  _GLIBCXX_SIMD_OP_(&=)
3296  _GLIBCXX_SIMD_OP_(|=)
3297  _GLIBCXX_SIMD_OP_(^=)
3298  _GLIBCXX_SIMD_OP_(<<=)
3299  _GLIBCXX_SIMD_OP_(>>=)
#undef _GLIBCXX_SIMD_OP_
3301 
3302  _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3303  { if (_M_k) ++_M_value; }
3304 
3305  _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3306  { if (_M_k) ++_M_value; }
3307 
3308  _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3309  { if (_M_k) --_M_value; }
3310 
3311  _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3312  { if (_M_k) --_M_value; }
3313 
3314  // intentionally hides const_where_expression::copy_from
3315  template <typename _Up, typename _Flags>
3316  _GLIBCXX_SIMD_INTRINSIC void
3317  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3318  { if (_M_k) _M_value = __mem[0]; }
3319  };
3320 
3321 // where {{{1
3322 template <typename _Tp, typename _Ap>
3323  _GLIBCXX_SIMD_INTRINSIC where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3324  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3325  { return {__k, __value}; }
3326 
3327 template <typename _Tp, typename _Ap>
3328  _GLIBCXX_SIMD_INTRINSIC
3329  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3330  where(const typename simd<_Tp, _Ap>::mask_type& __k,
3331  const simd<_Tp, _Ap>& __value)
3332  { return {__k, __value}; }
3333 
3334 template <typename _Tp, typename _Ap>
3335  _GLIBCXX_SIMD_INTRINSIC
3336  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3337  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3338  simd_mask<_Tp, _Ap>& __value)
3339  { return {__k, __value}; }
3340 
3341 template <typename _Tp, typename _Ap>
3342  _GLIBCXX_SIMD_INTRINSIC
3343  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3344  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3345  const simd_mask<_Tp, _Ap>& __value)
3346  { return {__k, __value}; }
3347 
3348 template <typename _Tp>
3349  _GLIBCXX_SIMD_INTRINSIC where_expression<bool, _Tp>
3350  where(_ExactBool __k, _Tp& __value)
3351  { return {__k, __value}; }
3352 
3353 template <typename _Tp>
3354  _GLIBCXX_SIMD_INTRINSIC const_where_expression<bool, _Tp>
3355  where(_ExactBool __k, const _Tp& __value)
3356  { return {__k, __value}; }
3357 
3358  template <typename _Tp, typename _Ap>
3359  void where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3360 
3361  template <typename _Tp, typename _Ap>
3362  void where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
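// Example (illustrative; __v is a placeholder native_simd<float>):
//   where(__v < 0, __v) = -__v;   // element-wise absolute value
//   where(__v > 1.f, __v) *= 2.f; // compound ops touch only selected lanes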
3363 
3364 // proposed mask iterations {{{1
3365 namespace __proposed {
3366 template <size_t _Np>
3367  class where_range
3368  {
3369  const bitset<_Np> __bits;
3370 
3371  public:
3372  where_range(bitset<_Np> __b) : __bits(__b) {}
3373 
3374  class iterator
3375  {
3376  size_t __mask;
3377  size_t __bit;
3378 
    _GLIBCXX_SIMD_INTRINSIC void __next_bit()
    { if (__mask != 0) __bit = __builtin_ctzl(__mask); } // ctzl(0) is UB; end() keeps __mask == 0
3381 
3382  _GLIBCXX_SIMD_INTRINSIC void __reset_lsb()
3383  {
3384  // 01100100 - 1 = 01100011
3385  __mask &= (__mask - 1);
3386  // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3387  }
3388 
3389  public:
3390  iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3391  iterator(const iterator&) = default;
3392  iterator(iterator&&) = default;
3393 
3394  _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator->() const
3395  { return __bit; }
3396 
3397  _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator*() const
3398  { return __bit; }
3399 
3400  _GLIBCXX_SIMD_ALWAYS_INLINE iterator& operator++()
3401  {
3402  __reset_lsb();
3403  __next_bit();
3404  return *this;
3405  }
3406 
3407  _GLIBCXX_SIMD_ALWAYS_INLINE iterator operator++(int)
3408  {
3409  iterator __tmp = *this;
3410  __reset_lsb();
3411  __next_bit();
3412  return __tmp;
3413  }
3414 
3415  _GLIBCXX_SIMD_ALWAYS_INLINE bool operator==(const iterator& __rhs) const
3416  { return __mask == __rhs.__mask; }
3417 
3418  _GLIBCXX_SIMD_ALWAYS_INLINE bool operator!=(const iterator& __rhs) const
3419  { return __mask != __rhs.__mask; }
3420  };
3421 
3422  iterator begin() const
3423  { return __bits.to_ullong(); }
3424 
3425  iterator end() const
3426  { return 0; }
3427  };
3428 
3429 template <typename _Tp, typename _Ap>
3430  where_range<simd_size_v<_Tp, _Ap>>
3431  where(const simd_mask<_Tp, _Ap>& __k)
3432  { return __k.__to_bitset(); }
3433 
3434 } // namespace __proposed
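
// Usage sketch for the proposed mask iteration above (assuming
// ns = std::experimental in user code): iterate the indices of all set
// mask elements, e.g. for scalar post-processing of selected lanes:
//
//   ns::native_simd<float> v = ...;
//   float sum = 0;
//   for (size_t i : ns::__proposed::where(v > 0))
//     sum += v[i];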
3435 
3436 // }}}1
3437 // reductions [simd.reductions] {{{1
3438 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3439  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3440  reduce(const simd<_Tp, _Abi>& __v,
3441  _BinaryOperation __binary_op = _BinaryOperation())
3442  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3443 
3444 template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3445  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3446  reduce(const const_where_expression<_M, _V>& __x,
3447  typename _V::value_type __identity_element,
3448  _BinaryOperation __binary_op)
3449  {
3450  if (__builtin_expect(none_of(__get_mask(__x)), false))
3451  return __identity_element;
3452 
3453  _V __tmp = __identity_element;
3454  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3455  __data(__get_lvalue(__x)));
3456  return reduce(__tmp, __binary_op);
3457  }
3458 
3459 template <typename _M, typename _V>
3460  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3461  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3462  { return reduce(__x, 0, __binary_op); }
3463 
3464 template <typename _M, typename _V>
3465  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3466  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3467  { return reduce(__x, 1, __binary_op); }
3468 
3469 template <typename _M, typename _V>
3470  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3471  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3472  { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3473 
3474 template <typename _M, typename _V>
3475  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3476  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3477  { return reduce(__x, 0, __binary_op); }
3478 
3479 template <typename _M, typename _V>
3480  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3481  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3482  { return reduce(__x, 0, __binary_op); }
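
// Usage sketch: masked reductions substitute the reduction's identity
// element for the inactive elements, so those lanes cannot change the
// result. (Assuming ns = std::experimental in user code.)
//
//   ns::native_simd<int> v = ...;
//   int pos_sum  = reduce(where(v > 0, v), 0, std::plus<>());
//   int pos_sum2 = reduce(where(v > 0, v));  // std::plus<> and 0 are defaults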
3483 
3484 template <typename _Tp, typename _Abi>
3485  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3486  hmin(const simd<_Tp, _Abi>& __v) noexcept
3487  {
3488  return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum());
3489  }
3490 
3491 template <typename _Tp, typename _Abi>
3492  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3493  hmax(const simd<_Tp, _Abi>& __v) noexcept
3494  {
3495  return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum());
3496  }
3497 
3498 template <typename _M, typename _V>
3499  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3500  typename _V::value_type
3501  hmin(const const_where_expression<_M, _V>& __x) noexcept
3502  {
3503  using _Tp = typename _V::value_type;
3504  constexpr _Tp __id_elem =
3505 #ifdef __FINITE_MATH_ONLY__
3506  __finite_max_v<_Tp>;
3507 #else
3508  __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3509 #endif
3510  _V __tmp = __id_elem;
3511  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3512  __data(__get_lvalue(__x)));
3513  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3514  }
3515 
3516 template <typename _M, typename _V>
3517  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3518  typename _V::value_type
3519  hmax(const const_where_expression<_M, _V>& __x) noexcept
3520  {
3521  using _Tp = typename _V::value_type;
3522  constexpr _Tp __id_elem =
3523 #ifdef __FINITE_MATH_ONLY__
3524  __finite_min_v<_Tp>;
3525 #else
3526  [] {
3527  if constexpr (__value_exists_v<__infinity, _Tp>)
3528  return -__infinity_v<_Tp>;
3529  else
3530  return __finite_min_v<_Tp>;
3531  }();
3532 #endif
3533  _V __tmp = __id_elem;
3534  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3535  __data(__get_lvalue(__x)));
3536  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3537  }
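
// Usage sketch: the masked hmin/hmax overloads above replace inactive
// elements with +inf/-inf (or the finite extremes under
// __FINITE_MATH_ONLY__) before reducing:
//
//   float smallest_positive = hmin(where(v > 0, v));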
3538 
3539 // }}}1
3540 // algorithms [simd.alg] {{{
3541 template <typename _Tp, typename _Ap>
3542  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3543  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3544  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3545 
3546 template <typename _Tp, typename _Ap>
3547  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3548  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3549  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3550 
3551 template <typename _Tp, typename _Ap>
3552  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3553  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3554  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3555  {
    const auto __pair_of_members
      = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
    return {simd<_Tp, _Ap>(__private_init, __pair_of_members.first),
            simd<_Tp, _Ap>(__private_init, __pair_of_members.second)};
3560  }
3561 
3562 template <typename _Tp, typename _Ap>
3563  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3564  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo,
3565  const simd<_Tp, _Ap>& __hi)
3566  {
3567  using _Impl = typename _Ap::_SimdImpl;
3568  return {__private_init,
3569  _Impl::_S_min(__data(__hi),
3570  _Impl::_S_max(__data(__lo), __data(__v)))};
3571  }
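
// Usage sketch: min, max, minmax, and clamp mirror the scalar <algorithm>
// interfaces, applied elementwise. (Assuming ns = std::experimental.)
//
//   ns::native_simd<float> lo = 0.f, hi = 1.f;
//   auto saturated = clamp(v, lo, hi);   // == min(hi, max(lo, v)) per lane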
3572 
3573 // }}}
3574 
3575 template <size_t... _Sizes, typename _Tp, typename _Ap,
3576  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3577  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3578  split(const simd<_Tp, _Ap>&);
3579 
3580 // __extract_part {{{
3581 template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3582  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
3583  _SimdWrapper<_Tp, _Np / _Total * _Combine>
3584  __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3585 
template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0,
3587  typename... _As>
3588  _GLIBCXX_SIMD_INTRINSIC auto
3589  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3590 
3591 // }}}
3592 // _SizeList {{{
3593 template <size_t _V0, size_t... _Values>
3594  struct _SizeList
3595  {
3596  template <size_t _I>
3597  static constexpr size_t _S_at(_SizeConstant<_I> = {})
3598  {
3599  if constexpr (_I == 0)
3600  return _V0;
3601  else
3602  return _SizeList<_Values...>::template _S_at<_I - 1>();
3603  }
3604 
3605  template <size_t _I>
3606  static constexpr auto _S_before(_SizeConstant<_I> = {})
3607  {
3608  if constexpr (_I == 0)
3609  return _SizeConstant<0>();
3610  else
3611  return _SizeConstant<
3612  _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3613  }
3614 
3615  template <size_t _Np>
3616  static constexpr auto _S_pop_front(_SizeConstant<_Np> = {})
3617  {
3618  if constexpr (_Np == 0)
3619  return _SizeList();
3620  else
3621  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3622  }
3623  };
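
// Worked example (comment only): for _SizeList<4, 2, 2>,
//   _S_at<1>()        == 2 (element #1 of the list),
//   _S_before<2>()    == _SizeConstant<6> (4 + 2 elements precede index 2),
//   _S_pop_front<1>() == _SizeList<2, 2>().
// split<_Sizes...>(simd) below uses these to compute the offset and size of
// each chunk.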
3624 
3625 // }}}
3626 // __extract_center {{{
3627 template <typename _Tp, size_t _Np>
3628  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3629  __extract_center(_SimdWrapper<_Tp, _Np> __x)
3630  {
3631  static_assert(_Np >= 4);
3632  static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
3633 #if _GLIBCXX_SIMD_X86INTRIN // {{{
3634  if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
3635  {
3636  const auto __intrin = __to_intrin(__x);
          // the immediate 1 + 2*0x4 + 2*0x10 + 3*0x40 == 0b11'10'10'01
          // selects the 128-bit lanes 1, 2, 2, 3; lanes 1 and 2 land in the
          // low 256 bits, which the cast then extracts
          if constexpr (is_integral_v<_Tp>)
            return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
              _mm512_shuffle_i32x4(__intrin, __intrin,
                                   1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3641  else if constexpr (sizeof(_Tp) == 4)
3642  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
3643  _mm512_shuffle_f32x4(__intrin, __intrin,
3644  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3645  else if constexpr (sizeof(_Tp) == 8)
3646  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
3647  _mm512_shuffle_f64x2(__intrin, __intrin,
3648  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3649  else
3650  __assert_unreachable<_Tp>();
3651  }
3652  else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
3653  return __vector_bitcast<_Tp>(
3654  _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
3655  __hi128(__vector_bitcast<double>(__x)), 1));
3656  else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
3657  return __vector_bitcast<_Tp>(
3658  _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
3659  __lo128(__vector_bitcast<_LLong>(__x)),
3660  sizeof(_Tp) * _Np / 4));
3661  else
3662 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
3663  {
3664  __vector_type_t<_Tp, _Np / 2> __r;
3665  __builtin_memcpy(&__r,
3666  reinterpret_cast<const char*>(&__x)
3667  + sizeof(_Tp) * _Np / 4,
3668  sizeof(_Tp) * _Np / 2);
3669  return __r;
3670  }
3671  }
3672 
3673 template <typename _Tp, typename _A0, typename... _As>
3674  _GLIBCXX_SIMD_INTRINSIC
3675  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
3676  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
3677  {
3678  if constexpr (sizeof...(_As) == 0)
3679  return __extract_center(__x.first);
3680  else
3681  return __extract_part<1, 4, 2>(__x);
3682  }
3683 
3684 // }}}
3685 // __split_wrapper {{{
3686 template <size_t... _Sizes, typename _Tp, typename... _As>
3687  auto
3688  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
3689  {
3690  return split<_Sizes...>(
3691  fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
3692  __x));
3693  }
3694 
3695 // }}}
3696 
3697 // split<simd>(simd) {{{
template <typename _V, typename _Ap,
          size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
                && is_simd_v<_V>, array<_V, _Parts>>
  split(const simd<typename _V::value_type, _Ap>& __x)
3703  {
3704  using _Tp = typename _V::value_type;
    if constexpr (_Parts == 1)
3706  {
3707  return {simd_cast<_V>(__x)};
3708  }
3709  else if (__x._M_is_constprop())
3710  {
      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3712  auto __i) constexpr {
3713  return _V([&](auto __j) constexpr {
3714  return __x[__i * _V::size() + __j];
3715  });
3716  });
3717  }
3718  else if constexpr (
3719  __is_fixed_size_abi_v<_Ap>
3720  && (is_same_v<typename _V::abi_type, simd_abi::scalar>
3721  || (__is_fixed_size_abi_v<typename _V::abi_type>
3722  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
3723  )))
3724  {
3725  // fixed_size -> fixed_size (w/o padding) or scalar
3726 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3727  const __may_alias<_Tp>* const __element_ptr
3728  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3730  auto __i) constexpr {
3731  return _V(__element_ptr + __i * _V::size(), vector_aligned);
3732  });
3733 #else
3734  const auto& __xx = __data(__x);
      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3736  auto __i) constexpr {
3737  [[maybe_unused]] constexpr size_t __offset
3738  = decltype(__i)::value * _V::size();
3739  return _V([&](auto __j) constexpr {
3740  constexpr _SizeConstant<__j + __offset> __k;
3741  return __xx[__k];
3742  });
3743  });
3744 #endif
3745  }
3746  else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
3747  {
3748  // normally memcpy should work here as well
      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3750  auto __i) constexpr { return __x[__i]; });
3751  }
3752  else
3753  {
      return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3755  auto __i) constexpr {
3756  if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
3757  return _V([&](auto __j) constexpr {
3758  return __x[__i * _V::size() + __j];
3759  });
3760  else
3761  return _V(__private_init,
                  __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
3763  });
3764  }
3765  }
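
// Usage sketch: name the chunk type to split a simd into an array of
// equally sized pieces. (Assuming ns = std::experimental in user code.)
//
//   ns::fixed_size_simd<float, 8> v = ...;
//   auto quarters = split<ns::fixed_size_simd<float, 2>>(v);
//   // quarters is a std::array of 4 simds with 2 elements each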
3766 
3767 // }}}
3768 // split<simd_mask>(simd_mask) {{{
3769 template <typename _V, typename _Ap,
3770  size_t _Parts
3771  = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
  enable_if_t<is_simd_mask_v<_V>
                && simd_size_v<typename _V::simd_type::value_type, _Ap>
                     == _Parts * _V::size(),
              array<_V, _Parts>>
3774  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
3775  {
3776  if constexpr (is_same_v<_Ap, typename _V::abi_type>)
3777  return {__x};
3778  else if constexpr (_Parts == 1)
3779  return {__proposed::static_simd_cast<_V>(__x)};
3780  else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
3781  && __is_avx_abi<_Ap>())
3782  return {_V(__private_init, __lo128(__data(__x))),
3783  _V(__private_init, __hi128(__data(__x)))};
3784  else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
3785  {
3786  const bitset __bits = __x.__to_bitset();
3787  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3788  auto __i) constexpr {
3789  constexpr size_t __offset = __i * _V::size();
3790  return _V(__bitset_init, (__bits >> __offset).to_ullong());
3791  });
3792  }
3793  else
3794  {
3795  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3796  auto __i) constexpr {
3797  constexpr size_t __offset = __i * _V::size();
3798  return _V(
3799  __private_init, [&](auto __j) constexpr {
3800  return __x[__j + __offset];
3801  });
3802  });
3803  }
3804  }
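
// Usage sketch: simd_mask splits analogously to simd. (Assuming
// ns = std::experimental in user code.)
//
//   ns::fixed_size_simd_mask<float, 8> k = ...;
//   using half_mask = ns::fixed_size_simd_mask<float, 4>;
//   std::array<half_mask, 2> halves = split<half_mask>(k);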
3805 
3806 // }}}
3807 // split<_Sizes...>(simd) {{{
3808 template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
3809  _GLIBCXX_SIMD_ALWAYS_INLINE
3810  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3811  split(const simd<_Tp, _Ap>& __x)
3812  {
3813  using _SL = _SizeList<_Sizes...>;
3814  using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
3815  constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
3816  constexpr size_t _N0 = _SL::template _S_at<0>();
3817  using _V = __deduced_simd<_Tp, _N0>;
3818 
3819  if (__x._M_is_constprop())
3820  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3821  auto __i) constexpr {
3822  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3823  constexpr size_t __offset = _SL::_S_before(__i);
3824  return _Vi([&](auto __j) constexpr { return __x[__offset + __j]; });
3825  });
3826  else if constexpr (_Np == _N0)
3827  {
3828  static_assert(sizeof...(_Sizes) == 1);
3829  return {simd_cast<_V>(__x)};
3830  }
  else if constexpr // split from fixed_size, such that __x::first.size == _N0
    (__is_fixed_size_abi_v<_Ap>
       && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
3834  {
      static_assert(
        !__is_fixed_size_abi_v<typename _V::abi_type>,
        "How can <_Tp, _Np> be a single _SimdTuple entry but a "
        "fixed_size_simd when deduced?");
3840  // extract first and recurse (__split_wrapper is needed to deduce a new
3841  // _Sizes pack)
3842  return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
3843  __split_wrapper(_SL::template _S_pop_front<1>(),
3844  __data(__x).second));
3845  }
3846  else if constexpr ((!is_same_v<simd_abi::scalar,
3847  simd_abi::deduce_t<_Tp, _Sizes>> && ...)
3848  && (!__is_fixed_size_abi_v<
3849  simd_abi::deduce_t<_Tp, _Sizes>> && ...))
3850  {
3851  if constexpr (((_Sizes * 2 == _Np) && ...))
3852  return {{__private_init, __extract_part<0, 2>(__data(__x))},
3853  {__private_init, __extract_part<1, 2>(__data(__x))}};
3854  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3855  _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
3856  return {{__private_init, __extract_part<0, 3>(__data(__x))},
3857  {__private_init, __extract_part<1, 3>(__data(__x))},
3858  {__private_init, __extract_part<2, 3>(__data(__x))}};
3859  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3860  _SizeList<2 * _Np / 3, _Np / 3>>)
3861  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
3862  {__private_init, __extract_part<2, 3>(__data(__x))}};
3863  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3864  _SizeList<_Np / 3, 2 * _Np / 3>>)
3865  return {{__private_init, __extract_part<0, 3>(__data(__x))},
3866  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
3867  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3868  _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
3869  return {{__private_init, __extract_part<0, 2>(__data(__x))},
3870  {__private_init, __extract_part<2, 4>(__data(__x))},
3871  {__private_init, __extract_part<3, 4>(__data(__x))}};
3872  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3873  _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
3874  return {{__private_init, __extract_part<0, 4>(__data(__x))},
3875  {__private_init, __extract_part<1, 4>(__data(__x))},
3876  {__private_init, __extract_part<1, 2>(__data(__x))}};
3877  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3878  _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
3879  return {{__private_init, __extract_part<0, 4>(__data(__x))},
3880  {__private_init, __extract_center(__data(__x))},
3881  {__private_init, __extract_part<3, 4>(__data(__x))}};
3882  else if constexpr (((_Sizes * 4 == _Np) && ...))
3883  return {{__private_init, __extract_part<0, 4>(__data(__x))},
3884  {__private_init, __extract_part<1, 4>(__data(__x))},
3885  {__private_init, __extract_part<2, 4>(__data(__x))},
3886  {__private_init, __extract_part<3, 4>(__data(__x))}};
3887  // else fall through
3888  }
3889 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3890  const __may_alias<_Tp>* const __element_ptr
3891  = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
3892  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3893  auto __i) constexpr {
3894  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3895  constexpr size_t __offset = _SL::_S_before(__i);
      constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
      constexpr size_t __a
        = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
      // __b is the least significant bit set in __a, i.e. the largest
      // power of two that divides this chunk's byte offset
      constexpr size_t __b = ((__a - 1) & __a) ^ __a;
      constexpr size_t __alignment = __b == 0 ? __a : __b;
3901  return _Vi(__element_ptr + __offset, overaligned<__alignment>);
3902  });
3903 #else
3904  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3905  auto __i) constexpr {
3906  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3907  const auto& __xx = __data(__x);
3908  using _Offset = decltype(_SL::_S_before(__i));
3909  return _Vi([&](auto __j) constexpr {
3910  constexpr _SizeConstant<_Offset::value + __j> __k;
3911  return __xx[__k];
3912  });
3913  });
3914 #endif
3915  }
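
// Usage sketch: the size-list form splits one simd into differently sized
// pieces, returned as a tuple; the sizes must sum to the source width.
// (Assuming ns = std::experimental in user code.)
//
//   ns::fixed_size_simd<int, 8> v = ...;
//   auto [a, b, c] = split<4, 2, 2>(v);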
3916 
3917 // }}}
3918 
3919 // __subscript_in_pack {{{
3920 template <size_t _I, typename _Tp, typename _Ap, typename... _As>
3921  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
3922  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
3923  {
3924  if constexpr (_I < simd_size_v<_Tp, _Ap>)
3925  return __x[_I];
3926  else
3927  return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
3928  }
3929 
3930 // }}}
3931 // __store_pack_of_simd {{{
3932 template <typename _Tp, typename _A0, typename... _As>
3933  _GLIBCXX_SIMD_INTRINSIC void
3934  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0,
3935  const simd<_Tp, _As>&... __xs)
3936  {
3937  constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
3938  __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
3939  if constexpr (sizeof...(__xs) > 0)
3940  __store_pack_of_simd(__mem + __n_bytes, __xs...);
3941  }
3942 
3943 // }}}
3944 // concat(simd...) {{{
3945 template <typename _Tp, typename... _As>
3946  inline _GLIBCXX_SIMD_CONSTEXPR
3947  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
3948  concat(const simd<_Tp, _As>&... __xs)
3949  {
3950  using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
3951  if constexpr (sizeof...(__xs) == 1)
3952  return simd_cast<_Rp>(__xs...);
3953  else if ((... && __xs._M_is_constprop()))
3954  return simd<_Tp,
3955  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>([&](
3956  auto __i) constexpr { return __subscript_in_pack<__i>(__xs...); });
3957  else
3958  {
3959  _Rp __r{};
3960  __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
3961  return __r;
3962  }
3963  }
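
// Usage sketch: concat() is the inverse of split(); the result is a simd
// over the deduced ABI for the summed width:
//
//   auto w = concat(a, b, c);  // w.size() == a.size() + b.size() + c.size()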
3964 
3965 // }}}
3966 // concat(array<simd>) {{{
3967 template <typename _Tp, typename _Abi, size_t _Np>
3968  _GLIBCXX_SIMD_ALWAYS_INLINE
3969  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
3970  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
3971  {
3972  return __call_with_subscripts<_Np>(__x, [](const auto&... __xs) {
3973  return concat(__xs...);
3974  });
3975  }
3976 
3977 // }}}
3978 
3979 // _SmartReference {{{
3980 template <typename _Up, typename _Accessor = _Up,
3981  typename _ValueType = typename _Up::value_type>
3982  class _SmartReference
3983  {
3984  friend _Accessor;
3985  int _M_index;
3986  _Up& _M_obj;
3987 
3988  _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType _M_read() const noexcept
3989  {
3990  if constexpr (is_arithmetic_v<_Up>)
3991  return _M_obj;
3992  else
3993  return _M_obj[_M_index];
3994  }
3995 
3996  template <typename _Tp>
3997  _GLIBCXX_SIMD_INTRINSIC constexpr void _M_write(_Tp&& __x) const
3998  { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
3999 
4000  public:
4001  _GLIBCXX_SIMD_INTRINSIC constexpr
4002  _SmartReference(_Up& __o, int __i) noexcept
4003  : _M_index(__i), _M_obj(__o) {}
4004 
4005  using value_type = _ValueType;
4006 
4007  _GLIBCXX_SIMD_INTRINSIC _SmartReference(const _SmartReference&) = delete;
4008 
4009  _GLIBCXX_SIMD_INTRINSIC constexpr operator value_type() const noexcept
4010  { return _M_read(); }
4011 
4012  template <typename _Tp,
4013  typename
4014  = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4015  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator=(_Tp&& __x) &&
4016  {
4017  _M_write(static_cast<_Tp&&>(__x));
4018  return {_M_obj, _M_index};
4019  }
4020 
4021 #define _GLIBCXX_SIMD_OP_(__op) \
4022  template <typename _Tp, \
4023  typename _TT \
4024  = decltype(declval<value_type>() __op declval<_Tp>()), \
4025  typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4026  typename = _ValuePreservingOrInt<_TT, value_type>> \
4027  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4028  operator __op##=(_Tp&& __x) && \
4029  { \
4030  const value_type& __lhs = _M_read(); \
4031  _M_write(__lhs __op __x); \
4032  return {_M_obj, _M_index}; \
4033  }
4034  _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4035  _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4036  _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4037 #undef _GLIBCXX_SIMD_OP_
4038 
4039  template <typename _Tp = void,
4040  typename
4041  = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4042  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator++() &&
4043  {
4044  value_type __x = _M_read();
4045  _M_write(++__x);
4046  return {_M_obj, _M_index};
4047  }
4048 
4049  template <typename _Tp = void,
4050  typename
4051  = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4052  _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator++(int) &&
4053  {
4054  const value_type __r = _M_read();
4055  value_type __x = __r;
4056  _M_write(++__x);
4057  return __r;
4058  }
4059 
4060  template <typename _Tp = void,
4061  typename
4062  = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4063  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator--() &&
4064  {
4065  value_type __x = _M_read();
4066  _M_write(--__x);
4067  return {_M_obj, _M_index};
4068  }
4069 
4070  template <typename _Tp = void,
4071  typename
4072  = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4073  _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator--(int) &&
4074  {
4075  const value_type __r = _M_read();
4076  value_type __x = __r;
4077  _M_write(--__x);
4078  return __r;
4079  }
4080 
4081  _GLIBCXX_SIMD_INTRINSIC friend void
4082  swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4083  conjunction<
4084  is_nothrow_constructible<value_type, _SmartReference&&>,
4085  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4086  {
4087  value_type __tmp = static_cast<_SmartReference&&>(__a);
4088  static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4089  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4090  }
4091 
4092  _GLIBCXX_SIMD_INTRINSIC friend void
4093  swap(value_type& __a, _SmartReference&& __b) noexcept(
4094  conjunction<
4095  is_nothrow_constructible<value_type, value_type&&>,
4096  is_nothrow_assignable<value_type&, value_type&&>,
4097  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4098  {
4099  value_type __tmp(std::move(__a));
4100  __a = static_cast<value_type>(__b);
4101  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4102  }
4103 
4104  _GLIBCXX_SIMD_INTRINSIC friend void
4105  swap(_SmartReference&& __a, value_type& __b) noexcept(
4106  conjunction<
4107  is_nothrow_constructible<value_type, _SmartReference&&>,
4108  is_nothrow_assignable<value_type&, value_type&&>,
4109  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4110  {
4111  value_type __tmp(__a);
4112  static_cast<_SmartReference&&>(__a) = std::move(__b);
4113  __b = std::move(__tmp);
4114  }
4115  };
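
// Note on _SmartReference: this is the proxy type returned by the non-const
// operator[] of simd and simd_mask. It reads via the conversion to
// value_type and writes through _Accessor::_S_set instead of handing out a
// real lvalue, which is why assignment is only possible on rvalues:
//
//   v[0] = 1.f;          // _M_write via _Impl::_S_set
//   float x = v[0];      // _M_read via operator value_type()
//   swap(v[0], v[1]);    // enabled by the rvalue swap overloads above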
4116 
4117 // }}}
4118 // __scalar_abi_wrapper {{{
4119 template <int _Bytes>
4120  struct __scalar_abi_wrapper
4121  {
4122  template <typename _Tp> static constexpr size_t _S_full_size = 1;
4123  template <typename _Tp> static constexpr size_t _S_size = 1;
    template <typename _Tp> static constexpr bool _S_is_partial = false;
4125 
4126  template <typename _Tp, typename _Abi = simd_abi::scalar>
4127  static constexpr bool _S_is_valid_v
4128  = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4129  };
4130 
4131 // }}}
4132 // __decay_abi metafunction {{{
4133 template <typename _Tp>
4134  struct __decay_abi { using type = _Tp; };
4135 
4136 template <int _Bytes>
4137  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4138  { using type = simd_abi::scalar; };
4139 
4140 // }}}
4141 // __find_next_valid_abi metafunction {{{1
4142 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4143 // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
4144 // recursion at 2 elements in the resulting ABI tag. In this case
4145 // type::_S_is_valid_v<_Tp> may be false.
4146 template <template <int> class _Abi, int _Bytes, typename _Tp>
4147  struct __find_next_valid_abi
4148  {
4149  static constexpr auto _S_choose()
4150  {
4151  constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4152  using _NextAbi = _Abi<_NextBytes>;
4153  if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4154  return _Abi<_Bytes>();
4155  else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4156  && _NextAbi::template _S_is_valid_v<_Tp>)
4157  return _NextAbi();
4158  else
4159  return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4160  }
4161 
4162  using type = decltype(_S_choose());
4163  };
4164 
4165 template <int _Bytes, typename _Tp>
4166  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4167  { using type = simd_abi::scalar; };
4168 
4169 // _AbiList {{{1
4170 template <template <int> class...>
4171  struct _AbiList
4172  {
4173  template <typename, int> static constexpr bool _S_has_valid_abi = false;
4174  template <typename, int> using _FirstValidAbi = void;
4175  template <typename, int> using _BestAbi = void;
4176  };
4177 
4178 template <template <int> class _A0, template <int> class... _Rest>
4179  struct _AbiList<_A0, _Rest...>
4180  {
    template <typename _Tp, int _Np>
      static constexpr bool _S_has_valid_abi
        = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>
            || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4185 
4186  template <typename _Tp, int _Np>
4187  using _FirstValidAbi = conditional_t<
4188  _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4189  typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4190  typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4191 
4192  template <typename _Tp, int _Np>
4193  static constexpr auto _S_determine_best_abi()
4194  {
4195  static_assert(_Np >= 1);
4196  constexpr int _Bytes = sizeof(_Tp) * _Np;
4197  if constexpr (_Np == 1)
4198  return __make_dependent_t<_Tp, simd_abi::scalar>{};
4199  else
4200  {
4201  constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4202  // _A0<_Bytes> is good if:
4203  // 1. The ABI tag is valid for _Tp
4204  // 2. The storage overhead is no more than padding to fill the next
4205  // power-of-2 number of bytes
            if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp>
                            && __fullsize / 2 < _Np)
4208  return typename __decay_abi<_A0<_Bytes>>::type{};
4209  else
4210  {
4211  using _Bp =
4212  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
              if constexpr (_Bp::template _S_is_valid_v<_Tp>
                              && _Bp::template _S_size<_Tp> <= _Np)
4215  return _Bp{};
4216  else
4217  return
4218  typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4219  }
4220  }
4221  }
4222 
4223  template <typename _Tp, int _Np>
4224  using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4225  };
4226 
4227 // }}}1
4228 
4229 // the following lists all native ABIs, which makes them accessible to
4230 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4231 // matters: Whatever comes first has higher priority.
4232 using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4233  __scalar_abi_wrapper>;
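
// Illustrative consequence of the ordering above: simd_abi::deduce_t
// prefers a bitmask ABI (_VecBltnBtmsk, e.g. with AVX-512) over a vector
// ABI (_VecBuiltin, e.g. with SSE or NEON) over scalar; only if none of
// these is valid does __deduce_impl below fall back to fixed_size.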
4234 
4235 // valid _SimdTraits specialization {{{1
4236 template <typename _Tp, typename _Abi>
4237  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4238  : _Abi::template __traits<_Tp> {};
4239 
4240 // __deduce_impl specializations {{{1
4241 // try all native ABIs (including scalar) first
4242 template <typename _Tp, size_t _Np>
4243  struct __deduce_impl<
4244  _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4245  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4246 
4247 // fall back to fixed_size only if scalar and native ABIs don't match
4248 template <typename _Tp, size_t _Np, typename = void>
4249  struct __deduce_fixed_size_fallback {};
4250 
4251 template <typename _Tp, size_t _Np>
4252  struct __deduce_fixed_size_fallback<_Tp, _Np,
4253  enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4254  { using type = simd_abi::fixed_size<_Np>; };
4255 
4256 template <typename _Tp, size_t _Np, typename>
4257  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4258 
4259 //}}}1
4260 
4261 // simd_mask {{{
4262 template <typename _Tp, typename _Abi>
4263  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4264  {
4265  // types, tags, and friends {{{
4266  using _Traits = _SimdTraits<_Tp, _Abi>;
4267  using _MemberType = typename _Traits::_MaskMember;
4268 
4269  // We map all masks with equal element sizeof to a single integer type, the
4270  // one given by __int_for_sizeof_t<_Tp>. This is the approach
4271  // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4272  // template specializations in the implementation classes.
4273  using _Ip = __int_for_sizeof_t<_Tp>;
4274  static constexpr _Ip* _S_type_tag = nullptr;
4275 
4276  friend typename _Traits::_MaskBase;
4277  friend class simd<_Tp, _Abi>; // to construct masks on return
4278  friend typename _Traits::_SimdImpl; // to construct masks on return and
4279  // inspect data on masked operations
4280  public:
4281  using _Impl = typename _Traits::_MaskImpl;
4282  friend _Impl;
4283 
4284  // }}}
4285  // member types {{{
4286  using value_type = bool;
4287  using reference = _SmartReference<_MemberType, _Impl, value_type>;
4288  using simd_type = simd<_Tp, _Abi>;
4289  using abi_type = _Abi;
4290 
4291  // }}}
4292  static constexpr size_t size() // {{{
4293  { return __size_or_zero_v<_Tp, _Abi>; }
4294 
4295  // }}}
4296  // constructors & assignment {{{
4297  simd_mask() = default;
4298  simd_mask(const simd_mask&) = default;
4299  simd_mask(simd_mask&&) = default;
4300  simd_mask& operator=(const simd_mask&) = default;
4301  simd_mask& operator=(simd_mask&&) = default;
4302 
4303  // }}}
4304  // access to internal representation (optional feature) {{{
4305  _GLIBCXX_SIMD_ALWAYS_INLINE explicit
4306  simd_mask(typename _Traits::_MaskCastType __init)
4307  : _M_data{__init} {}
4308  // conversions to internal type is done in _MaskBase
4309 
4310  // }}}
4311  // bitset interface (extension to be proposed) {{{
4312  // TS_FEEDBACK:
4313  // Conversion of simd_mask to and from bitset makes it much easier to
4314  // interface with other facilities. I suggest adding `static
4315  // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4316  _GLIBCXX_SIMD_ALWAYS_INLINE static simd_mask
4317  __from_bitset(bitset<size()> bs)
4318  { return {__bitset_init, bs}; }
4319 
4320  _GLIBCXX_SIMD_ALWAYS_INLINE bitset<size()>
4321  __to_bitset() const
4322  { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
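
    // Usage sketch for the extension above (double underscores because it
    // is not part of the TS interface yet):
    //
    //   auto bits = k.__to_bitset();               // std::bitset<k.size()>
    //   auto k2   = decltype(k)::__from_bitset(bits);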
4323 
4324  // }}}
4325  // explicit broadcast constructor {{{
4326  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4327  simd_mask(value_type __x)
4328  : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4329 
4330  // }}}
4331  // implicit type conversion constructor {{{
4332  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4333  // proposed improvement
4334  template <typename _Up, typename _A2,
4335  typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4336  _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4337  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4338  simd_mask(const simd_mask<_Up, _A2>& __x)
4339  : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4340  #else
4341  // conforming to ISO/IEC 19570:2018
4342  template <typename _Up, typename = enable_if_t<conjunction<
4343  is_same<abi_type, simd_abi::fixed_size<size()>>,
4344  is_same<_Up, _Up>>::value>>
4345  _GLIBCXX_SIMD_ALWAYS_INLINE
4346  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4347  : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4348  #endif
4349 
4350  // }}}
4351  // load constructor {{{
4352  template <typename _Flags>
4353  _GLIBCXX_SIMD_ALWAYS_INLINE
4354  simd_mask(const value_type* __mem, _Flags)
4355  : _M_data(_Impl::template _S_load<_Ip>(
4356  _Flags::template _S_apply<simd_mask>(__mem))) {}
4357 
4358  template <typename _Flags>
4359  _GLIBCXX_SIMD_ALWAYS_INLINE
4360  simd_mask(const value_type* __mem, simd_mask __k, _Flags)
4361  : _M_data{}
4362  {
4363  _M_data
4364  = _Impl::_S_masked_load(_M_data, __k._M_data,
4365  _Flags::template _S_apply<simd_mask>(__mem));
4366  }
4367 
4368  // }}}
4369  // loads [simd_mask.load] {{{
4370  template <typename _Flags>
4371  _GLIBCXX_SIMD_ALWAYS_INLINE void
4372  copy_from(const value_type* __mem, _Flags)
4373  {
4374  _M_data = _Impl::template _S_load<_Ip>(
4375  _Flags::template _S_apply<simd_mask>(__mem));
4376  }
4377 
4378  // }}}
4379  // stores [simd_mask.store] {{{
4380  template <typename _Flags>
4381  _GLIBCXX_SIMD_ALWAYS_INLINE void
4382  copy_to(value_type* __mem, _Flags) const
4383  { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4384 
4385  // }}}
4386  // scalar access {{{
4387  _GLIBCXX_SIMD_ALWAYS_INLINE reference
4388  operator[](size_t __i)
4389  {
4390  if (__i >= size())
4391  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4392  return {_M_data, int(__i)};
4393  }
4394 
4395  _GLIBCXX_SIMD_ALWAYS_INLINE value_type
4396  operator[](size_t __i) const
4397  {
4398  if (__i >= size())
4399  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4400  if constexpr (__is_scalar_abi<_Abi>())
4401  return _M_data;
4402  else
4403  return static_cast<bool>(_M_data[__i]);
4404  }
4405 
4406  // }}}
4407  // negation {{{
4408  _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask
4409  operator!() const
4410  { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4411 
4412  // }}}
4413  // simd_mask binary operators [simd_mask.binary] {{{
4414  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4415  // simd_mask<int> && simd_mask<uint> needs disambiguation
4416  template <typename _Up, typename _A2,
4417  typename
4418  = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4419  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4420  operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4421  {
4422  return {__private_init,
4423  _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4424  }
4425 
4426  template <typename _Up, typename _A2,
4427  typename
4428  = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4429  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4430  operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4431  {
4432  return {__private_init,
4433  _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4434  }
4435  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4436 
4437  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4438  operator&&(const simd_mask& __x, const simd_mask& __y)
4439  {
4440  return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)};
4441  }
4442 
4443  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4444  operator||(const simd_mask& __x, const simd_mask& __y)
4445  {
4446  return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)};
4447  }
4448 
4449  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4450  operator&(const simd_mask& __x, const simd_mask& __y)
4451  { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4452 
4453  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4454  operator|(const simd_mask& __x, const simd_mask& __y)
4455  { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4456 
4457  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4458  operator^(const simd_mask& __x, const simd_mask& __y)
4459  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4460 
4461  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4462  operator&=(simd_mask& __x, const simd_mask& __y)
4463  {
4464  __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4465  return __x;
4466  }
4467 
4468  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4469  operator|=(simd_mask& __x, const simd_mask& __y)
4470  {
4471  __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4472  return __x;
4473  }
4474 
4475  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4476  operator^=(simd_mask& __x, const simd_mask& __y)
4477  {
4478  __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4479  return __x;
4480  }
4481 
4482  // }}}
4483  // simd_mask compares [simd_mask.comparison] {{{
4484  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4485  operator==(const simd_mask& __x, const simd_mask& __y)
4486  { return !operator!=(__x, __y); }
4487 
4488  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4489  operator!=(const simd_mask& __x, const simd_mask& __y)
4490  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4491 
4492  // }}}
4493  // private_init ctor {{{
4494  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4495  simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4496  : _M_data(__init) {}
4497 
4498  // }}}
4499  // private_init generator ctor {{{
4500  template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4501  _GLIBCXX_SIMD_INTRINSIC constexpr
4502  simd_mask(_PrivateInit, _Fp&& __gen)
4503  : _M_data()
4504  {
4505  __execute_n_times<size()>([&](auto __i) constexpr {
4506  _Impl::_S_set(_M_data, __i, __gen(__i));
4507  });
4508  }
4509 
4510  // }}}
4511  // bitset_init ctor {{{
4512  _GLIBCXX_SIMD_INTRINSIC simd_mask(_BitsetInit, bitset<size()> __init)
4513  : _M_data(
4514  _Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4515  {}
4516 
4517  // }}}
4518  // __cvt {{{
4519  // TS_FEEDBACK:
4520  // The conversion operator this implements should be a ctor on simd_mask.
4521  // Once you call .__cvt() on a simd_mask it converts conveniently.
4522  // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4523  struct _CvtProxy
4524  {
4525  template <typename _Up, typename _A2,
4526  typename
4527  = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4528  operator simd_mask<_Up, _A2>() &&
4529  {
4530  using namespace std::experimental::__proposed;
4531  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4532  }
4533 
4534  const simd_mask<_Tp, _Abi>& _M_data;
4535  };
4536 
4537  _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4538  __cvt() const
4539  { return {*this}; }
4540 
4541  // }}}
4542  // operator?: overloads (suggested extension) {{{
4543  #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4544  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4545  operator?:(const simd_mask& __k, const simd_mask& __where_true,
4546  const simd_mask& __where_false)
4547  {
4548  auto __ret = __where_false;
4549  _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4550  return __ret;
4551  }
4552 
4553  template <typename _U1, typename _U2,
4554  typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4555  typename = enable_if_t<conjunction_v<
4556  is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4557  is_convertible<simd_mask, typename _Rp::mask_type>>>>
4558  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4559  operator?:(const simd_mask& __k, const _U1& __where_true,
4560  const _U2& __where_false)
4561  {
4562  _Rp __ret = __where_false;
4563  _Rp::_Impl::_S_masked_assign(
4564  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4565  __data(static_cast<_Rp>(__where_true)));
4566  return __ret;
4567  }
4568 
4569  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4570  template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4571  typename = enable_if_t<
4572  conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4573  is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4574  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4575  operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4576  const simd_mask<_Up, _Au>& __where_false)
4577  {
4578  simd_mask __ret = __where_false;
4579  _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4580  __where_true._M_data);
4581  return __ret;
4582  }
4583  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4584  #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
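
    // Usage sketch (only with the hypothetical
    // __GXX_CONDITIONAL_IS_OVERLOADABLE__ extension enabled above):
    //
    //   auto r = k ? a : b;  // elementwise blend: r[i] = k[i] ? a[i] : b[i]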
4585 
4586  // }}}
4587  // _M_is_constprop {{{
4588  _GLIBCXX_SIMD_INTRINSIC constexpr bool
4589  _M_is_constprop() const
4590  {
4591  if constexpr (__is_scalar_abi<_Abi>())
4592  return __builtin_constant_p(_M_data);
4593  else
4594  return _M_data._M_is_constprop();
4595  }
4596 
4597  // }}}
4598 
4599  private:
4600  friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4601  friend auto& __data<_Tp, abi_type>(simd_mask&);
4602  alignas(_Traits::_S_mask_align) _MemberType _M_data;
4603  };
4604 
4605 // }}}
4606 
4607 // __data(simd_mask) {{{
4608 template <typename _Tp, typename _Ap>
4609  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4610  __data(const simd_mask<_Tp, _Ap>& __x)
4611  { return __x._M_data; }
4612 
4613 template <typename _Tp, typename _Ap>
4614  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4615  __data(simd_mask<_Tp, _Ap>& __x)
4616  { return __x._M_data; }
4617 
4618 // }}}
4619 
4620 // simd_mask reductions [simd_mask.reductions] {{{
4621 template <typename _Tp, typename _Abi>
4622  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4623  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4624  {
4625  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4626  {
4627  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4628  if (!__k[__i])
4629  return false;
4630  return true;
4631  }
4632  else
4633  return _Abi::_MaskImpl::_S_all_of(__k);
4634  }
4635 
4636 template <typename _Tp, typename _Abi>
4637  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4638  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4639  {
4640  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4641  {
4642  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4643  if (__k[__i])
4644  return true;
4645  return false;
4646  }
4647  else
4648  return _Abi::_MaskImpl::_S_any_of(__k);
4649  }
4650 
4651 template <typename _Tp, typename _Abi>
4652  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4653  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4654  {
4655  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4656  {
4657  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4658  if (__k[__i])
4659  return false;
4660  return true;
4661  }
4662  else
4663  return _Abi::_MaskImpl::_S_none_of(__k);
4664  }
4665 
4666 template <typename _Tp, typename _Abi>
4667  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4668  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4669  {
4670  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4671  {
4672  for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4673  if (__k[__i] != __k[__i - 1])
4674  return true;
4675  return false;
4676  }
4677  else
4678  return _Abi::_MaskImpl::_S_some_of(__k);
4679  }
4680 
4681 template <typename _Tp, typename _Abi>
4682  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4683  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
4684  {
4685  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4686  {
4687  const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
4688  __k, [](auto... __elements) { return ((__elements != 0) + ...); });
4689  if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
4690  return __r;
4691  }
4692  return _Abi::_MaskImpl::_S_popcount(__k);
4693  }
4694 
4695 template <typename _Tp, typename _Abi>
4696  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4697  find_first_set(const simd_mask<_Tp, _Abi>& __k)
4698  {
4699  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4700  {
4701  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4702  const size_t _Idx = __call_with_n_evaluations<_Np>(
4703  [](auto... __indexes) { return std::min({__indexes...}); },
4704  [&](auto __i) { return __k[__i] ? +__i : _Np; });
4705  if (_Idx >= _Np)
4706  __invoke_ub("find_first_set(empty mask) is UB");
4707  if (__builtin_constant_p(_Idx))
4708  return _Idx;
4709  }
4710  return _Abi::_MaskImpl::_S_find_first_set(__k);
4711  }
4712 
4713 template <typename _Tp, typename _Abi>
4714  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4715  find_last_set(const simd_mask<_Tp, _Abi>& __k)
4716  {
4717  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4718  {
4719  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4720  const int _Idx = __call_with_n_evaluations<_Np>(
4721  [](auto... __indexes) { return std::max({__indexes...}); },
4722  [&](auto __i) { return __k[__i] ? int(__i) : -1; });
4723  if (_Idx < 0)
          __invoke_ub("find_last_set(empty mask) is UB");
4725  if (__builtin_constant_p(_Idx))
4726  return _Idx;
4727  }
4728  return _Abi::_MaskImpl::_S_find_last_set(__k);
4729  }
4730 
4731 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4732 all_of(_ExactBool __x) noexcept
4733 { return __x; }
4734 
4735 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4736 any_of(_ExactBool __x) noexcept
4737 { return __x; }
4738 
4739 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4740 none_of(_ExactBool __x) noexcept
4741 { return !__x; }
4742 
4743 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4744 some_of(_ExactBool) noexcept
4745 { return false; }
4746 
4747 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4748 popcount(_ExactBool __x) noexcept
4749 { return __x; }
4750 
4751 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4752 find_first_set(_ExactBool)
4753 { return 0; }
4754 
4755 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4756 find_last_set(_ExactBool)
4757 { return 0; }
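
// Usage sketch: the reductions above are the idiomatic way to branch on
// vector comparisons:
//
//   if (any_of(v < 0))
//     negatives += popcount(v < 0);
//   int first = find_first_set(v < 0);   // precondition: any_of(v < 0)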
4758 
4759 // }}}
4760 
4761 // _SimdIntOperators{{{1
4762 template <typename _V, typename _Impl, bool>
4763  class _SimdIntOperators {};
4764 
4765 template <typename _V, typename _Impl>
4766  class _SimdIntOperators<_V, _Impl, true>
4767  {
4768  _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const
4769  { return *static_cast<const _V*>(this); }
4770 
4771  template <typename _Tp>
4772  _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
4773  _S_make_derived(_Tp&& __d)
4774  { return {__private_init, static_cast<_Tp&&>(__d)}; }
4775 
4776  public:
4777  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x)
4778  { return __lhs = __lhs % __x; }
4779 
4780  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x)
4781  { return __lhs = __lhs & __x; }
4782 
4783  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x)
4784  { return __lhs = __lhs | __x; }
4785 
4786  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x)
4787  { return __lhs = __lhs ^ __x; }
4788 
4789  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x)
4790  { return __lhs = __lhs << __x; }
4791 
4792  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x)
4793  { return __lhs = __lhs >> __x; }
4794 
4795  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x)
4796  { return __lhs = __lhs << __x; }
4797 
4798  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x)
4799  { return __lhs = __lhs >> __x; }
4800 
4801  _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y)
4802  {
4803  return _SimdIntOperators::_S_make_derived(
4804  _Impl::_S_modulus(__data(__x), __data(__y)));
4805  }
4806 
4807  _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y)
4808  {
4809  return _SimdIntOperators::_S_make_derived(
4810  _Impl::_S_bit_and(__data(__x), __data(__y)));
4811  }
4812 
4813  _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y)
4814  {
4815  return _SimdIntOperators::_S_make_derived(
4816  _Impl::_S_bit_or(__data(__x), __data(__y)));
4817  }
4818 
4819  _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y)
4820  {
4821  return _SimdIntOperators::_S_make_derived(
4822  _Impl::_S_bit_xor(__data(__x), __data(__y)));
4823  }
4824 
4825  _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y)
4826  {
4827  return _SimdIntOperators::_S_make_derived(
4828  _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
4829  }
4830 
4831  _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y)
4832  {
4833  return _SimdIntOperators::_S_make_derived(
4834  _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
4835  }
4836 
4837  template <typename _VV = _V>
4838  _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y)
4839  {
4840  using _Tp = typename _VV::value_type;
4841  if (__y < 0)
4842  __invoke_ub("The behavior is undefined if the right operand of a "
4843  "shift operation is negative. [expr.shift]\nA shift by "
4844  "%d was requested",
4845  __y);
4846  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
4847  __invoke_ub(
4848  "The behavior is undefined if the right operand of a "
4849  "shift operation is greater than or equal to the width of the "
4850  "promoted left operand. [expr.shift]\nA shift by %d was requested",
4851  __y);
4852  return _SimdIntOperators::_S_make_derived(
4853  _Impl::_S_bit_shift_left(__data(__x), __y));
4854  }
4855 
4856  template <typename _VV = _V>
4857  _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y)
4858  {
4859  using _Tp = typename _VV::value_type;
4860  if (__y < 0)
4861  __invoke_ub(
4862  "The behavior is undefined if the right operand of a shift "
4863  "operation is negative. [expr.shift]\nA shift by %d was requested",
4864  __y);
4865  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
4866  __invoke_ub(
4867  "The behavior is undefined if the right operand of a shift "
4868  "operation is greater than or equal to the width of the promoted "
4869  "left operand. [expr.shift]\nA shift by %d was requested",
4870  __y);
4871  return _SimdIntOperators::_S_make_derived(
4872  _Impl::_S_bit_shift_right(__data(__x), __y));
4873  }
4874 
4875  // unary operators (for integral _Tp)
4876  _GLIBCXX_SIMD_CONSTEXPR _V operator~() const
4877  { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
4878  };
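
// Usage sketch: via _SimdIntOperators, integral simd supports the full set
// of integer operators, subject to the [expr.shift] preconditions the debug
// checks above enforce. (Assuming 32-bit unsigned elements.)
//
//   ns::native_simd<unsigned> u = ...;
//   u = (u << 1) | (u >> 31);   // elementwise rotate-left by one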
4879 
4880 //}}}1
4881 
4882 // simd {{{
4883 template <typename _Tp, typename _Abi>
4884  class simd : public _SimdIntOperators<
4885  simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl,
4886  conjunction<is_integral<_Tp>,
4887  typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
4888  public _SimdTraits<_Tp, _Abi>::_SimdBase
4889  {
4890  using _Traits = _SimdTraits<_Tp, _Abi>;
4891  using _MemberType = typename _Traits::_SimdMember;
4892  using _CastType = typename _Traits::_SimdCastType;
4893  static constexpr _Tp* _S_type_tag = nullptr;
4894  friend typename _Traits::_SimdBase;
4895 
4896  public:
4897  using _Impl = typename _Traits::_SimdImpl;
4898  friend _Impl;
4899  friend _SimdIntOperators<simd, _Impl, true>;
4900 
4901  using value_type = _Tp;
4902  using reference = _SmartReference<_MemberType, _Impl, value_type>;
4903  using mask_type = simd_mask<_Tp, _Abi>;
4904  using abi_type = _Abi;
4905 
4906  static constexpr size_t size()
4907  { return __size_or_zero_v<_Tp, _Abi>; }
4908 
4909  _GLIBCXX_SIMD_CONSTEXPR simd() = default;
4910  _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
4911  _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
4912  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
4913  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
4914 
4915  // implicit broadcast constructor
4916  template <typename _Up,
4917  typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
4918  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4919  simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
4920  : _M_data(
4921  _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
4922  {}
4923 
4924  // implicit type conversion constructor (convert from fixed_size to
4925  // fixed_size)
4926  template <typename _Up>
4927  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4928  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
4929  enable_if_t<
4930  conjunction<
4931  is_same<simd_abi::fixed_size<size()>, abi_type>,
4932  negation<__is_narrowing_conversion<_Up, value_type>>,
4933  __converts_to_higher_integer_rank<_Up, value_type>>::value,
4934  void*> = nullptr)
4935  : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
4936 
4937  // explicit type conversion constructor
4938 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
4939  template <typename _Up, typename _A2,
4940  typename = decltype(static_simd_cast<simd>(
4941  declval<const simd<_Up, _A2>&>()))>
4942  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4943  simd(const simd<_Up, _A2>& __x)
4944  : simd(static_simd_cast<simd>(__x)) {}
4945 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
4946 
4947  // generator constructor
4948  template <typename _Fp>
4949  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4950  simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
4951  declval<_SizeConstant<0>&>())),
4952  value_type>* = nullptr)
4953  : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
4954 
4955  // load constructor
4956  template <typename _Up, typename _Flags>
4957  _GLIBCXX_SIMD_ALWAYS_INLINE
4958  simd(const _Up* __mem, _Flags)
4959  : _M_data(
4960  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
4961  {}
4962 
4963  // loads [simd.load]
4964  template <typename _Up, typename _Flags>
4965  _GLIBCXX_SIMD_ALWAYS_INLINE void
4966  copy_from(const _Vectorizable<_Up>* __mem, _Flags)
4967  {
4968  _M_data = static_cast<decltype(_M_data)>(
4969  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
4970  }
4971 
4972  // stores [simd.store]
4973  template <typename _Up, typename _Flags>
4974  _GLIBCXX_SIMD_ALWAYS_INLINE void
4975  copy_to(_Vectorizable<_Up>* __mem, _Flags) const
4976  {
4977  _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
4978  _S_type_tag);
4979  }
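  // Illustrative round trip (sketch, assuming `namespace stdx =
  // std::experimental`): _Up may differ from value_type as long as it is
  // vectorizable; each element is converted as if by static_cast:
  //   constexpr auto __n = stdx::native_simd<float>::size();
  //   double __in[__n] = {}; float __out[__n];
  //   stdx::native_simd<float> __v;
  //   __v.copy_from(__in, stdx::element_aligned);  // double -> float
  //   __v.copy_to(__out, stdx::element_aligned);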
4980 
4981  // scalar access
4982  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4983  operator[](size_t __i)
4984  { return {_M_data, int(__i)}; }
4985 
4986  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4987  operator[]([[maybe_unused]] size_t __i) const
4988  {
4989  if constexpr (__is_scalar_abi<_Abi>())
4990  {
4991  _GLIBCXX_DEBUG_ASSERT(__i == 0);
4992  return _M_data;
4993  }
4994  else
4995  return _M_data[__i];
4996  }
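  // Illustrative usage (sketch, assuming `namespace stdx =
  // std::experimental`): the non-const subscript returns a
  // _SmartReference proxy whose assignment writes back into the simd;
  // the const subscript returns the element by value:
  //   stdx::native_simd<int> __v = 0;
  //   __v[0] = 42;      // write through the proxy
  //   int __x = __v[0]; // read: 42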
4997 
4998  // increment and decrement:
4999  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5000  operator++()
5001  {
5002  _Impl::_S_increment(_M_data);
5003  return *this;
5004  }
5005 
5006  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5007  operator++(int)
5008  {
5009  simd __r = *this;
5010  _Impl::_S_increment(_M_data);
5011  return __r;
5012  }
5013 
5014  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5015  operator--()
5016  {
5017  _Impl::_S_decrement(_M_data);
5018  return *this;
5019  }
5020 
5021  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5022  operator--(int)
5023  {
5024  simd __r = *this;
5025  _Impl::_S_decrement(_M_data);
5026  return __r;
5027  }
5028 
5029  // unary operators (for any _Tp)
5030  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5031  operator!() const
5032  { return {__private_init, _Impl::_S_negate(_M_data)}; }
5033 
5034  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5035  operator+() const
5036  { return *this; }
5037 
5038  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5039  operator-() const
5040  { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
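  // Illustrative semantics (sketch, assuming `namespace stdx =
  // std::experimental`): increment, decrement, and the unary operators
  // all act element-wise; operator! tests each element against zero and
  // yields a mask:
  //   stdx::native_simd<int> __v = 2;
  //   ++__v;           // every element becomes 3
  //   auto __m = !__v; // mask_type, false everywhere since 3 != 0
  //   auto __n = -__v; // every element becomes -3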
5041 
5042  // access to internal representation (suggested extension)
5043  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5044  simd(_CastType __init) : _M_data(__init) {}
5045 
5046  // compound assignment [simd.cassign]
5047  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5048  operator+=(simd& __lhs, const simd& __x)
5049  { return __lhs = __lhs + __x; }
5050 
5051  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5052  operator-=(simd& __lhs, const simd& __x)
5053  { return __lhs = __lhs - __x; }
5054 
5055  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5056  operator*=(simd& __lhs, const simd& __x)
5057  { return __lhs = __lhs * __x; }
5058 
5059  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5060  operator/=(simd& __lhs, const simd& __x)
5061  { return __lhs = __lhs / __x; }
5062 
5063  // binary operators [simd.binary]
5064  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5065  operator+(const simd& __x, const simd& __y)
5066  { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5067 
5068  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5069  operator-(const simd& __x, const simd& __y)
5070  { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5071 
5072  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5073  operator*(const simd& __x, const simd& __y)
5074  { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5075 
5076  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5077  operator/(const simd& __x, const simd& __y)
5078  { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
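  // Illustrative usage (sketch, assuming `namespace stdx =
  // std::experimental`): arithmetic is element-wise and both operands
  // must have the same simd type; the broadcast constructor lets scalars
  // participate:
  //   stdx::native_simd<float> __x = 2.f, __y = 3.f;
  //   auto __z = __x * __y + 1.f; // every element: 2 * 3 + 1 == 7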
5079 
5080  // compares [simd.comparison]
5081  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5082  operator==(const simd& __x, const simd& __y)
5083  { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5084 
5085  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5086  operator!=(const simd& __x, const simd& __y)
5087  {
5088  return simd::_S_make_mask(
5089  _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5090  }
5091 
5092  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5093  operator<(const simd& __x, const simd& __y)
5094  { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5095 
5096  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5097  operator<=(const simd& __x, const simd& __y)
5098  {
5099  return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5100  }
5101 
5102  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5103  operator>(const simd& __x, const simd& __y)
5104  { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5105 
5106  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5107  operator>=(const simd& __x, const simd& __y)
5108  {
5109  return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5110  }
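  // Illustrative usage (sketch, assuming `namespace stdx =
  // std::experimental`): comparisons yield a simd_mask, typically
  // consumed by reductions such as all_of/any_of or by where():
  //   stdx::native_simd<int> __a = 1, __b = 2;
  //   auto __k = __a < __b;           // true in every element
  //   if (stdx::any_of(__k))
  //     stdx::where(__k, __a) = __b;  // masked assignment: __a becomes 2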
5111 
5112  // operator?: overloads (suggested extension) {{{
5113 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5114  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5115  operator?:(const mask_type& __k, const simd& __where_true,
5116  const simd& __where_false)
5117  {
5118  auto __ret = __where_false;
5119  _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5120  return __ret;
5121  }
5122 
5123 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5124  // }}}
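  // The overload above exists only when GCC provides the (experimental)
  // overloadable conditional operator; the portable spelling of the same
  // blend is (sketch, reusing the parameter names from above):
  //   simd __r = __where_false;
  //   where(__k, __r) = __where_true; // take __where_true where __k is set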
5125 
5126  // "private" because of the first argument's namespace
5127  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5128  simd(_PrivateInit, const _MemberType& __init)
5129  : _M_data(__init) {}
5130 
5131  // "private" because of the first argument's namespace
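  // (Implementation note: _M_data starts value-initialized, i.e. all
  // zeros; the masked assignment of ~*this, all-one bits, then sets
  // exactly the elements whose bit is set in __init.)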
5132  _GLIBCXX_SIMD_INTRINSIC
5133  simd(_BitsetInit, bitset<size()> __init) : _M_data()
5134  { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5135 
5136  _GLIBCXX_SIMD_INTRINSIC constexpr bool
5137  _M_is_constprop() const
5138  {
5139  if constexpr (__is_scalar_abi<_Abi>())
5140  return __builtin_constant_p(_M_data);
5141  else
5142  return _M_data._M_is_constprop();
5143  }
5144 
5145  private:
5146  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR static mask_type
5147  _S_make_mask(typename mask_type::_MemberType __k)
5148  { return {__private_init, __k}; }
5149 
5150  friend const auto& __data<value_type, abi_type>(const simd&);
5151  friend auto& __data<value_type, abi_type>(simd&);
5152  alignas(_Traits::_S_simd_align) _MemberType _M_data;
5153  };
5154 
5155 // }}}
5156 // __data {{{
5157 template <typename _Tp, typename _Ap>
5158  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5159  __data(const simd<_Tp, _Ap>& __x)
5160  { return __x._M_data; }
5161 
5162 template <typename _Tp, typename _Ap>
5163  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5164  __data(simd<_Tp, _Ap>& __x)
5165  { return __x._M_data; }
5166 
5167 // }}}
5168 namespace __float_bitwise_operators { //{{{
5169 template <typename _Tp, typename _Ap>
5170  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5171  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5172  {
5173  return {__private_init,
5174  _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))};
5175  }
5176 
5177 template <typename _Tp, typename _Ap>
5178  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5179  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5180  {
5181  return {__private_init,
5182  _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))};
5183  }
5184 
5185 template <typename _Tp, typename _Ap>
5186  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5187  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5188  {
5189  return {__private_init,
5190  _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))};
5191  }
5192 } // namespace __float_bitwise_operators }}}
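// Illustrative usage (sketch, assuming `namespace stdx =
// std::experimental`): after a using-directive for this internal
// namespace, the operators enable the usual sign-bit tricks on
// floating-point simds, e.g. flipping every element's sign by XOR with
// -0.f:
//   using namespace stdx::__float_bitwise_operators;
//   stdx::native_simd<float> __v = 1.f;
//   auto __neg = __v ^ stdx::native_simd<float>(-0.f); // every element -1.f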
5193 
5194 _GLIBCXX_SIMD_END_NAMESPACE
5195 
5196 #endif // __cplusplus >= 201703L
5197 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5198 
5199 // vim: foldmethod=marker foldmarker={{{,}}}