GCC with patches for OS216
| Revision | dccc54d25013516d27ae0ca86abba79d78f0478d (tree) |
|---|---|
| Date | 2020-06-18 19:15:49 |
| Author | Srinath Parvathaneni <srinath.parvathaneni@arm.com> |
| Committer | Srinath Parvathaneni |
[PATCH][GCC] arm: Fix the MVE ACLE vaddq_m polymorphic variants.
Hello,

This patch fixes the MVE ACLE vaddq_m polymorphic variants by modifying the
corresponding intrinsic parameters and the vaddq_m polymorphic variant's
_Generic case entries in the "arm_mve.h" header file. The scalar addend is now
taken as a plain int, so an integer literal (which has type int in C) matches
the _Generic coercion instead of falling through to __ARM_undef.
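The underlying issue is easiest to see in isolation: C11 _Generic selects on
the exact type of its controlling expression, so an integer literal such as 1,
whose type is int, never matches an int8_t or uint16_t association. The
following standalone sketch demonstrates this; the COERCE_* macros are
hypothetical stand-ins, not the actual arm_mve.h machinery:

```c
#include <stdio.h>
#include <stdint.h>

/* _Generic matches the exact type of its argument, so the int
   literal 1 does not match int8_t and falls into the default.  */
#define COERCE_S8(x)  _Generic ((x), int8_t: "matched int8_t", \
                                     default: "fell through")
#define COERCE_INT(x) _Generic ((x), int: "matched int", \
                                     default: "fell through")

int main (void)
{
  printf ("%s\n", COERCE_S8 (1));   /* prints "fell through" */
  printf ("%s\n", COERCE_INT (1));  /* prints "matched int" */
  return 0;
}
```

Taking the scalar operand as plain int, as the hunks below do, is what lets a
literal reach the underlying builtin instead of the __ARM_undef fallback.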
2020-06-04  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

gcc/
	* config/arm/arm_mve.h (__arm_vaddq_m_n_s8): Correct the intrinsic
	arguments.
	(__arm_vaddq_m_n_s32): Likewise.
	(__arm_vaddq_m_n_s16): Likewise.
	(__arm_vaddq_m_n_u8): Likewise.
	(__arm_vaddq_m_n_u32): Likewise.
	(__arm_vaddq_m_n_u16): Likewise.
	(__arm_vaddq_m): Modify polymorphic variant.

gcc/testsuite/
	* gcc.target/arm/mve/intrinsics/mve_vaddq_m.c: New test.

(cherry picked from commit dc39db873670bea8d8e655444387ceaa53a01a79)
@@ -9713,42 +9713,42 @@ __arm_vabdq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pr
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int8_t __b, mve_pred16_t __p)
+__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int __b, mve_pred16_t __p)
 {
   return __builtin_mve_vaddq_m_n_sv16qi (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
+__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int __b, mve_pred16_t __p)
 {
   return __builtin_mve_vaddq_m_n_sv4si (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int __b, mve_pred16_t __p)
 {
   return __builtin_mve_vaddq_m_n_sv8hi (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
+__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, int __b, mve_pred16_t __p)
 {
   return __builtin_mve_vaddq_m_n_uv16qi (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
+__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, int __b, mve_pred16_t __p)
 {
   return __builtin_mve_vaddq_m_n_uv4si (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
+__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, int __b, mve_pred16_t __p)
 {
   return __builtin_mve_vaddq_m_n_uv8hi (__inactive, __a, __b, __p);
 }
@@ -26493,42 +26493,42 @@ __arm_vabdq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int8_t __b, mve_pred16_t __p)
+__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int __b, mve_pred16_t __p)
 {
   return __arm_vaddq_m_n_s8 (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
+__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int __b, mve_pred16_t __p)
 {
   return __arm_vaddq_m_n_s32 (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int __b, mve_pred16_t __p)
 {
   return __arm_vaddq_m_n_s16 (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
+__arm_vaddq_m (uint8x16_t __inactive, uint8x16_t __a, int __b, mve_pred16_t __p)
 {
   return __arm_vaddq_m_n_u8 (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
+__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, int __b, mve_pred16_t __p)
 {
   return __arm_vaddq_m_n_u32 (__inactive, __a, __b, __p);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
+__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, int __b, mve_pred16_t __p)
 {
   return __arm_vaddq_m_n_u16 (__inactive, __a, __b, __p);
 }
@@ -37383,12 +37383,12 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int), p3), \
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
 
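The int (*)[...][...][...] pattern above is arm_mve.h's trick for dispatching
on several argument types at once: each argument's typeid becomes an array
bound, and a single _Generic over the resulting pointer-to-array type selects
the case for the whole argument tuple. A reduced sketch with hypothetical
TYPEID/TID_* names (GCC accepts constant _Generic results as array bounds,
which is exactly what the header relies on):

```c
#include <stdio.h>

/* Hypothetical typeids; in arm_mve.h these are the
   __ARM_mve_type_* enumerators.  */
enum { TID_INT = 1, TID_DOUBLE = 2 };
#define TYPEID(x) _Generic ((x), int: TID_INT, double: TID_DOUBLE)

static int    add_int    (int a, int b)       { return a + b; }
static double add_double (double a, double b) { return a + b; }

/* Encode both typeids as array bounds, then select on the
   composite pointer-to-array type.  */
#define ADD(a, b)                                             \
  _Generic ((int (*)[TYPEID (a)][TYPEID (b)]) 0,              \
            int (*)[TID_INT][TID_INT]:       add_int,         \
            int (*)[TID_DOUBLE][TID_DOUBLE]: add_double) (a, b)

int main (void)
{
  printf ("%d\n", ADD (1, 2));      /* dispatches to add_int */
  printf ("%g\n", ADD (1.5, 2.5));  /* dispatches to add_double */
  return 0;
}
```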
@@ -39749,12 +39749,12 @@ extern void *__ARM_undef;
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int), p3), \
+  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int), p3), \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
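The hunk headers show the other half of the mechanism: extern void
*__ARM_undef;. When a coercion's _Generic finds no matching type, the default
branch dereferences a cast of this undefined symbol, so a reference to
__ARM_undef survives into the generated code; that is what the new test's
scan-assembler-not directive checks for. A reduced sketch, with a hypothetical
COERCE_S8 standing in for __ARM_mve_coerce:

```c
#include <stdint.h>
#include <stdio.h>

extern void *__ARM_undef;  /* deliberately never defined */

/* Hypothetical reduction of the coercion helper: a type mismatch
   selects the default branch and drags in the undefined symbol;
   an exact match leaves no trace of it.  */
#define COERCE_S8(x) \
  _Generic ((x), int8_t: (x), default: *(int8_t *) __ARM_undef)

int main (void)
{
  int8_t c = 5;
  printf ("%d\n", COERCE_S8 (c));  /* matches int8_t: links cleanly */
  return 0;
}
```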
@@ -0,0 +1,48 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include <arm_mve.h>
+mve_pred16_t p;
+
+int32x4_t fn1 (int32x4_t vecIdx)
+{
+  return vaddq_m(vuninitializedq_s32(), vecIdx, 1, p);
+}
+
+int16x8_t fn2 (int16x8_t vecIdx)
+{
+  return vaddq_m(vuninitializedq_s16(), vecIdx, 1, p);
+}
+
+int8x16_t fn3 (int8x16_t vecIdx)
+{
+  return vaddq_m(vuninitializedq_s8(), vecIdx, 1, p);
+}
+
+uint32x4_t fn4 (uint32x4_t vecIdx)
+{
+  return vaddq_m(vuninitializedq_u32(), vecIdx, 1, p);
+}
+
+uint16x8_t fn5 (uint16x8_t vecIdx)
+{
+  return vaddq_m(vuninitializedq_u16(), vecIdx, 1, p);
+}
+
+uint8x16_t fn6 (uint8x16_t vecIdx)
+{
+  return vaddq_m(vuninitializedq_u8(), vecIdx, 1, p);
+}
+
+float32x4_t fn7 (float32x4_t vecIdx)
+{
+  return vaddq_m(vuninitializedq_f32(), vecIdx, (float32_t) 1.23, p);
+}
+
+float16x8_t fn8 (float16x8_t vecIdx)
+{
+  return vaddq_m(vuninitializedq_f16(), vecIdx, (float16_t) 1.40, p);
+}
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
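Outside the DejaGnu harness, a cross compiler with MVE support should exercise
the same paths: something like arm-none-eabi-gcc -march=armv8.1-m.main+mve.fp
-mfloat-abi=hard -O2 -S mve_vaddq_m.c (flags along the lines of what the
arm_v8_1m_mve_fp effective-target supplies), followed by a search of the
generated assembly for __ARM_undef, mirrors the dg-final directive above. With
the fixed header the symbol must not appear.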