• R/O
  • HTTP
  • SSH
  • HTTPS

提交

标签
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

GCC with patches for OS216


Commit MetaInfo

修订版dccc54d25013516d27ae0ca86abba79d78f0478d (tree)
时间2020-06-18 19:15:49
作者Srinath Parvathaneni <srinath.parvathaneni@arm....>
CommiterSrinath Parvathaneni

Log Message

[PATCH][GCC] arm: Fix the MVE ACLE vaddq_m polymorphic variants.

Hello,

This patch fixes the MVE ACLE vaddq_m polymorphic variants by modifying the corresponding
intrinsic parameters and vaddq_m polymorphic variant's _Generic case entries in "arm_mve.h"
header file.

2020-06-04 Srinath Parvathaneni <srinath.parvathaneni@arm.com>

gcc/
* config/arm/arm_mve.h (arm_vaddq_m_n_s8): Correct the intrinsic
arguments.
(
arm_vaddq_m_n_s32): Likewise.
(arm_vaddq_m_n_s16): Likewise.
(
arm_vaddq_m_n_u8): Likewise.
(arm_vaddq_m_n_u32): Likewise.
(
arm_vaddq_m_n_u16): Likewise.
(arm_vaddq_m): Modify polymorphic variant.

gcc/testsuite/
* gcc.target/arm/mve/intrinsics/mve_vaddq_m.c: New test.

(cherry picked from commit dc39db873670bea8d8e655444387ceaa53a01a79)

更改概述

差异

--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -9713,42 +9713,42 @@ __arm_vabdq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pr
97139713
97149714 __extension__ extern __inline int8x16_t
97159715 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
9716-__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int8_t __b, mve_pred16_t __p)
9716+__arm_vaddq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int __b, mve_pred16_t __p)
97179717 {
97189718 return __builtin_mve_vaddq_m_n_sv16qi (__inactive, __a, __b, __p);
97199719 }
97209720
97219721 __extension__ extern __inline int32x4_t
97229722 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
9723-__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
9723+__arm_vaddq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int __b, mve_pred16_t __p)
97249724 {
97259725 return __builtin_mve_vaddq_m_n_sv4si (__inactive, __a, __b, __p);
97269726 }
97279727
97289728 __extension__ extern __inline int16x8_t
97299729 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
9730-__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
9730+__arm_vaddq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int __b, mve_pred16_t __p)
97319731 {
97329732 return __builtin_mve_vaddq_m_n_sv8hi (__inactive, __a, __b, __p);
97339733 }
97349734
97359735 __extension__ extern __inline uint8x16_t
97369736 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
9737-__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
9737+__arm_vaddq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, int __b, mve_pred16_t __p)
97389738 {
97399739 return __builtin_mve_vaddq_m_n_uv16qi (__inactive, __a, __b, __p);
97409740 }
97419741
97429742 __extension__ extern __inline uint32x4_t
97439743 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
9744-__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
9744+__arm_vaddq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, int __b, mve_pred16_t __p)
97459745 {
97469746 return __builtin_mve_vaddq_m_n_uv4si (__inactive, __a, __b, __p);
97479747 }
97489748
97499749 __extension__ extern __inline uint16x8_t
97509750 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
9751-__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
9751+__arm_vaddq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, int __b, mve_pred16_t __p)
97529752 {
97539753 return __builtin_mve_vaddq_m_n_uv8hi (__inactive, __a, __b, __p);
97549754 }
@@ -26493,42 +26493,42 @@ __arm_vabdq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16
2649326493
2649426494 __extension__ extern __inline int8x16_t
2649526495 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
26496-__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int8_t __b, mve_pred16_t __p)
26496+__arm_vaddq_m (int8x16_t __inactive, int8x16_t __a, int __b, mve_pred16_t __p)
2649726497 {
2649826498 return __arm_vaddq_m_n_s8 (__inactive, __a, __b, __p);
2649926499 }
2650026500
2650126501 __extension__ extern __inline int32x4_t
2650226502 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
26503-__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
26503+__arm_vaddq_m (int32x4_t __inactive, int32x4_t __a, int __b, mve_pred16_t __p)
2650426504 {
2650526505 return __arm_vaddq_m_n_s32 (__inactive, __a, __b, __p);
2650626506 }
2650726507
2650826508 __extension__ extern __inline int16x8_t
2650926509 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
26510-__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
26510+__arm_vaddq_m (int16x8_t __inactive, int16x8_t __a, int __b, mve_pred16_t __p)
2651126511 {
2651226512 return __arm_vaddq_m_n_s16 (__inactive, __a, __b, __p);
2651326513 }
2651426514
2651526515 __extension__ extern __inline uint8x16_t
2651626516 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
26517-__arm_vaddq_m (uint8x16_t __inactive, uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
26517+__arm_vaddq_m (uint8x16_t __inactive, uint8x16_t __a, int __b, mve_pred16_t __p)
2651826518 {
2651926519 return __arm_vaddq_m_n_u8 (__inactive, __a, __b, __p);
2652026520 }
2652126521
2652226522 __extension__ extern __inline uint32x4_t
2652326523 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
26524-__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
26524+__arm_vaddq_m (uint32x4_t __inactive, uint32x4_t __a, int __b, mve_pred16_t __p)
2652526525 {
2652626526 return __arm_vaddq_m_n_u32 (__inactive, __a, __b, __p);
2652726527 }
2652826528
2652926529 __extension__ extern __inline uint16x8_t
2653026530 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
26531-__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
26531+__arm_vaddq_m (uint16x8_t __inactive, uint16x8_t __a, int __b, mve_pred16_t __p)
2653226532 {
2653326533 return __arm_vaddq_m_n_u16 (__inactive, __a, __b, __p);
2653426534 }
@@ -37383,12 +37383,12 @@ extern void *__ARM_undef;
3738337383 int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
3738437384 int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
3738537385 int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
37386- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
37387- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
37388- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
37389- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
37390- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
37391- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
37386+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \
37387+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int), p3), \
37388+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int), p3), \
37389+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int), p3), \
37390+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int), p3), \
37391+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int), p3), \
3739237392 int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
3739337393 int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
3739437394
@@ -39749,12 +39749,12 @@ extern void *__ARM_undef;
3974939749 __typeof(p1) __p1 = (p1); \
3975039750 __typeof(p2) __p2 = (p2); \
3975139751 _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
39752- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
39753- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
39754- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
39755- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
39756- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
39757- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
39752+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int), p3), \
39753+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int), p3), \
39754+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int), p3), \
39755+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int), p3), \
39756+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int), p3), \
39757+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int), p3), \
3975839758 int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
3975939759 int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
3976039760 int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_m.c
@@ -0,0 +1,48 @@
1+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
2+/* { dg-add-options arm_v8_1m_mve_fp } */
3+/* { dg-additional-options "-O2" } */
4+
5+#include <arm_mve.h>
6+mve_pred16_t p;
7+
8+int32x4_t fn1 (int32x4_t vecIdx)
9+{
10+ return vaddq_m(vuninitializedq_s32(), vecIdx, 1, p);
11+}
12+
13+int16x8_t fn2 (int16x8_t vecIdx)
14+{
15+ return vaddq_m(vuninitializedq_s16(), vecIdx, 1, p);
16+}
17+
18+int8x16_t fn3 (int8x16_t vecIdx)
19+{
20+ return vaddq_m(vuninitializedq_s8(), vecIdx, 1, p);
21+}
22+
23+uint32x4_t fn4 (uint32x4_t vecIdx)
24+{
25+ return vaddq_m(vuninitializedq_u32(), vecIdx, 1, p);
26+}
27+
28+uint16x8_t fn5 (uint16x8_t vecIdx)
29+{
30+ return vaddq_m(vuninitializedq_u16(), vecIdx, 1, p);
31+}
32+
33+uint8x16_t fn6 (uint8x16_t vecIdx)
34+{
35+ return vaddq_m(vuninitializedq_u8(), vecIdx, 1, p);
36+}
37+
38+float32x4_t fn7 (float32x4_t vecIdx)
39+{
40+ return vaddq_m(vuninitializedq_f32(), vecIdx, (float32_t) 1.23, p);
41+}
42+
43+float16x8_t fn8 (float16x8_t vecIdx)
44+{
45+ return vaddq_m(vuninitializedq_f16(), vecIdx, (float16_t) 1.40, p);
46+}
47+
48+/* { dg-final { scan-assembler-not "__ARM_undef" } } */