• R/O
  • HTTP
  • SSH
  • HTTPS

common_source_project-fm7: 提交

Common Source Code Project for Qt (a.k.a for FM-7).


Commit MetaInfo

修订版825bf2ca6d1fdd3b866e72eaf7e6db32a4a704c4 (tree)
时间2019-01-17 19:40:18
作者K.Ohta <whatisthis.sowhat@gmai...>
CommitterK.Ohta

Log Message

[COMMON] Fix unaligned SIMD variables. Fix crash when built with "-msse2" on Win32.
[BUILD][Win32] Adjust optimization parameters for MinGW/Win32.

更改概述

差异

--- a/source/build-cmake/params/buildvars_mingw_params_gcc.dat
+++ b/source/build-cmake/params/buildvars_mingw_params_gcc.dat
@@ -29,14 +29,14 @@ case ${CSP_DEBUG} in
2929 MAKEFLAGS_BASE2="-ggdb ${ARCH_FLAGS} ${MAKEFLAGS_BASE} ${ADDITIONAL_CFLAGS} -DNDEBUG"
3030 ;;
3131 "No" | "no" | "NO" | * )
32- MAKEFLAGS_BASE2="${MAKEFLAGS_BASE} -O3 \
32+ MAKEFLAGS_BASE2="${MAKEFLAGS_BASE} -O2 \
3333 ${ARCH_FLAGS} \
34- -ftree-vectorize \
35- -ftree-loop-optimize \
36- -floop-nest-optimize \
3734 -std=c++11 \
3835 ${ADDITIONAL_CFLAGS} \
3936 -DNDEBUG "
37+# -ftree-vectorize \
38+# -ftree-loop-optimize \
39+# -floop-nest-optimize \
4040 ;;
4141 esac
4242
--- a/source/src/common.cpp
+++ b/source/src/common.cpp
@@ -537,10 +537,8 @@ uint8_t DLL_PREFIX A_OF_COLOR(scrntype_t c)
537537 void DLL_PREFIX PrepareBitTransTableUint16(_bit_trans_table_t *tbl, uint16_t on_val, uint16_t off_val)
538538 {
539539 if(tbl == NULL) return;
540-__DECL_VECTORIZED_LOOP
541540 for(uint16_t i = 0; i < 256; i++) {
542541 uint16_t n = i;
543-__DECL_VECTORIZED_LOOP
544542 for(int j = 0; j < 8; j++) {
545543 tbl->plane_table[i].w[j] = ((n & 0x80) == 0) ? off_val : on_val;
546544 n <<= 1;
@@ -554,10 +552,8 @@ __DECL_VECTORIZED_LOOP
554552 void DLL_PREFIX PrepareBitTransTableScrnType(_bit_trans_table_scrn_t *tbl, scrntype_t on_val, scrntype_t off_val)
555553 {
556554 if(tbl == NULL) return;
557-__DECL_VECTORIZED_LOOP
558555 for(uint16_t i = 0; i < 256; i++) {
559556 uint16_t n = i;
560-__DECL_VECTORIZED_LOOP
561557 for(int j = 0; j < 8; j++) {
562558 tbl->plane_table[i].w[j] = ((n & 0x80) == 0) ? off_val : on_val;
563559 n <<= 1;
@@ -569,10 +565,8 @@ __DECL_VECTORIZED_LOOP
569565 void DLL_PREFIX PrepareReverseBitTransTableUint16(_bit_trans_table_t *tbl, uint16_t on_val, uint16_t off_val)
570566 {
571567 if(tbl == NULL) return;
572-__DECL_VECTORIZED_LOOP
573568 for(uint16_t i = 0; i < 256; i++) {
574569 uint16_t n = i;
575-__DECL_VECTORIZED_LOOP
576570 for(int j = 0; j < 8; j++) {
577571 tbl->plane_table[i].w[j] = ((n & 0x01) == 0) ? off_val : on_val;
578572 n >>= 1;
@@ -583,10 +577,8 @@ __DECL_VECTORIZED_LOOP
583577 void DLL_PREFIX PrepareReverseBitTransTableScrnType(_bit_trans_table_scrn_t *tbl, scrntype_t on_val, scrntype_t off_val)
584578 {
585579 if(tbl == NULL) return;
586-__DECL_VECTORIZED_LOOP
587580 for(uint16_t i = 0; i < 256; i++) {
588581 uint16_t n = i;
589-__DECL_VECTORIZED_LOOP
590582 for(int j = 0; j < 8; j++) {
591583 tbl->plane_table[i].w[j] = ((n & 0x01) == 0) ? off_val : on_val;
592584 n >>= 1;
@@ -598,9 +590,9 @@ __DECL_VECTORIZED_LOOP
598590 void DLL_PREFIX ConvertByteToPackedPixelByColorTable2(uint8_t *src, scrntype_t* dst, int bytes, _bit_trans_table_scrn_t *tbl, scrntype_t *on_color_table, scrntype_t* off_color_table)
599591 {
600592
601- scrntype_vec8_t tmpd;
602- scrntype_vec8_t tmpdd;
603- scrntype_vec8_t colors;
593+ __DECL_ALIGNED(32) scrntype_vec8_t tmpd;
594+ __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
595+ __DECL_ALIGNED(32) scrntype_vec8_t colors;
604596 scrntype_vec8_t* vt = (scrntype_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(scrntype_vec8_t));
605597
606598 uintptr_t disalign = (uintptr_t)dst;
@@ -664,10 +656,10 @@ __DECL_VECTORIZED_LOOP
664656 void DLL_PREFIX ConvertByteToSparceUint16(uint8_t *src, uint16_t* dst, int bytes, _bit_trans_table_t *tbl, uint16_t mask)
665657 {
666658
667- uint16_vec8_t tmpd;
659+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
668660 uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
669661
670- uint16_vec8_t __masks;
662+ __DECL_ALIGNED(16) uint16_vec8_t __masks;
671663
672664 __DECL_VECTORIZED_LOOP
673665 for(int i = 0; i < 8; i++) {
@@ -706,11 +698,11 @@ __DECL_VECTORIZED_LOOP
706698 void DLL_PREFIX ConvertByteToSparceUint8(uint8_t *src, uint16_t* dst, int bytes, _bit_trans_table_t *tbl, uint16_t mask)
707699 {
708700
709- uint16_vec8_t tmpd;
701+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
710702 uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
711703
712- uint16_vec8_t __masks;
713- uint8_vec8_t tmpdd;
704+ __DECL_ALIGNED(16) uint16_vec8_t __masks;
705+ __DECL_ALIGNED(16) uint8_vec8_t tmpdd;
714706
715707 __DECL_VECTORIZED_LOOP
716708 for(int i = 0; i < 8; i++) {
@@ -751,8 +743,8 @@ __DECL_VECTORIZED_LOOP
751743 void DLL_PREFIX ConvertByteToPackedPixelByColorTable(uint8_t *src, scrntype_t* dst, int bytes, _bit_trans_table_t *tbl, scrntype_t *on_color_table, scrntype_t* off_color_table)
752744 {
753745
754- uint16_vec8_t tmpd;
755- scrntype_vec8_t tmpdd;
746+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
747+ __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
756748 uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
757749
758750 uintptr_t disalign = (uintptr_t)dst;
@@ -831,8 +823,8 @@ __DECL_VECTORIZED_LOOP
831823 uint8_t r, g, b;
832824 int shift = src->shift;
833825 const bool is_render[3] = { src->is_render[0], src->is_render[1], src->is_render[2] };
834- uint16_vec8_t tmpd;
835- scrntype_vec8_t tmp_dd;
826+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
827+ __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
836828 scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
837829
838830 x = src->begin_pos;
@@ -860,7 +852,7 @@ __DECL_VECTORIZED_LOOP
860852 #else // 24bit
861853 static const int shift_factor = 3;
862854 #endif
863- scrntype_vec8_t sline;
855+ __DECL_ALIGNED(32) scrntype_vec8_t sline;
864856 scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
865857 __DECL_VECTORIZED_LOOP
866858 for(int i = 0; i < 8; i++) {
@@ -935,8 +927,8 @@ __DECL_VECTORIZED_LOOP
935927 uint8_t r, g, b, n;
936928 int shift = src->shift;
937929 const bool is_render[4] = { src->is_render[0], src->is_render[1], src->is_render[2], src->is_render[3] };
938- uint16_vec8_t tmpd;
939- scrntype_vec8_t tmp_dd;
930+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
931+ __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
940932 scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
941933
942934 x = src->begin_pos;
@@ -966,7 +958,7 @@ __DECL_VECTORIZED_LOOP
966958 #else // 24bit
967959 static const int shift_factor = 3;
968960 #endif
969- scrntype_vec8_t sline;
961+ __DECL_ALIGNED(32) scrntype_vec8_t sline;
970962 scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
971963 __DECL_VECTORIZED_LOOP
972964 for(int i = 0; i < 8; i++) {
@@ -1035,8 +1027,8 @@ __DECL_VECTORIZED_LOOP
10351027 uint8_t d[16];
10361028 int shift = src->shift;
10371029 const bool is_render[4] = { src->is_render[0], src->is_render[1], src->is_render[2], src->is_render[3] };
1038- uint16_vec8_t tmpd;
1039- scrntype_vec8_t tmp_dd;
1030+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
1031+ __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
10401032 scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
10411033
10421034 x = src->begin_pos;
@@ -1065,7 +1057,7 @@ __DECL_VECTORIZED_LOOP
10651057 #else // 24bit
10661058 static const int shift_factor = 3;
10671059 #endif
1068- scrntype_vec8_t sline;
1060+ __DECL_ALIGNED(32) scrntype_vec8_t sline;
10691061 scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
10701062 __DECL_VECTORIZED_LOOP
10711063 for(int i = 0; i < 8; i++) {
@@ -1104,7 +1096,7 @@ void DLL_PREFIX Convert2NColorsToByte_Line(_render_command_data_t *src, uint8_t
11041096
11051097 uint8_t* srcp[8];
11061098 __DECL_ALIGNED(32) uint32_t offset[8] = {0};
1107- uint16_vec8_t dat;
1099+ __DECL_ALIGNED(16) uint16_vec8_t dat;
11081100 uint16_vec8_t* bp[8] ;
11091101
11101102 __DECL_VECTORIZED_LOOP
@@ -1152,7 +1144,7 @@ void DLL_PREFIX Convert2NColorsToByte_LineZoom2(_render_command_data_t *src, uin
11521144
11531145 uint8_t* srcp[8];
11541146 __DECL_ALIGNED(32) uint32_t offset[8] = {0};
1155- uint16_vec8_t dat;
1147+ __DECL_ALIGNED(16) uint16_vec8_t dat;
11561148 uint16_vec8_t* bp[8] ;
11571149
11581150 __DECL_VECTORIZED_LOOP
@@ -1200,10 +1192,10 @@ void DLL_PREFIX Convert8ColorsToByte_Line(_render_command_data_t *src, uint8_t *
12001192 uint8_t *gp = &(src->data[2][src->baseaddress[2]]);
12011193 __DECL_ALIGNED(16) uint32_t offset[4] = {0};
12021194
1203- uint16_vec8_t rdat;
1204- uint16_vec8_t gdat;
1205- uint16_vec8_t bdat;
1206- uint16_vec8_t tmpd;
1195+ __DECL_ALIGNED(16) uint16_vec8_t rdat;
1196+ __DECL_ALIGNED(16) uint16_vec8_t gdat;
1197+ __DECL_ALIGNED(16) uint16_vec8_t bdat;
1198+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
12071199
12081200 uint16_vec8_t* bpb = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[0]->plane_table[0]), sizeof(uint16_vec8_t));
12091201 uint16_vec8_t* bpr = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[1]->plane_table[0]), sizeof(uint16_vec8_t));
--- a/source/src/common.h
+++ b/source/src/common.h
@@ -1082,8 +1082,8 @@ typedef struct {
10821082
10831083 inline scrntype_vec8_t ConvertByteToMonochromePackedPixel(uint8_t src, _bit_trans_table_t *tbl,scrntype_t on_val, scrntype_t off_val)
10841084 {
1085- uint16_vec8_t tmpd;
1086- scrntype_vec8_t tmpdd;
1085+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
1086+ __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
10871087 _bit_trans_table_t* vt = (_bit_trans_table_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t));
10881088
10891089 tmpd.v = vt->plane_table[src].v;
@@ -1104,7 +1104,7 @@ void DLL_PREFIX ConvertByteToSparceUint8(uint8_t *src, uint16_t* dst, int bytes,
11041104 // Table must be (ON_VAL_COLOR : OFF_VAL_COLOR)[256].
11051105 inline scrntype_vec8_t ConvertByteToPackedPixel_PixelTbl(uint8_t src, _bit_trans_table_scrn_t *tbl)
11061106 {
1107- scrntype_vec8_t tmpdd;
1107+ __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
11081108 _bit_trans_table_scrn_t* vt = (_bit_trans_table_scrn_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t));
11091109
11101110 tmpdd.v = vt->plane_table[src].v;
@@ -1114,8 +1114,8 @@ inline scrntype_vec8_t ConvertByteToPackedPixel_PixelTbl(uint8_t src, _bit_trans
11141114 // Table must be (ON_VAL_COLOR : OFF_VAL_COLOR)[256].
11151115 inline scrntype_vec16_t ConvertByteToDoublePackedPixel_PixelTbl(uint8_t src, _bit_trans_table_scrn_t *tbl)
11161116 {
1117- scrntype_vec16_t tmpdd;
1118- scrntype_vec8_t tmpd;
1117+ __DECL_ALIGNED(32) scrntype_vec16_t tmpdd;
1118+ __DECL_ALIGNED(32) scrntype_vec8_t tmpd;
11191119 _bit_trans_table_scrn_t* vt = (_bit_trans_table_scrn_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t));
11201120 tmpd.v = vt->plane_table[src].v;
11211121 int j = 0;
@@ -1131,7 +1131,7 @@ __DECL_VECTORIZED_LOOP
11311131 // Table must be initialize ON_COLOR : OFF_COLOR
11321132 inline void ConvertByteToDoubleMonochromeUint8(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl)
11331133 {
1134- uint16_vec8_t tmpd;
1134+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
11351135 uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
11361136
11371137 __DECL_ALIGNED(16) uint8_t d[16];
@@ -1151,7 +1151,7 @@ __DECL_VECTORIZED_LOOP
11511151
11521152 inline void ConvertByteToMonochromeUint8(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl)
11531153 {
1154- uint16_vec8_t tmpd;
1154+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
11551155 uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
11561156
11571157 tmpd = vt[src];
@@ -1163,7 +1163,7 @@ __DECL_VECTORIZED_LOOP
11631163
11641164 inline void ConvertRGBTo8ColorsUint8(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift)
11651165 {
1166- uint16_vec8_t tmpd;
1166+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
11671167 uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t));
11681168 uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t));
11691169 uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t));
@@ -1180,7 +1180,7 @@ __DECL_VECTORIZED_LOOP
11801180
11811181 inline void ConvertRGBTo8ColorsUint8_Zoom2Left(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift)
11821182 {
1183- uint16_vec8_t tmpd;
1183+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
11841184 uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t));
11851185 uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t));
11861186 uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t));
@@ -1198,7 +1198,7 @@ __DECL_VECTORIZED_LOOP
11981198
11991199 inline void ConvertRGBTo8ColorsUint8_Zoom2Right(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift)
12001200 {
1201- uint16_vec8_t tmpd;
1201+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
12021202 uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t));
12031203 uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t));
12041204 uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t));
@@ -1216,7 +1216,7 @@ __DECL_VECTORIZED_LOOP
12161216
12171217 inline void ConvertRGBTo8ColorsUint8_Zoom2Double(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift)
12181218 {
1219- uint16_vec8_t tmpd;
1219+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
12201220 uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t));
12211221 uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t));
12221222 uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t));
@@ -1234,7 +1234,7 @@ __DECL_VECTORIZED_LOOP
12341234
12351235 inline void ConvertByteToMonochromeUint8Cond_Zoom2(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl, uint8_t on_color, uint8_t off_color)
12361236 {
1237- uint16_vec8_t tmpd;
1237+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
12381238 uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
12391239
12401240 __DECL_ALIGNED(16) uint8_t d[16];
@@ -1254,7 +1254,7 @@ __DECL_VECTORIZED_LOOP
12541254
12551255 inline void ConvertByteToMonochromeUint8Cond(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl, uint8_t on_color, uint8_t off_color)
12561256 {
1257- uint16_vec8_t tmpd;
1257+ __DECL_ALIGNED(16) uint16_vec8_t tmpd;
12581258 uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
12591259
12601260 tmpd = vt[src];
--- a/source/src/vm/fm7/display.cpp
+++ b/source/src/vm/fm7/display.cpp
@@ -42,31 +42,6 @@ DISPLAY::DISPLAY(VM_TEMPLATE* parent_vm, EMU* parent_emu) : DEVICE(parent_vm, pa
4242 mainio = NULL;
4343 subcpu = NULL;
4444 keyboard = NULL;
45-#if 1
46- PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_0[0][0])), 0x0080, 0x0000);
47- PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_1[0][0])), 0x0040, 0x0000);
48- PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_2[0][0])), 0x0020, 0x0000);
49- PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_3[0][0])), 0x0010, 0x0000);
50-#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
51- PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_4[0][0])), 0x0008, 0x0000);
52- PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_5[0][0])), 0x0004, 0x0000);
53-#endif
54-#else
55- for(int i = 0; i < 256; i++) {
56- uint16_t n = (uint16_t)i;
57- for(int j = 0; j < 8; j++) {
58- bit_trans_table_0[i][j] = n & 0x80;
59- bit_trans_table_1[i][j] = ((n & 0x80) != 0) ? 0x40 : 0;
60- bit_trans_table_2[i][j] = ((n & 0x80) != 0) ? 0x20 : 0;
61- bit_trans_table_3[i][j] = ((n & 0x80) != 0) ? 0x10 : 0;
62-#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
63- bit_trans_table_4[i][j] = ((n & 0x80) != 0) ? 0x08 : 0;
64- bit_trans_table_5[i][j] = ((n & 0x80) != 0) ? 0x04 : 0;
65-#endif
66- n <<= 1;
67- }
68- }
69-#endif
7045 displine = 0;
7146 active_page = 0;
7247 #if defined(USE_GREEN_DISPLAY)
@@ -3365,6 +3340,31 @@ void DISPLAY::initialize()
33653340 {
33663341 int i;
33673342
3343+#if 1
3344+ PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_0[0][0])), 0x0080, 0x0000);
3345+ PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_1[0][0])), 0x0040, 0x0000);
3346+ PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_2[0][0])), 0x0020, 0x0000);
3347+ PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_3[0][0])), 0x0010, 0x0000);
3348+#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
3349+ PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_4[0][0])), 0x0008, 0x0000);
3350+ PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_5[0][0])), 0x0004, 0x0000);
3351+#endif
3352+#else
3353+ for(int i = 0; i < 256; i++) {
3354+ uint16_t n = (uint16_t)i;
3355+ for(int j = 0; j < 8; j++) {
3356+ bit_trans_table_0[i][j] = n & 0x80;
3357+ bit_trans_table_1[i][j] = ((n & 0x80) != 0) ? 0x40 : 0;
3358+ bit_trans_table_2[i][j] = ((n & 0x80) != 0) ? 0x20 : 0;
3359+ bit_trans_table_3[i][j] = ((n & 0x80) != 0) ? 0x10 : 0;
3360+#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
3361+ bit_trans_table_4[i][j] = ((n & 0x80) != 0) ? 0x08 : 0;
3362+ bit_trans_table_5[i][j] = ((n & 0x80) != 0) ? 0x04 : 0;
3363+#endif
3364+ n <<= 1;
3365+ }
3366+ }
3367+#endif
33683368 memset(io_w_latch, 0xff, sizeof(io_w_latch));
33693369 screen_update_flag = true;
33703370 memset(gvram, 0x00, sizeof(gvram));
--- a/source/src/vm/fm7/vram.cpp
+++ b/source/src/vm/fm7/vram.cpp
@@ -671,8 +671,8 @@ void DISPLAY::CopyDrawnData(scrntype_t* src, scrntype_t* dst, int width, bool sc
671671 #endif
672672 scrntype_vec8_t* vsrc = (scrntype_vec8_t*)__builtin_assume_aligned(src, sizeof(scrntype_vec8_t));
673673 scrntype_vec8_t* vdst = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
674- scrntype_vec8_t tmp_dd;
675- scrntype_vec8_t sline;
674+ __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
675+ __DECL_ALIGNED(32) scrntype_vec8_t sline;
676676
677677 if(scan_line) {
678678 __DECL_VECTORIZED_LOOP
@@ -747,7 +747,7 @@ void DISPLAY::GETVRAM_1_400L(int yoff, scrntype_t *p)
747747 pixel = gvram_shadow[yoff_d];
748748 uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16);
749749 __DECL_ALIGNED(16) uint16_vec8_t tmp_d;
750- scrntype_vec8_t tmp_dd;
750+ __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
751751 scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t));
752752
753753 tmp_d.v = ppx->v;
@@ -770,7 +770,7 @@ void DISPLAY::GETVRAM_1_400L_GREEN(int yoff, scrntype_t *p)
770770 pixel = gvram_shadow[yoff_d];
771771 uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16);
772772 __DECL_ALIGNED(16) uint16_vec8_t tmp_d;
773- scrntype_vec8_t tmp_dd;
773+ __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
774774 scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t));
775775
776776 tmp_d.v = ppx->v;
@@ -794,7 +794,7 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px,
794794 {
795795 uint32_t b3, r3, g3;
796796 uint8_t bb[4], rr[4], gg[4];
797- uint16_vec8_t pixels;
797+ __DECL_ALIGNED(16) uint16_vec8_t pixels;
798798 __DECL_ALIGNED(16) const uint16_t __masks[8] = {(uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask};
799799 scrntype_t b, r, g;
800800 uint32_t idx;;
@@ -841,7 +841,7 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px,
841841 #else
842842 __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[16];
843843 #endif
844- uint16_vec8_t tmp_g, tmp_r, tmp_b;
844+ __DECL_ALIGNED(16) uint16_vec8_t tmp_g, tmp_r, tmp_b;
845845 __v8hi *vp0, *vp1, *vp2, *vp3;
846846 // G
847847 vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[gg[0]][0]), 16);
@@ -895,7 +895,7 @@ __DECL_VECTORIZED_LOOP
895895 tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = analog_palette_pixel[pixels.w[i]];;
896896 }
897897 scrntype_vec8_t *vpx = (scrntype_vec8_t*)__builtin_assume_aligned(px, sizeof(scrntype_vec8_t));
898- scrntype_vec8_t vmask;
898+ __DECL_ALIGNED(32) scrntype_vec8_t vmask;
899899 __DECL_VECTORIZED_LOOP
900900 for(int i = 0; i < 2; i++) {
901901 vp[i].v = dp[i].v;
@@ -957,9 +957,9 @@ void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_li
957957
958958 uint8_t bb[8], rr[8], gg[8];
959959
960- uint16_vec8_t _btmp;
961- uint16_vec8_t _rtmp;
962- uint16_vec8_t _gtmp;
960+ __DECL_ALIGNED(16) uint16_vec8_t _btmp;
961+ __DECL_ALIGNED(16) uint16_vec8_t _rtmp;
962+ __DECL_ALIGNED(16) uint16_vec8_t _gtmp;
963963 uint16_vec8_t *vp0, *vp1, *vp2, *vp3, *vp4, *vp5;
964964 #if !defined(FIXED_FRAMEBUFFER_SIZE)
965965 __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[8];
@@ -1088,7 +1088,7 @@ __DECL_VECTORIZED_LOOP
10881088 dp[i].v = dp[i].v >> 2;
10891089 #endif
10901090 }
1091- scrntype_vec8_t scanline_data;
1091+ __DECL_ALIGNED(32) scrntype_vec8_t scanline_data;
10921092 __DECL_VECTORIZED_LOOP
10931093 for(int i = 0; i < 8; i++) {
10941094 scanline_data.w[i] = RGBA_COLOR(31, 31, 31, 255);
--- a/source/src/vm/z80tvgame/memory.cpp
+++ b/source/src/vm/z80tvgame/memory.cpp
@@ -99,7 +99,7 @@ void MEMORY::draw_screen()
9999 dest[x] = (val & bit) ? col_w : col_b;
100100 }
101101 #else
102- scrntype_vec8_t d;
102+ __DECL_ALIGNED(32) scrntype_vec8_t d;
103103 for(int xx = 32; xx < (240 - 32); xx += 8) {
104104 uint8_t val = ram[offset + (xx >> 3)];
105105 d = ConvertByteToPackedPixel_PixelTbl(val, &pixel_trans_table);
--- a/source/src/vm/z80tvgame/memory.h
+++ b/source/src/vm/z80tvgame/memory.h
@@ -31,7 +31,7 @@ private:
3131
3232 bool inserted;
3333
34- _bit_trans_table_scrn_t pixel_trans_table;
34+ __DECL_ALIGNED(32) _bit_trans_table_scrn_t pixel_trans_table;
3535 public:
3636 MEMORY(VM_TEMPLATE* parent_vm, EMU* parent_emu) : DEVICE(parent_vm, parent_emu)
3737 {
Show on old repository browser