• R/O
  • HTTP
  • SSH
  • HTTPS

提交

Frequently used words (click to add to your profile)

java c++ android linux c# windows objective-c cocoa 誰得 qt python php ruby game gui bathyscaphe c 計画中(planning stage) 翻訳 omegat framework twitter dom test vb.net directx ゲームエンジン btron arduino previewer

減色プログラム


Commit MetaInfo

修订版: 31c64059d97d88db5b8c42a786bf15aeae293c6e (tree)
时间: 2011-05-17 03:28:02
作者: berupon <berupon@gmai...>
Committer: berupon

Log Message

optimized

更改概述

差异

--- a/Color4f.h
+++ b/Color4f.h
@@ -67,6 +67,17 @@ struct Color4f
6767 result -= rhs;
6868 return result;
6969 }
70+
71+ Color4f& operator *= (const Color4f& rhs) {
72+ v = _mm_mul_ps(v, rhs.v);
73+ return *this;
74+ }
75+
76+ Color4f operator * (const Color4f& rhs) {
77+ Color4f result(*this);
78+ result *= rhs;
79+ return result;
80+ }
7081
7182 Color4f& operator *= (float scalar) {
7283 __m128 s = _mm_set1_ps(scalar);
@@ -95,6 +106,9 @@ struct Color4f
95106 return result;
96107 }
97108
109+ void zero() {
110+ v = _mm_setzero_ps();
111+ }
98112 };
99113
100114 inline Color4f operator * (float scalar, const Color4f& c) {
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -79,7 +79,9 @@ void random_permutation_2d(
7979
8080 void init_image(Image4f& image)
8181 {
82- std::fill(image.pBuff_, image.pBuff_+image.width_*image.height_, Color4f(0.0f,0.0f,0.0f,0.0f));
82+ Color4f z;
83+ z.zero();
84+ std::fill(image.pBuff_, image.pBuff_+image.width_*image.height_, z);
8385 }
8486
8587 void compute_b_array(
@@ -95,7 +97,8 @@ void compute_b_array(
9597 int offset_y = (b.height_ - 1)/2 - radius_height;
9698 for (int j_y=0; j_y < b.height_; j_y++) {
9799 for (int j_x=0; j_x < b.width_; j_x++) {
98- Color4f sum(0.0f,0.0f,0.0f,0.0f);
100+ Color4f sum;
101+ sum.zero();
99102 for (int k_y=0; k_y < filter_weights.height_; k_y++) {
100103 for (int k_x = 0; k_x < filter_weights.width_; k_x++) {
101104 if (k_x+offset_x >= j_x - radius_width &&
@@ -123,8 +126,11 @@ Color4f b_value(const Image4f& b, int i_x, int i_y, int j_x, int j_y)
123126 int k_y = j_y - i_y + radius_height;
124127 if (k_x >= 0 && k_y >= 0 && k_x < b.width_ && k_y < b.height_)
125128 return b[k_y][k_x];
126- else
127- return Color4f(0.0f, 0.0f, 0.0f, 0.0f);
129+ else {
130+ Color4f z;
131+ z.zero();
132+ return z;
133+ }
128134 }
129135
130136 void compute_a_image(const Image4f& image, const Image4f& b, Image4f& a)
@@ -133,7 +139,8 @@ void compute_a_image(const Image4f& image, const Image4f& b, Image4f& a)
133139 radius_height = (b.height_ - 1)/2;
134140 for (int i_y = 0; i_y < a.height_; i_y++) {
135141 for (int i_x = 0; i_x < a.width_; i_x++) {
136- Color4f sum(0,0,0,0);
142+ Color4f sum;
143+ sum.zero();
137144 for (int j_y = i_y - radius_height; j_y <= i_y + radius_height; j_y++) {
138145 if (j_y < 0) j_y = 0;
139146 if (j_y >= a.height_) break;
@@ -266,7 +273,8 @@ void compute_initial_s(
266273 int coarse_height = coarse_variables.height_;
267274 int center_x = (b.width_-1)/2, center_y = (b.height_-1)/2;
268275 Color4f center_b = b_value(b,0,0,0,0);
269- Color4f zero_vector(0,0,0,0);
276+ Color4f zero_vector;
277+ zero_vector.zero();
270278 for (int v=0; v<palette_size; v++) {
271279 for (int alpha=v; alpha<palette_size; alpha++) {
272280 s[alpha][v] = zero_vector;
@@ -281,11 +289,10 @@ void compute_initial_s(
281289 if (i_x == j_x && i_y == j_y) continue;
282290 Color4f b_ij = b_value(b,i_x,i_y,j_x,j_y);
283291 for (int v=0; v<palette_size; v++) {
292+ float vcv = coarse_variables[v][i_y][i_x];
284293 for (int alpha=v; alpha<palette_size; alpha++) {
285- float mult = coarse_variables[v][i_y][i_x] * coarse_variables[alpha][j_y][j_x];
286- s[alpha][v][0] += mult * b_ij[0];
287- s[alpha][v][1] += mult * b_ij[1];
288- s[alpha][v][2] += mult * b_ij[2];
294+ float mult = vcv * coarse_variables[alpha][j_y][j_x];
295+ s[alpha][v] += mult * b_ij;
289296 }
290297 }
291298 }
@@ -317,17 +324,14 @@ void update_s(
317324 for (int i_x=max(0, j_x - center_x); i_x<max_i_x; i_x++) {
318325 Color4f delta_b_ij = delta*b_value(b,i_x,i_y,j_x,j_y);
319326 if (i_x == j_x && i_y == j_y) continue;
327+ Color4f* ps = s[alpha];
320328 for (int v=0; v <= alpha; v++) {
321329 float mult = coarse_variables[v][i_y][i_x];
322- s[alpha][v][0] += mult * delta_b_ij[0];
323- s[alpha][v][1] += mult * delta_b_ij[1];
324- s[alpha][v][2] += mult * delta_b_ij[2];
330+ ps[v] += mult * delta_b_ij;
325331 }
326332 for (int v=alpha; v<palette_size; v++) {
327333 float mult = coarse_variables[v][i_y][i_x];
328- s[v][alpha][0] += mult * delta_b_ij[0];
329- s[v][alpha][1] += mult * delta_b_ij[1];
330- s[v][alpha][2] += mult * delta_b_ij[2];
334+ s[v][alpha] += mult * delta_b_ij;
331335 }
332336 }
333337 }
@@ -351,7 +355,8 @@ void refine_palette(
351355
352356 Color4f r[256];
353357 for (unsigned int v=0; v<num_colors; v++) {
354- Color4f sum(0,0,0,0);
358+ Color4f sum;
359+ sum.zero();
355360 for (int i_y=0; i_y<coarse_variables.height_; i_y++) {
356361 for (int i_x=0; i_x<coarse_variables.width_; i_x++) {
357362 float cv = coarse_variables[v][i_y][i_x];
@@ -391,7 +396,8 @@ void compute_initial_j_palette_sum(
391396 {
392397 for (int j_y=0; j_y<coarse_variables.height_; ++j_y) {
393398 for (int j_x=0; j_x<coarse_variables.width_; ++j_x) {
394- Color4f palette_sum = Color4f(0,0,0,0);
399+ Color4f palette_sum;
400+ palette_sum.zero();
395401 for (size_t alpha=0; alpha<num_colors; ++alpha) {
396402 palette_sum += coarse_variables[alpha][j_y][j_x]*palette[alpha];
397403 }
@@ -444,7 +450,8 @@ void spatial_color_quant(
444450 Image4f& bi = *p_bi;
445451 for (int J_y=0; J_y<bi.height_; J_y++) {
446452 for (int J_x=0; J_x<bi.width_; J_x++) {
447- Color4f sum(0,0,0,0);
453+ Color4f sum;
454+ sum.zero();
448455 for (int i_y=radius_height*2; i_y<radius_height*2+2; i_y++) {
449456 for (int i_x=radius_width*2; i_x<radius_width*2+2; i_x++) {
450457 for (int j_y=J_y*2; j_y<J_y*2+2; j_y++) {
@@ -508,7 +515,8 @@ void spatial_color_quant(
508515 visit_queue.pop_front();
509516
510517 // Compute (25)
511- Color4f p_i(0.0f, 0.0f, 0.0f, 0.0f);
518+ Color4f p_i;
519+ p_i.zero();
512520 for (int y=0; y<b.height_; y++) {
513521 for (int x=0; x<b.width_; x++) {
514522 int j_x = x - center_x + i_x, j_y = y - center_y + i_y;
@@ -516,9 +524,7 @@ void spatial_color_quant(
516524 if (j_x < 0 || j_y < 0 || j_x >= coarse_variables.width_ || j_y >= coarse_variables.height_) continue;
517525 Color4f b_ij = b_value(b, i_x, i_y, j_x, j_y);
518526 Color4f j_pal = (*j_palette_sum)[j_y][j_x];
519- p_i[0] += b_ij[0]*j_pal[0];
520- p_i[1] += b_ij[1]*j_pal[1];
521- p_i[2] += b_ij[2]*j_pal[2];
527+ p_i += b_ij * j_pal;
522528 }
523529 }
524530 p_i *= 2.0;
@@ -555,9 +561,7 @@ void spatial_color_quant(
555561 if (new_val >= 1) new_val = 1 - 1e-10;
556562 float delta_m_iv = new_val - coarse_variables[v][i_y][i_x];
557563 coarse_variables[v][i_y][i_x] = new_val;
558- j_pal[0] += delta_m_iv*palette[v][0];
559- j_pal[1] += delta_m_iv*palette[v][1];
560- j_pal[2] += delta_m_iv*palette[v][2];
564+ j_pal += delta_m_iv * palette[v];
561565 if (abs(delta_m_iv) > 0.001 && !skip_palette_maintenance) {
562566 update_s(s, coarse_variables, b, i_x, i_y, v, delta_m_iv);
563567 }