減色プログラム
修订版 | 31c64059d97d88db5b8c42a786bf15aeae293c6e (tree) |
---|---|
时间 | 2011-05-17 03:28:02 |
作者 | berupon <berupon@gmai...> |
Committer | berupon |
optimized
@@ -67,6 +67,17 @@ struct Color4f | ||
67 | 67 | result -= rhs; |
68 | 68 | return result; |
69 | 69 | } |
70 | + | |
71 | + Color4f& operator *= (const Color4f& rhs) { | |
72 | + v = _mm_mul_ps(v, rhs.v); | |
73 | + return *this; | |
74 | + } | |
75 | + | |
76 | + Color4f operator * (const Color4f& rhs) { | |
77 | + Color4f result(*this); | |
78 | + result *= rhs; | |
79 | + return result; | |
80 | + } | |
70 | 81 | |
71 | 82 | Color4f& operator *= (float scalar) { |
72 | 83 | __m128 s = _mm_set1_ps(scalar); |
@@ -95,6 +106,9 @@ struct Color4f | ||
95 | 106 | return result; |
96 | 107 | } |
97 | 108 | |
109 | + void zero() { | |
110 | + v = _mm_setzero_ps(); | |
111 | + } | |
98 | 112 | }; |
99 | 113 | |
100 | 114 | inline Color4f operator * (float scalar, const Color4f& c) { |
@@ -79,7 +79,9 @@ void random_permutation_2d( | ||
79 | 79 | |
80 | 80 | void init_image(Image4f& image) |
81 | 81 | { |
82 | - std::fill(image.pBuff_, image.pBuff_+image.width_*image.height_, Color4f(0.0f,0.0f,0.0f,0.0f)); | |
82 | + Color4f z; | |
83 | + z.zero(); | |
84 | + std::fill(image.pBuff_, image.pBuff_+image.width_*image.height_, z); | |
83 | 85 | } |
84 | 86 | |
85 | 87 | void compute_b_array( |
@@ -95,7 +97,8 @@ void compute_b_array( | ||
95 | 97 | int offset_y = (b.height_ - 1)/2 - radius_height; |
96 | 98 | for (int j_y=0; j_y < b.height_; j_y++) { |
97 | 99 | for (int j_x=0; j_x < b.width_; j_x++) { |
98 | - Color4f sum(0.0f,0.0f,0.0f,0.0f); | |
100 | + Color4f sum; | |
101 | + sum.zero(); | |
99 | 102 | for (int k_y=0; k_y < filter_weights.height_; k_y++) { |
100 | 103 | for (int k_x = 0; k_x < filter_weights.width_; k_x++) { |
101 | 104 | if (k_x+offset_x >= j_x - radius_width && |
@@ -123,8 +126,11 @@ Color4f b_value(const Image4f& b, int i_x, int i_y, int j_x, int j_y) | ||
123 | 126 | int k_y = j_y - i_y + radius_height; |
124 | 127 | if (k_x >= 0 && k_y >= 0 && k_x < b.width_ && k_y < b.height_) |
125 | 128 | return b[k_y][k_x]; |
126 | - else | |
127 | - return Color4f(0.0f, 0.0f, 0.0f, 0.0f); | |
129 | + else { | |
130 | + Color4f z; | |
131 | + z.zero(); | |
132 | + return z; | |
133 | + } | |
128 | 134 | } |
129 | 135 | |
130 | 136 | void compute_a_image(const Image4f& image, const Image4f& b, Image4f& a) |
@@ -133,7 +139,8 @@ void compute_a_image(const Image4f& image, const Image4f& b, Image4f& a) | ||
133 | 139 | radius_height = (b.height_ - 1)/2; |
134 | 140 | for (int i_y = 0; i_y < a.height_; i_y++) { |
135 | 141 | for (int i_x = 0; i_x < a.width_; i_x++) { |
136 | - Color4f sum(0,0,0,0); | |
142 | + Color4f sum; | |
143 | + sum.zero(); | |
137 | 144 | for (int j_y = i_y - radius_height; j_y <= i_y + radius_height; j_y++) { |
138 | 145 | if (j_y < 0) j_y = 0; |
139 | 146 | if (j_y >= a.height_) break; |
@@ -266,7 +273,8 @@ void compute_initial_s( | ||
266 | 273 | int coarse_height = coarse_variables.height_; |
267 | 274 | int center_x = (b.width_-1)/2, center_y = (b.height_-1)/2; |
268 | 275 | Color4f center_b = b_value(b,0,0,0,0); |
269 | - Color4f zero_vector(0,0,0,0); | |
276 | + Color4f zero_vector; | |
277 | + zero_vector.zero(); | |
270 | 278 | for (int v=0; v<palette_size; v++) { |
271 | 279 | for (int alpha=v; alpha<palette_size; alpha++) { |
272 | 280 | s[alpha][v] = zero_vector; |
@@ -281,11 +289,10 @@ void compute_initial_s( | ||
281 | 289 | if (i_x == j_x && i_y == j_y) continue; |
282 | 290 | Color4f b_ij = b_value(b,i_x,i_y,j_x,j_y); |
283 | 291 | for (int v=0; v<palette_size; v++) { |
292 | + float vcv = coarse_variables[v][i_y][i_x]; | |
284 | 293 | for (int alpha=v; alpha<palette_size; alpha++) { |
285 | - float mult = coarse_variables[v][i_y][i_x] * coarse_variables[alpha][j_y][j_x]; | |
286 | - s[alpha][v][0] += mult * b_ij[0]; | |
287 | - s[alpha][v][1] += mult * b_ij[1]; | |
288 | - s[alpha][v][2] += mult * b_ij[2]; | |
294 | + float mult = vcv * coarse_variables[alpha][j_y][j_x]; | |
295 | + s[alpha][v] += mult * b_ij; | |
289 | 296 | } |
290 | 297 | } |
291 | 298 | } |
@@ -317,17 +324,14 @@ void update_s( | ||
317 | 324 | for (int i_x=max(0, j_x - center_x); i_x<max_i_x; i_x++) { |
318 | 325 | Color4f delta_b_ij = delta*b_value(b,i_x,i_y,j_x,j_y); |
319 | 326 | if (i_x == j_x && i_y == j_y) continue; |
327 | + Color4f* ps = s[alpha]; | |
320 | 328 | for (int v=0; v <= alpha; v++) { |
321 | 329 | float mult = coarse_variables[v][i_y][i_x]; |
322 | - s[alpha][v][0] += mult * delta_b_ij[0]; | |
323 | - s[alpha][v][1] += mult * delta_b_ij[1]; | |
324 | - s[alpha][v][2] += mult * delta_b_ij[2]; | |
330 | + ps[v] += mult * delta_b_ij; | |
325 | 331 | } |
326 | 332 | for (int v=alpha; v<palette_size; v++) { |
327 | 333 | float mult = coarse_variables[v][i_y][i_x]; |
328 | - s[v][alpha][0] += mult * delta_b_ij[0]; | |
329 | - s[v][alpha][1] += mult * delta_b_ij[1]; | |
330 | - s[v][alpha][2] += mult * delta_b_ij[2]; | |
334 | + s[v][alpha] += mult * delta_b_ij; | |
331 | 335 | } |
332 | 336 | } |
333 | 337 | } |
@@ -351,7 +355,8 @@ void refine_palette( | ||
351 | 355 | |
352 | 356 | Color4f r[256]; |
353 | 357 | for (unsigned int v=0; v<num_colors; v++) { |
354 | - Color4f sum(0,0,0,0); | |
358 | + Color4f sum; | |
359 | + sum.zero(); | |
355 | 360 | for (int i_y=0; i_y<coarse_variables.height_; i_y++) { |
356 | 361 | for (int i_x=0; i_x<coarse_variables.width_; i_x++) { |
357 | 362 | float cv = coarse_variables[v][i_y][i_x]; |
@@ -391,7 +396,8 @@ void compute_initial_j_palette_sum( | ||
391 | 396 | { |
392 | 397 | for (int j_y=0; j_y<coarse_variables.height_; ++j_y) { |
393 | 398 | for (int j_x=0; j_x<coarse_variables.width_; ++j_x) { |
394 | - Color4f palette_sum = Color4f(0,0,0,0); | |
399 | + Color4f palette_sum; | |
400 | + palette_sum.zero(); | |
395 | 401 | for (size_t alpha=0; alpha<num_colors; ++alpha) { |
396 | 402 | palette_sum += coarse_variables[alpha][j_y][j_x]*palette[alpha]; |
397 | 403 | } |
@@ -444,7 +450,8 @@ void spatial_color_quant( | ||
444 | 450 | Image4f& bi = *p_bi; |
445 | 451 | for (int J_y=0; J_y<bi.height_; J_y++) { |
446 | 452 | for (int J_x=0; J_x<bi.width_; J_x++) { |
447 | - Color4f sum(0,0,0,0); | |
453 | + Color4f sum; | |
454 | + sum.zero(); | |
448 | 455 | for (int i_y=radius_height*2; i_y<radius_height*2+2; i_y++) { |
449 | 456 | for (int i_x=radius_width*2; i_x<radius_width*2+2; i_x++) { |
450 | 457 | for (int j_y=J_y*2; j_y<J_y*2+2; j_y++) { |
@@ -508,7 +515,8 @@ void spatial_color_quant( | ||
508 | 515 | visit_queue.pop_front(); |
509 | 516 | |
510 | 517 | // Compute (25) |
511 | - Color4f p_i(0.0f, 0.0f, 0.0f, 0.0f); | |
518 | + Color4f p_i; | |
519 | + p_i.zero(); | |
512 | 520 | for (int y=0; y<b.height_; y++) { |
513 | 521 | for (int x=0; x<b.width_; x++) { |
514 | 522 | int j_x = x - center_x + i_x, j_y = y - center_y + i_y; |
@@ -516,9 +524,7 @@ void spatial_color_quant( | ||
516 | 524 | if (j_x < 0 || j_y < 0 || j_x >= coarse_variables.width_ || j_y >= coarse_variables.height_) continue; |
517 | 525 | Color4f b_ij = b_value(b, i_x, i_y, j_x, j_y); |
518 | 526 | Color4f j_pal = (*j_palette_sum)[j_y][j_x]; |
519 | - p_i[0] += b_ij[0]*j_pal[0]; | |
520 | - p_i[1] += b_ij[1]*j_pal[1]; | |
521 | - p_i[2] += b_ij[2]*j_pal[2]; | |
527 | + p_i += b_ij * j_pal; | |
522 | 528 | } |
523 | 529 | } |
524 | 530 | p_i *= 2.0; |
@@ -555,9 +561,7 @@ void spatial_color_quant( | ||
555 | 561 | if (new_val >= 1) new_val = 1 - 1e-10; |
556 | 562 | float delta_m_iv = new_val - coarse_variables[v][i_y][i_x]; |
557 | 563 | coarse_variables[v][i_y][i_x] = new_val; |
558 | - j_pal[0] += delta_m_iv*palette[v][0]; | |
559 | - j_pal[1] += delta_m_iv*palette[v][1]; | |
560 | - j_pal[2] += delta_m_iv*palette[v][2]; | |
564 | + j_pal += delta_m_iv * palette[v]; | |
561 | 565 | if (abs(delta_m_iv) > 0.001 && !skip_palette_maintenance) { |
562 | 566 | update_s(s, coarse_variables, b, i_x, i_y, v, delta_m_iv); |
563 | 567 | } |