• R/O
  • HTTP
  • SSH
  • HTTPS

提交

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

減色プログラム


Commit MetaInfo

修订版dd7e7c9a31088a00784565d88555963ddee35639 (tree)
时间2011-05-21 05:41:53
作者berupon <berupon@gmai...>
Commiterberupon

Log Message

changed color precision to double to decrease visual distortion.
(idk the reason but it's faster than single float...)

更改概述

差异

--- /dev/null
+++ b/Color4d.h
@@ -0,0 +1,138 @@
1+#pragma once
2+
3+struct Color4d
4+{
5+ __m128d v[2];
6+
7+ Color4d() {
8+ ;
9+ }
10+
11+ Color4d(const Color4d& c) {
12+ *this = c;
13+ }
14+
15+ Color4d(double r, double g, double b, double a) {
16+ v[0] = _mm_setr_pd(r,g);
17+ v[1] = _mm_setr_pd(b,a);
18+ }
19+
20+ Color4d& operator = (const Color4d& rhs) {
21+ v[0] = rhs.v[0];
22+ v[1] = rhs.v[1];
23+ return *this;
24+ }
25+
26+ Color4d direct_product(const Color4d& rhs) const {
27+ Color4d result;
28+#if 1
29+ result.v[0] = _mm_mul_pd(v[0], rhs.v[0]);
30+ result.v[1] = _mm_mul_pd(v[1], rhs.v[1]);
31+#else
32+ for (int i=0; i<3; i++) {
33+ result[i] = (*this)[i] * rhs[i];
34+ }
35+#endif
36+ return result;
37+ }
38+
39+ double dot_product(const Color4d& rhs) {
40+// http://www.icnet.ne.jp/~nsystem/simd_tobira/dpps.html
41+ double result = 0;
42+ for (int i=0; i<3; i++) {
43+ result += (*this)[i] * rhs[i];
44+ }
45+ return result;
46+ }
47+
48+ Color4d& operator += (const Color4d& rhs) {
49+ v[0] = _mm_add_pd(v[0], rhs.v[0]);
50+ v[1] = _mm_add_pd(v[1], rhs.v[1]);
51+ return *this;
52+ }
53+
54+ Color4d& operator += (const Color4f& rhs) {
55+ v[0] = _mm_add_pd(v[0], _mm_cvtps_pd(rhs.v));
56+ v[1] = _mm_add_pd(v[1], _mm_cvtps_pd(_mm_movehl_ps(rhs.v, rhs.v)));
57+ return *this;
58+ }
59+
60+ Color4d operator + (const Color4d& rhs) {
61+ Color4d result(*this);
62+ result += rhs;
63+ return result;
64+ }
65+
66+ Color4d& operator -= (const Color4d& rhs) {
67+ v[0] = _mm_sub_pd(v[0], rhs.v[0]);
68+ v[1] = _mm_sub_pd(v[1], rhs.v[1]);
69+ return *this;
70+ }
71+
72+ Color4d operator - (const Color4d& rhs) {
73+ Color4d result(*this);
74+ result -= rhs;
75+ return result;
76+ }
77+
78+ Color4d& operator *= (const Color4d& rhs) {
79+ v[0] = _mm_mul_pd(v[0], rhs.v[0]);
80+ v[1] = _mm_mul_pd(v[1], rhs.v[1]);
81+ return *this;
82+ }
83+
84+ Color4d operator * (const Color4d& rhs) {
85+ Color4d result(*this);
86+ result *= rhs;
87+ return result;
88+ }
89+
90+ Color4d& operator *= (double scalar) {
91+ __m128d s = _mm_set1_pd(scalar);
92+ v[0] = _mm_mul_pd(v[0], s);
93+ v[1] = _mm_mul_pd(v[1], s);
94+ return *this;
95+ }
96+
97+ Color4d operator * (double scalar) {
98+ Color4d result(*this);
99+ result *= scalar;
100+ return result;
101+ }
102+
103+ double& operator[] (int idx) {
104+ switch (idx) {
105+ case 0: return v[0].m128d_f64[0];
106+ case 1: return v[0].m128d_f64[1];
107+ case 2: return v[1].m128d_f64[0];
108+ case 3: return v[1].m128d_f64[1];
109+ }
110+ }
111+ const double& operator[] (int idx) const {
112+ switch (idx) {
113+ case 0: return v[0].m128d_f64[0];
114+ case 1: return v[0].m128d_f64[1];
115+ case 2: return v[1].m128d_f64[0];
116+ case 3: return v[1].m128d_f64[1];
117+ }
118+ }
119+
120+ double norm_squared() {
121+ double result = 0;
122+ for (int i=0; i<3; i++) {
123+ result += (*this)[i] * (*this)[i];
124+ }
125+ return result;
126+ }
127+
128+ void zero() {
129+ v[0] = _mm_setzero_pd();
130+ v[1] = _mm_setzero_pd();
131+ }
132+};
133+
134+inline Color4d operator * (double scalar, const Color4d& c) {
135+ Color4d tmp = c;
136+ return tmp * scalar;
137+}
138+
--- /dev/null
+++ b/Color4f.cpp
@@ -0,0 +1,13 @@
1+#include "stdafx.h"
2+#include "Color4f.h"
3+
4+#include "Color4d.h"
5+
6+Color4f& Color4f::operator = (const Color4d& rhs)
7+{
8+ v = _mm_movelh_ps(
9+ _mm_cvtpd_ps(rhs.v[0]),
10+ _mm_cvtpd_ps(rhs.v[1])
11+ );
12+ return *this;
13+}
--- a/Color4f.h
+++ b/Color4f.h
@@ -4,6 +4,8 @@
44
55 // http://www2.kobe-u.ac.jp/~lerl2/l_cc_p_10.1.008/doc/main_cls/mergedProjects/intref_cls/
66
7+struct Color4d;
8+
79 struct Color4f
810 {
911 __m128 v;
@@ -25,6 +27,8 @@ struct Color4f
2527 return *this;
2628 }
2729
30+ Color4f& operator = (const Color4d& rhs);
31+
2832 Color4f direct_product(const Color4f& rhs) const {
2933 Color4f result;
3034 #if 1
--- a/color_quantizer.vcproj
+++ b/color_quantizer.vcproj
@@ -178,6 +178,10 @@
178178 UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
179179 >
180180 <File
181+ RelativePath=".\Color4f.cpp"
182+ >
183+ </File>
184+ <File
181185 RelativePath=".\lib.cpp"
182186 >
183187 </File>
@@ -220,6 +224,10 @@
220224 >
221225 </File>
222226 <File
227+ RelativePath=".\Color4d.h"
228+ >
229+ </File>
230+ <File
223231 RelativePath=".\Color4f.h"
224232 >
225233 </File>
--- a/main.cpp
+++ b/main.cpp
@@ -27,11 +27,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2727
2828 #include "stdafx.h"
2929
30-#include "Color4f.h"
31-#include "Array.h"
32-
33-typedef Array2D<Color4f> Image4f;
34-
3530 #include <algorithm>
3631 #include <math.h>
3732 #include <stdio.h>
@@ -90,43 +85,43 @@ int _tmain(int argc, _TCHAR* argv[])
9085
9186 size_t width = imageInfo.width;
9287 size_t height = imageInfo.height;
93- Image4f image(width, height);
94- Color4f* pLine = image.pBuff_;
88+ Image image(width, height);
89+ Color* pLine = image.pBuff_;
9590 uint8_t* pSrcLine = p;
9691 for (size_t y=0; y<height; ++y) {
9792 for (size_t x=0; x<width; ++x) {
98- float r = pSrcLine[x*3+0] / 255.0f;
99- float g = pSrcLine[x*3+1] / 255.0f;
100- float b = pSrcLine[x*3+2] / 255.0f;
101- pLine[x] = Color4f(r,g,b,0.0f);
93+ double r = pSrcLine[x*3+0] / 255.0;
94+ double g = pSrcLine[x*3+1] / 255.0;
95+ double b = pSrcLine[x*3+2] / 255.0;
96+ pLine[x] = Color(r,g,b,0.0);
10297 }
10398 pLine += width;
10499 pSrcLine += lineBytes;
105100 }
106- Color4f buff_filter1_weights[1*1];
107- Color4f buff_filter3_weights[3*3];
108- Color4f buff_filter5_weights[5*5];
101+ Color buff_filter1_weights[1*1];
102+ Color buff_filter3_weights[3*3];
103+ Color buff_filter5_weights[5*5];
109104
110- Image4f filter1_weights(1, 1, buff_filter1_weights);
111- Image4f filter3_weights(3, 3, buff_filter3_weights);
112- Image4f filter5_weights(5, 5, buff_filter5_weights);
105+ Image filter1_weights(1, 1, buff_filter1_weights);
106+ Image filter3_weights(3, 3, buff_filter3_weights);
107+ Image filter5_weights(5, 5, buff_filter5_weights);
113108 std::vector<uint8_t> buff_quantized(width * height);
114109 Array2D<uint8_t> quantized_image(width, height, &buff_quantized[0]);
115- Color4f palette[256];
110+ Color palette[256];
116111
117- filter1_weights[0][0] = Color4f(1.0f, 1.0f, 1.0f, 0.0f);
112+ filter1_weights[0][0] = Color(1.0, 1.0, 1.0, 0.0);
118113
119114 for (size_t i=0; i<num_colors; ++i) {
120- palette[i] = Color4f(
121- ((float)rand())/RAND_MAX,
122- ((float)rand())/RAND_MAX,
123- ((float)rand())/RAND_MAX,
124- 0.0f
115+ palette[i] = Color(
116+ ((double)rand())/RAND_MAX,
117+ ((double)rand())/RAND_MAX,
118+ ((double)rand())/RAND_MAX,
119+ 0.0
125120 );
126121 }
127122
128- Array3D<float>* coarse_variables;
129- float dithering_level = 0.09*log((float)image.width_*image.height_) - 0.04*log((float)num_colors) + 0.001;
123+ Array3D<double>* coarse_variables;
124+ double dithering_level = 0.09*log((double)image.width_*image.height_) - 0.04*log((double)num_colors) + 0.001;
130125 if (argc > 1+3) {
131126 dithering_level = _ttof(argv[4]);
132127 if (dithering_level <= 0.0) {
@@ -143,12 +138,12 @@ int _tmain(int argc, _TCHAR* argv[])
143138 }
144139 }
145140
146- float stddev = dithering_level;
147- float sum = 0.0;
141+ double stddev = dithering_level;
142+ double sum = 0.0;
148143 for (int i=0; i<3; i++) {
149144 for (int j=0; j<3; j++) {
150- float w = exp(-sqrt((float)((i-1)*(i-1) + (j-1)*(j-1)))/(stddev*stddev));
151- filter3_weights[i][j] = Color4f(w,w,w,0);
145+ double w = exp(-sqrt((double)((i-1)*(i-1) + (j-1)*(j-1)))/(stddev*stddev));
146+ filter3_weights[i][j] = Color(w,w,w,0);
152147 sum += 3 * w;
153148 }
154149 }
@@ -163,8 +158,8 @@ int _tmain(int argc, _TCHAR* argv[])
163158 sum = 0.0;
164159 for (int i=0; i<5; i++) {
165160 for (int j=0; j<5; j++) {
166- float w = exp(-sqrt((float)((i-2)*(i-2) + (j-2)*(j-2)))/(stddev*stddev));
167- filter5_weights[i][j] = Color4f(w,w,w,0);
161+ double w = exp(-sqrt((double)((i-2)*(i-2) + (j-2)*(j-2)))/(stddev*stddev));
162+ filter5_weights[i][j] = Color(w,w,w,0);
168163 sum += 3 * w;
169164 }
170165 }
@@ -177,7 +172,7 @@ int _tmain(int argc, _TCHAR* argv[])
177172 }
178173 }
179174
180- Image4f* filters[] = {
175+ Image* filters[] = {
181176 NULL,
182177 &filter1_weights,
183178 NULL,
@@ -203,7 +198,7 @@ int _tmain(int argc, _TCHAR* argv[])
203198 for (int y=0; y<height; y++) {
204199 for (int x=0; x<width; x++) {
205200 const uint8_t idx = quantized_image[y][x];
206- Color4f col = palette[idx];
201+ Color col = palette[idx];
207202 c[2] = (unsigned char)(255*col[0]);
208203 c[1] = (unsigned char)(255*col[1]);
209204 c[0] = (unsigned char)(255*col[2]);
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -1,5 +1,7 @@
11 #include "stdafx.h"
22
3+#include "quantize.h"
4+
35 #include <algorithm>
46 #include <math.h>
57 #include <stdio.h>
@@ -11,11 +13,6 @@
1113
1214 using namespace std;
1315
14-#include "Array.h"
15-#include "Color4f.h"
16-
17-typedef Array2D<Color4f> Image4f;
18-
1916 size_t compute_max_coarse_level(size_t width, size_t height)
2017 {
2118 // We want the coarsest layer to have at most MAX_PIXELS pixels
@@ -29,12 +26,12 @@ size_t compute_max_coarse_level(size_t width, size_t height)
2926 return result;
3027 }
3128
32-void fill_random(Array3D<float>& a)
29+void fill_random(Array3D<double>& a)
3330 {
3431 for (size_t y=0; y<a.height_; ++y) {
3532 for (size_t x=0; x<a.width_; ++x) {
3633 for (size_t z=0; z<a.depth_; ++z) {
37- a(x,y,z) = ((float)rand())/RAND_MAX;
34+ a(x,y,z) = ((double)rand())/RAND_MAX;
3835 }
3936 }
4037 }
@@ -42,7 +39,7 @@ void fill_random(Array3D<float>& a)
4239
4340 void random_permutation(
4441 size_t count,
45- vector<size_t>& result
42+ vector<int>& result
4643 )
4744 {
4845 result.clear();
@@ -55,28 +52,28 @@ void random_permutation(
5552 void random_permutation_2d(
5653 size_t width,
5754 size_t height,
58- deque< pair<size_t, size_t> >& result
55+ deque< pair<int, int> >& result
5956 )
6057 {
61- vector<size_t> perm1d;
58+ vector<int> perm1d;
6259 random_permutation(width*height, perm1d);
6360 while (!perm1d.empty()) {
64- size_t idx = perm1d.back();
61+ int idx = perm1d.back();
6562 perm1d.pop_back();
66- result.push_back(pair<size_t,size_t>(idx % width, idx / width));
63+ result.push_back(pair<int,int>(idx % width, idx / width));
6764 }
6865 }
6966
70-void init_image(Image4f& image)
67+void init_image(Image& image)
7168 {
72- Color4f z;
69+ Color z;
7370 z.zero();
7471 std::fill(image.pBuff_, image.pBuff_+image.width_*image.height_, z);
7572 }
7673
7774 void compute_b_array(
78- const Image4f& filter_weights,
79- Image4f& b
75+ const Image& filter_weights,
76+ Image& b
8077 )
8178 {
8279 // Assume that the pixel i is always located at the center of b,
@@ -87,7 +84,7 @@ void compute_b_array(
8784 int offset_y = (b.height_ - 1)/2 - radius_height;
8885 for (int j_y=0; j_y<b.height_; ++j_y) {
8986 for (int j_x=0; j_x<b.width_; ++j_x) {
90- Color4f sum;
87+ Color sum;
9188 sum.zero();
9289 for (int k_y=0; k_y < filter_weights.height_; ++k_y) {
9390 for (int k_x = 0; k_x < filter_weights.width_; ++k_x) {
@@ -109,7 +106,7 @@ void compute_b_array(
109106 }
110107
111108 __forceinline
112-Color4f b_value(const Image4f& b, int i_x, int i_y, int j_x, int j_y)
109+Color b_value(const Image& b, int i_x, int i_y, int j_x, int j_y)
113110 {
114111 int radius_width = (b.width_ - 1)/2;
115112 int radius_height = (b.height_ - 1)/2;
@@ -118,19 +115,19 @@ Color4f b_value(const Image4f& b, int i_x, int i_y, int j_x, int j_y)
118115 if (k_x >= 0 && k_y >= 0 && k_x < b.width_ && k_y < b.height_)
119116 return b[k_y][k_x];
120117 else {
121- Color4f z;
118+ Color z;
122119 z.zero();
123120 return z;
124121 }
125122 }
126123
127-void compute_a_image(const Image4f& image, const Image4f& b, Image4f& a)
124+void compute_a_image(const Image& image, const Image& b, Image& a)
128125 {
129126 int radius_width = (b.width_ - 1)/2;
130127 int radius_height = (b.height_ - 1)/2;
131128 for (int i_y = 0; i_y<a.height_; ++i_y) {
132129 for (int i_x = 0; i_x<a.width_; ++i_x) {
133- Color4f sum;
130+ Color sum;
134131 sum.zero();
135132 for (int j_y = i_y - radius_height; j_y <= i_y + radius_height; ++j_y) {
136133 if (j_y < 0) j_y = 0;
@@ -149,14 +146,16 @@ void compute_a_image(const Image4f& image, const Image4f& b, Image4f& a)
149146 }
150147
151148 void sum_coarsen(
152- const Image4f& fine,
153- Image4f& coarse
149+ const Image& fine,
150+ Image& coarse
154151 )
155152 {
156153 for (size_t y=0; y<coarse.height_; ++y) {
157154 for (size_t x=0; x<coarse.width_; ++x) {
158- float divisor = 1.0;
159- Color4f val = fine[y*2][x*2];
155+ double divisor = 1.0;
156+ Color val;
157+ val.zero();
158+ val += fine[y*2][x*2];
160159 if (x*2 + 1 < fine.width_) {
161160 divisor += 1; val += fine[y*2][x*2 + 1];
162161 }
@@ -172,9 +171,9 @@ void sum_coarsen(
172171 }
173172 }
174173
175-Array2D<float> extract_vector_layer_2d(const Image4f& s, size_t k)
174+Array2D<double> extract_vector_layer_2d(const Image& s, size_t k)
176175 {
177- Array2D<float> result(s.width_, s.height_);
176+ Array2D<double> result(s.width_, s.height_);
178177 for (size_t y=0; y<s.height_; ++y) {
179178 for (size_t x=0; x<s.width_; ++x) {
180179 result[y][x] = s[y][x][k];
@@ -183,9 +182,9 @@ Array2D<float> extract_vector_layer_2d(const Image4f& s, size_t k)
183182 return result;
184183 }
185184
186-vector<float> extract_vector_layer_1d(const Color4f* s, size_t sz, size_t k)
185+vector<double> extract_vector_layer_1d(const Color* s, size_t sz, size_t k)
187186 {
188- vector<float> result(sz);
187+ vector<double> result(sz);
189188 for (size_t i=0; i<sz; ++i) {
190189 result[i] = s[i][k];
191190 }
@@ -193,15 +192,15 @@ vector<float> extract_vector_layer_1d(const Color4f* s, size_t sz, size_t k)
193192 }
194193
195194 size_t best_match_color(
196- const Array3D<float>& vars,
195+ const Array3D<double>& vars,
197196 size_t i_x,
198197 size_t i_y,
199- const Color4f* palette,
198+ const Color* palette,
200199 size_t num_colors
201200 )
202201 {
203202 size_t max_v = 0;
204- float max_weight = vars(i_x,i_y,0);
203+ double max_weight = vars(i_x,i_y,0);
205204 for (size_t v=1; v<num_colors; ++v) {
206205 if (vars(i_x,i_y,v) > max_weight) {
207206 max_v = v;
@@ -211,38 +210,29 @@ size_t best_match_color(
211210 return max_v;
212211 }
213212
214-void zoom(const Array3D<float>& small, Array3D<float>& big)
213+void zoom(const Array3D<double>& small, Array3D<double>& big)
215214 {
216215 // Simple scaling of the weights array based on mixing the four
217216 // pixels falling under each fine pixel, weighted by area.
218217 // To mix the pixels a little, we assume each fine pixel
219218 // is 1.2 fine pixels wide and high.
220- for (size_t y=0; y<big.height_/2*2; ++y) {
221- for (size_t x=0; x<big.width_/2*2; ++x) {
222- float left = max(0.0, (x-0.1)/2.0);
223- float right = min(small.width_-0.001, (x+1.1)/2.0);
224- float top = max(0.0, (y-0.1)/2.0);
225- float bottom = min(small.height_-0.001, (y+1.1)/2.0);
226- size_t x_left = (size_t)floor(left);
227- size_t x_right = (size_t)floor(right);
228- size_t y_top = (size_t)floor(top);
229- size_t y_bottom = (size_t)floor(bottom);
230- float area = (right-left)*(bottom-top);
231- float inv_area = 1.0 / area;
232- float left2 = (ceil(left) - left);
233- float top2 = (ceil(top) - top);
234- float bottom2 = (bottom - floor(bottom));
235- float right2 = (right - floor(right));
236- float top_left_weight = left2 * top2 * inv_area;
237- float top_right_weight = right2 * top2 * inv_area;
238- float bottom_left_weight = left2 * bottom2 * inv_area;
239- float bottom_right_weight = right2 * bottom2 * inv_area;
240- float top_weight = (right-left) * top2 * inv_area;
241- float bottom_weight = (right-left) * bottom2 * inv_area;
242- float left_weight = (bottom-top) * left2 * inv_area;
243- float right_weight = (bottom-top) * right2 * inv_area;
244- for (size_t z=0; z<big.depth_; ++z) {
245- float val;
219+ for (int y=0; y<big.height_/2*2; y++) {
220+ for (int x=0; x<big.width_/2*2; x++) {
221+ double left = max(0.0, (x-0.1)/2.0), right = min(small.width_-0.001, (x+1.1)/2.0);
222+ double top = max(0.0, (y-0.1)/2.0), bottom = min(small.height_-0.001, (y+1.1)/2.0);
223+ int x_left = (int)floor(left), x_right = (int)floor(right);
224+ int y_top = (int)floor(top), y_bottom = (int)floor(bottom);
225+ double area = (right-left)*(bottom-top);
226+ double top_left_weight = (ceil(left) - left)*(ceil(top) - top)/area;
227+ double top_right_weight = (right - floor(right))*(ceil(top) - top)/area;
228+ double bottom_left_weight = (ceil(left) - left)*(bottom - floor(bottom))/area;
229+ double bottom_right_weight = (right - floor(right))*(bottom - floor(bottom))/area;
230+ double top_weight = (right-left)*(ceil(top) - top)/area;
231+ double bottom_weight = (right-left)*(bottom - floor(bottom))/area;
232+ double left_weight = (bottom-top)*(ceil(left) - left)/area;
233+ double right_weight = (bottom-top)*(right - floor(right))/area;
234+ for (int z=0; z<big.depth_; z++) {
235+ double val;
246236 if (x_left == x_right && y_top == y_bottom) {
247237 val = small(x_left,y_top,z);
248238 } else if (x_left == x_right) {
@@ -264,9 +254,9 @@ void zoom(const Array3D<float>& small, Array3D<float>& big)
264254 }
265255
266256 void compute_initial_s(
267- Image4f& s,
268- const Array3D<float>& coarse_variables,
269- const Image4f& b
257+ Image& s,
258+ const Array3D<double>& coarse_variables,
259+ const Image& b
270260 )
271261 {
272262 init_image(s);
@@ -280,8 +270,8 @@ void compute_initial_s(
280270 __FUNCTION__, palette_size, coarse_width, coarse_height
281271 );
282272
283- Color4f center_b = b_value(b,0,0,0,0);
284- Color4f zero_vector;
273+ Color center_b = b_value(b,0,0,0,0);
274+ Color zero_vector;
285275 zero_vector.zero();
286276 for (size_t v=0; v<palette_size; ++v) {
287277 for (size_t alpha=v; alpha<palette_size; ++alpha) {
@@ -290,17 +280,17 @@ void compute_initial_s(
290280 }
291281 for (int i_y=0; i_y<coarse_height; ++i_y) {
292282 for (int i_x=0; i_x<coarse_width; ++i_x) {
293- const float* p_icv = &coarse_variables(i_x, i_y, 0);
283+ const double* p_icv = &coarse_variables(i_x, i_y, 0);
294284 const int max_j_x = min<int>(coarse_width, i_x - center_x + b.width_);
295285 const int max_j_y = min<int>(coarse_height, i_y - center_y + b.height_);
296286 for (size_t j_y=max<int>(0, i_y - center_y); j_y<max_j_y; ++j_y) {
297287 for (int j_x=max<int>(0, i_x - center_x); j_x<max_j_x; ++j_x) {
298288 if (i_x == j_x && i_y == j_y) continue;
299- Color4f b_ij = b_value(b,i_x,i_y,j_x,j_y);
289+ Color b_ij = b_value(b,i_x,i_y,j_x,j_y);
300290 for (size_t v=0; v<palette_size; ++v) {
301- Color4f b_ij2 = b_ij * p_icv[v];
302- const float* p_jcv = &coarse_variables(j_x, j_y, v);
303- Color4f* ps = s.pBuff_ + v * palette_size + v;
291+ Color b_ij2 = b_ij * p_icv[v];
292+ const double* p_jcv = &coarse_variables(j_x, j_y, v);
293+ Color* ps = s.pBuff_ + v * palette_size + v;
304294 // TODO: 変更画像、縦方向ではなく横方向に操作する。後で転置。
305295 // TODO: Color4fの全ての要素が同じなので、Array2D<Color4f> ではなく Array2D<float> で処理してみる。
306296 for (size_t alpha=v; alpha<palette_size; ++alpha) {
@@ -318,12 +308,12 @@ void compute_initial_s(
318308 }
319309
320310 void update_s(
321- Image4f& s,
322- const Array3D<float>& coarse_variables,
323- const Image4f& b,
311+ Image& s,
312+ const Array3D<double>& coarse_variables,
313+ const Image& b,
324314 const int j_x,
325315 const int j_y,
326- const int alpha,
316+ const size_t alpha,
327317 const double delta
328318 )
329319 {
@@ -332,14 +322,14 @@ void update_s(
332322 const int coarse_height = coarse_variables.height_;
333323 const int center_x = (b.width_-1) / 2;
334324 const int center_y = (b.height_-1) / 2;
335- const int max_i_x = min<int>(coarse_width, j_x + center_x + 1);
325+ const int max_i_x = min(coarse_width, j_x + center_x + 1);
336326 const int max_i_y = min<int>(coarse_height, j_y + center_y + 1);
337327 for (size_t i_y=max(0, j_y - center_y); i_y<max_i_y; ++i_y) {
338328 for (size_t i_x=max(0, j_x - center_x); i_x<max_i_x; ++i_x) {
339- const Color4f delta_b_ij = delta * b_value(b,i_x,i_y,j_x,j_y);
329+ const Color delta_b_ij = delta * b_value(b,i_x,i_y,j_x,j_y);
340330 if (i_x == j_x && i_y == j_y) continue;
341- Color4f* ps = s[alpha];
342- const float* p_cv = &coarse_variables(i_x, i_y, 0);
331+ Color* ps = s[alpha];
332+ const double* p_cv = &coarse_variables(i_x, i_y, 0);
343333 for (size_t v=0; v<=alpha; ++v) {
344334 ps[v] += (*p_cv++) * delta_b_ij;
345335 }
@@ -355,10 +345,10 @@ void update_s(
355345 }
356346
357347 void refine_palette(
358- Image4f& s,
359- const Array3D<float>& coarse_variables,
360- const Image4f& a,
361- Color4f* palette,
348+ Image& s,
349+ const Array3D<double>& coarse_variables,
350+ const Image& a,
351+ Color* palette,
362352 size_t num_colors
363353 )
364354 {
@@ -369,15 +359,15 @@ void refine_palette(
369359 }
370360 }
371361
372- Color4f r[256];
362+ Color r[256];
373363 for (size_t v=0; v<num_colors; ++v) {
374- Color4f sum;
364+ Color sum;
375365 sum.zero();
376366 for (size_t i_y=0; i_y<coarse_variables.height_; ++i_y) {
377367 for (size_t i_x=0; i_x<coarse_variables.width_; ++i_x) {
378- float cv = coarse_variables(i_x,i_y,v);
379- Color4f av = a[i_y][i_x];
380- Color4f result = cv * av;
368+ double cv = coarse_variables(i_x,i_y,v);
369+ Color av = a[i_y][i_x];
370+ Color result = cv * av;
381371 sum += result;
382372 }
383373 }
@@ -385,11 +375,11 @@ void refine_palette(
385375 }
386376
387377 for (size_t k=0; k<3; ++k) {
388- Array2D<float> S_k = extract_vector_layer_2d(s, k);
389- vector<float> R_k = extract_vector_layer_1d(&r[0], num_colors, k);
390- vector<float> palette_channel = -1.0f * ((2.0f*S_k).matrix_inverse()) * R_k;
378+ Array2D<double> S_k = extract_vector_layer_2d(s, k);
379+ vector<double> R_k = extract_vector_layer_1d(&r[0], num_colors, k);
380+ vector<double> palette_channel = -1.0 * ((2.0*S_k).matrix_inverse()) * R_k;
391381 for (size_t v=0; v<num_colors; ++v) {
392- float val = palette_channel[v];
382+ double val = palette_channel[v];
393383 if (val < 0) val = 0;
394384 if (val > 1) val = 1;
395385 palette[v][k] = val;
@@ -404,15 +394,15 @@ void refine_palette(
404394 }
405395
406396 void compute_initial_j_palette_sum(
407- Image4f& j_palette_sum,
408- const Array3D<float>& coarse_variables,
409- const Color4f* palette,
397+ Image& j_palette_sum,
398+ const Array3D<double>& coarse_variables,
399+ const Color* palette,
410400 size_t num_colors
411401 )
412402 {
413403 for (size_t j_y=0; j_y<coarse_variables.height_; ++j_y) {
414404 for (size_t j_x=0; j_x<coarse_variables.width_; ++j_x) {
415- Color4f palette_sum;
405+ Color palette_sum;
416406 palette_sum.zero();
417407 for (size_t alpha=0; alpha<num_colors; ++alpha) {
418408 palette_sum += coarse_variables(j_x,j_y,alpha)*palette[alpha];
@@ -423,11 +413,11 @@ void compute_initial_j_palette_sum(
423413 }
424414
425415 void spatial_color_quant(
426- Image4f& image,
427- Image4f& filter_weights,
416+ Image& image,
417+ Image& filter_weights,
428418 Array2D<uint8_t>& quantized_image,
429- Color4f* palette, size_t num_colors,
430- Array3D<float>*& p_coarse_variables,
419+ Color* palette, size_t num_colors,
420+ Array3D<double>*& p_coarse_variables,
431421 double initial_temperature,
432422 double final_temperature,
433423 int temps_per_level,
@@ -438,9 +428,9 @@ void spatial_color_quant(
438428 compute_max_coarse_level(image.width_, image.height_);
439429 size_t width2 = image.width_ >> max_coarse_level;
440430 size_t height2 = image.height_ >> max_coarse_level;
441- p_coarse_variables = new Array3D<float>(width2, height2, num_colors);
431+ p_coarse_variables = new Array3D<double>(width2, height2, num_colors);
442432 // For syntactic convenience
443- Array3D<float>& coarse_variables = *p_coarse_variables;
433+ Array3D<double>& coarse_variables = *p_coarse_variables;
444434 fill_random(coarse_variables);
445435
446436 double temperature = initial_temperature;
@@ -448,13 +438,13 @@ void spatial_color_quant(
448438 // Compute a_i, b_{ij} according to (11)
449439 size_t extended_neighborhood_width = filter_weights.width_*2 - 1;
450440 size_t extended_neighborhood_height = filter_weights.height_*2 - 1;
451- Image4f b0(extended_neighborhood_width, extended_neighborhood_height);
441+ Image b0(extended_neighborhood_width, extended_neighborhood_height);
452442 compute_b_array(filter_weights, b0);
453- Image4f a0(image.width_, image.height_);
443+ Image a0(image.width_, image.height_);
454444 compute_a_image(image, b0, a0);
455445
456446 // Compute a_I^l, b_{IJ}^l according to (18)
457- vector<Image4f*> a_vec, b_vec;
447+ vector<Image*> a_vec, b_vec;
458448 a_vec.push_back(&a0);
459449 b_vec.push_back(&b0);
460450
@@ -462,11 +452,11 @@ void spatial_color_quant(
462452 for (coarse_level=1; coarse_level <= max_coarse_level; ++coarse_level) {
463453 size_t radius_width = (filter_weights.width_ - 1)/2;
464454 size_t radius_height = (filter_weights.height_ - 1)/2;
465- Image4f* p_bi = new Image4f(max<size_t>(3, b_vec.back()->width_-2), max<size_t>(3, b_vec.back()->height_-2));
466- Image4f& bi = *p_bi;
455+ Image* p_bi = new Image(max<size_t>(3, b_vec.back()->width_-2), max<size_t>(3, b_vec.back()->height_-2));
456+ Image& bi = *p_bi;
467457 for (size_t J_y=0; J_y<bi.height_; ++J_y) {
468458 for (size_t J_x=0; J_x<bi.width_; ++J_x) {
469- Color4f sum;
459+ Color sum;
470460 sum.zero();
471461 for (size_t i_y=radius_height*2; i_y<radius_height*2+2; ++i_y) {
472462 for (size_t i_x=radius_width*2; i_x<radius_width*2+2; ++i_x) {
@@ -482,8 +472,8 @@ void spatial_color_quant(
482472 }
483473 b_vec.push_back(p_bi);
484474
485- Image4f* p_ai = new Image4f(image.width_ >> coarse_level, image.height_ >> coarse_level);
486- Image4f& ai = *p_ai;
475+ Image* p_ai = new Image(image.width_ >> coarse_level, image.height_ >> coarse_level);
476+ Image& ai = *p_ai;
487477 sum_coarsen(*a_vec.back(), ai);
488478 a_vec.push_back(p_ai);
489479 }
@@ -497,17 +487,17 @@ void spatial_color_quant(
497487 #endif
498488 size_t iters_at_current_level = 0;
499489 bool skip_palette_maintenance = false;
500- Image4f s(num_colors, num_colors);
490+ Image s(num_colors, num_colors);
501491 compute_initial_s(s, coarse_variables, *b_vec[coarse_level]);
502- Image4f* j_palette_sum =
503- new Image4f(coarse_variables.width_, coarse_variables.height_);
492+ Image* j_palette_sum =
493+ new Image(coarse_variables.width_, coarse_variables.height_);
504494 compute_initial_j_palette_sum(*j_palette_sum, coarse_variables, palette, num_colors);
505495 while (coarse_level >= 0 || temperature > final_temperature) {
506496 // Need to reseat this reference in case we changed p_coarse_variables
507- Array3D<float>& coarse_variables = *p_coarse_variables;
508- Image4f& a = *a_vec[coarse_level];
509- Image4f& b = *b_vec[coarse_level];
510- Color4f middle_b = b_value(b,0,0,0,0);
497+ Array3D<double>& coarse_variables = *p_coarse_variables;
498+ Image& a = *a_vec[coarse_level];
499+ Image& b = *b_vec[coarse_level];
500+ Color middle_b = b_value(b,0,0,0,0);
511501 #if TRACE
512502 cout << "Temperature: " << temperature << endl;
513503 #endif
@@ -516,14 +506,14 @@ void spatial_color_quant(
516506 size_t step_counter = 0;
517507 for (size_t repeat=0; repeat<repeats_per_temp; ++repeat) {
518508 size_t pixels_changed = 0, pixels_visited = 0;
519- deque< pair<size_t, size_t> > visit_queue;
509+ deque< pair<int, int> > visit_queue;
520510 random_permutation_2d(coarse_variables.width_, coarse_variables.height_, visit_queue);
521511
522512 // Compute 2*sum(j in extended neighborhood of i, j != i) b_ij
523513
524514 while (!visit_queue.empty()) {
525515 // If we get to 10% above initial size, just revisit them all
526- if (visit_queue.size() > coarse_variables.width_*coarse_variables.height_*11/10) {
516+ if (visit_queue.size() > coarse_variables.width_*coarse_variables.height_*11.0/10) {
527517 visit_queue.clear();
528518 random_permutation_2d(coarse_variables.width_, coarse_variables.height_, visit_queue);
529519 }
@@ -533,7 +523,7 @@ void spatial_color_quant(
533523 visit_queue.pop_front();
534524
535525 // Compute (25)
536- Color4f p_i;
526+ Color p_i;
537527 p_i.zero();
538528 for (int y=0; y<b.height_; ++y) {
539529 for (int x=0; x<b.width_; ++x) {
@@ -541,8 +531,8 @@ void spatial_color_quant(
541531 int j_y = y - center_y + i_y;
542532 if (i_x == j_x && i_y == j_y) continue;
543533 if (j_x < 0 || j_y < 0 || j_x >= coarse_variables.width_ || j_y >= coarse_variables.height_) continue;
544- Color4f b_ij = b_value(b, i_x, i_y, j_x, j_y);
545- Color4f j_pal = (*j_palette_sum)[j_y][j_x];
534+ Color b_ij = b_value(b, i_x, i_y, j_x, j_y);
535+ Color j_pal = (*j_palette_sum)[j_y][j_x];
546536 p_i += b_ij * j_pal;
547537 }
548538 }
@@ -557,7 +547,8 @@ void spatial_color_quant(
557547 // We can subtract an arbitrary factor to prevent overflow,
558548 // since only the weight relative to the sum matters, so we
559549 // will choose a value that makes the maximum e^100.
560- double m = -(palette[v].dot_product(p_i + middle_b.direct_product(palette[v]))) / temperature;
550+ Color p_i2; p_i2 = p_i;
551+ double m = -(palette[v].dot_product(p_i2 + middle_b.direct_product(palette[v]))) / temperature;
561552 meanfield_logs.push_back(m);
562553 if (m > max_meanfield_log) {
563554 max_meanfield_log = m;
@@ -572,7 +563,7 @@ void spatial_color_quant(
572563 exit(-1);
573564 }
574565 size_t old_max_v = best_match_color(coarse_variables, i_x, i_y, palette, num_colors);
575- Color4f& j_pal = (*j_palette_sum)[i_y][i_x];
566+ Color& j_pal = (*j_palette_sum)[i_y][i_x];
576567 for (size_t v=0; v<num_colors; ++v) {
577568 double new_val = meanfields[v]/meanfield_sum;
578569 // Prevent the matrix S from becoming singular
@@ -636,7 +627,7 @@ void spatial_color_quant(
636627 {
637628 --coarse_level;
638629 if (coarse_level < 0) break;
639- Array3D<float>* p_new_coarse_variables = new Array3D<float>(
630+ Array3D<double>* p_new_coarse_variables = new Array3D<double>(
640631 image.width_ >> coarse_level,
641632 image.height_ >> coarse_level,
642633 num_colors);
@@ -645,7 +636,7 @@ void spatial_color_quant(
645636 p_coarse_variables = p_new_coarse_variables;
646637 iters_at_current_level = 0;
647638 delete j_palette_sum;
648- j_palette_sum = new Image4f((*p_coarse_variables).width_, (*p_coarse_variables).height_);
639+ j_palette_sum = new Image((*p_coarse_variables).width_, (*p_coarse_variables).height_);
649640 compute_initial_j_palette_sum(*j_palette_sum, *p_coarse_variables, palette, num_colors);
650641 skip_palette_maintenance = true;
651642 #ifdef TRACE
@@ -660,7 +651,7 @@ void spatial_color_quant(
660651 // This is normally not used, but is handy sometimes for debugging
661652 while (coarse_level > 0) {
662653 coarse_level--;
663- Array3D<float>* p_new_coarse_variables = new Array3D<float>(
654+ Array3D<double>* p_new_coarse_variables = new Array3D<double>(
664655 image.width_ >> coarse_level,
665656 image.height_ >> coarse_level,
666657 num_colors
@@ -672,7 +663,7 @@ void spatial_color_quant(
672663
673664 {
674665 // Need to reseat this reference in case we changed p_coarse_variables
675- Array3D<float>& coarse_variables = *p_coarse_variables;
666+ Array3D<double>& coarse_variables = *p_coarse_variables;
676667
677668 for (size_t i_x = 0; i_x < image.width_; ++i_x) {
678669 for (size_t i_y = 0; i_y < image.height_; ++i_y) {
--- a/quantize.h
+++ b/quantize.h
@@ -1,11 +1,18 @@
11 #pragma once
22
3+#include "Array.h"
4+#include "Color4f.h"
5+#include "Color4d.h"
6+
7+typedef Color4d Color;
8+typedef Array2D<Color> Image;
9+
310 void spatial_color_quant(
4- Image4f& image,
5- Image4f& filter_weights,
11+ Image& image,
12+ Image& filter_weights,
613 Array2D<uint8_t>& quantized_image,
7- Color4f* palette, size_t num_colors,
8- Array3D<float>*& p_coarse_variables,
14+ Color* palette, size_t num_colors,
15+ Array3D<double>*& p_coarse_variables,
916 double initial_temperature,
1017 double final_temperature,
1118 int temps_per_level,