修订版 | 9126c779d33f363ad14177ae67e61c13d0ca44b3 (tree) |
---|---|
时间 | 2014-09-20 00:05:03 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
Commiter | Lorenzo Isella |
Two codes to plot and download data. I also introduce a function to generate a colour palette with many (>9) colours.
@@ -0,0 +1,261 @@ | ||
1 | +rm(list=ls()) | |
2 | + | |
3 | +library(ggplot2) | |
4 | +library(grid) | |
5 | +library(reshape2) | |
6 | +library(RJSDMX) | |
7 | +require(gridExtra) | |
8 | +library(tikzDevice) | |
9 | +library(scales) | |
10 | +library(digest) | |
11 | + | |
12 | + | |
13 | + | |
14 | +replace_country_code <- function(mylist){ | |
15 | + | |
16 | +eu_list <- c("BE", "BG","CZ", "DK", "DE", "EE", "IE", "EL", | |
17 | + "ES", "FR", "IT", "CY", "LV", "LT", "LU", "HU", "MT", | |
18 | + "NL", "AT","PL", "PT", "RO", "SI", "SK", "FI","SE","UK", "HR" ) | |
19 | + | |
20 | +eu_list <- sort(eu_list) | |
21 | + | |
22 | +oecd_list <- c("AUT", "BEL", "BGR", "CYP", "CZE","DEU", "DNK", "EST", "GRC", | |
23 | + "ESP", "FIN", "FRA", "HRV", "HUN", "IRL", "ITA", "LTU", | |
24 | + "LUX", "LVA", "MLT", "NLD", "POL", "PRT", "ROU", "SWE", | |
25 | + "SVN", "SVK", "GBR" | |
26 | + ) | |
27 | + | |
28 | + | |
29 | +for (i in seq(length(oecd_list))){ | |
30 | + | |
31 | +country <- oecd_list[i] | |
32 | + | |
33 | +sel <- which(mylist==country) | |
34 | + | |
35 | +mylist[sel] <- eu_list[i] | |
36 | + | |
37 | + | |
38 | + | |
39 | +} | |
40 | + | |
41 | +return(mylist) | |
42 | + | |
43 | +} | |
44 | + | |
45 | + | |
46 | +# see http://bit.ly/1zbvAti | |
47 | +integer_breaks <- function(n = 5, ...) { | |
48 | + breaker <- pretty_breaks(n, ...) | |
49 | + function(x) { | |
50 | + breaks <- breaker(x) | |
51 | + breaks[breaks == floor(breaks)] | |
52 | + } | |
53 | +} | |
54 | + | |
55 | + | |
56 | +fmt <- function(){ | |
57 | + function(x) { | |
58 | + d <- log10(min(diff(x),na.rm=TRUE)) | |
59 | + if(d < 0) format(x,nsmall = abs(round(d)),scientific = FALSE) else x | |
60 | + } | |
61 | +} | |
62 | + | |
63 | + | |
64 | +fmt2 <- function(n){ | |
65 | + function(x) format(x,nsmall = n,scientific = FALSE) | |
66 | +} | |
67 | + | |
68 | + | |
69 | +drop_last_char <- function(t,n){ | |
70 | + | |
71 | +substr(t, 1, nchar(t)-n) | |
72 | + | |
73 | +} | |
74 | + | |
75 | + | |
76 | +drop_ini_char <- function(t,n){ | |
77 | + | |
78 | +substr(t, (n+1), nchar(t)) | |
79 | + | |
80 | +} | |
81 | + | |
82 | + | |
83 | + | |
84 | +substrRight <- function(x, n){ | |
85 | + substr(x, nchar(x)-n+1, nchar(x)) | |
86 | +} | |
87 | + | |
88 | +substrLeft <- function(x, n){ | |
89 | + substr(x,1, n ) | |
90 | +} | |
91 | + | |
92 | + | |
93 | + | |
94 | +compact_list <- function(data_rd, missing=0){ | |
95 | + | |
96 | +myseq <- seq(length(data_rd)) | |
97 | + | |
98 | +data2 <- c() | |
99 | + | |
100 | +for (i in myseq){ | |
101 | + | |
102 | +temp <- as.data.frame(data_rd[[i]]) | |
103 | + | |
104 | +#remove the missing data | |
105 | + | |
106 | +if (missing==1){ | |
107 | + | |
108 | +sel <- complete.cases(temp) | |
109 | + | |
110 | +temp_year <- as.numeric(row.names(temp)[sel]) | |
111 | + | |
112 | +} else{ | |
113 | + | |
114 | +temp_year <- as.numeric(row.names(temp)) | |
115 | + | |
116 | +} | |
117 | + | |
118 | +namevec <-rep(names(data_rd[i]), length(temp_year)) | |
119 | + | |
120 | +## temp <- temp[sel, ] | |
121 | + | |
122 | +temp2 <- as.data.frame(cbind(temp, temp_year, namevec)) | |
123 | + | |
124 | +data2 <- rbind(data2, temp2) | |
125 | + | |
126 | +} | |
127 | + | |
128 | + | |
129 | + | |
130 | +#data2 <- as.data.frame(data2) | |
131 | + | |
132 | + | |
133 | +names(data2) <- letters[seq(ncol(data2))] | |
134 | + | |
135 | +data2$a <- as.numeric(data2$a) | |
136 | + | |
137 | + | |
138 | + | |
139 | +#print("ncol(data2) is, ") | |
140 | +#print(ncol(data2)) | |
141 | + | |
142 | +return(data2) | |
143 | + | |
144 | +} | |
145 | + | |
146 | + | |
147 | + | |
148 | + | |
149 | + | |
150 | + | |
151 | +my_ggplot_theme <- function(legend_coord){ theme( panel.background = element_rect(fill="gray", | |
152 | + colour = "black", size = 0.5, linetype = 1), | |
153 | + panel.grid.minor = element_blank(), | |
154 | + axis.ticks = element_line(colour = "black", size=1), | |
155 | + axis.ticks.length = unit(0.15, "cm"), | |
156 | + strip.background = element_rect(colour = 'blue', | |
157 | + fill = 'white', size = 1, linetype=1), | |
158 | + strip.text.x = element_text(colour = 'red', angle = 0, | |
159 | + size = 12, hjust = 0.5, | |
160 | + vjust = 0.5, face = 'bold'), | |
161 | + axis.title.x = element_text(size = 20), | |
162 | + axis.title.y = element_text(size = 20, angle=90, vjust=1), | |
163 | + axis.text.x = element_text(size=15, colour="black", vjust=1), | |
164 | + axis.text.y = element_text(size=15, colour="black", hjust=1), | |
165 | + legend.text = element_text(size = 14, vjust=0.4), | |
166 | + legend.title = element_text(size = 24, hjust=0), | |
167 | + legend.position = legend_coord, | |
168 | + legend.title = element_blank(), | |
169 | + legend.background=element_rect(color=NA, fill=NA), | |
170 | + legend.key = element_rect(colour = NA, fill=NA) ) | |
171 | + } | |
172 | + | |
173 | + | |
174 | + | |
175 | + | |
176 | +################################################################ | |
177 | +################################################################ | |
178 | +################################################################ | |
179 | +################################################################ | |
180 | + | |
181 | +data <- getTimeSeries('EUROSTAT', | |
182 | + 'rd_e_berdindr2.A.TOTAL|C|D_E|G|H|J|K|M|N|Q.PC_GDP.BE|BG|CZ|DK|DE|EE|IE|EL|ES|FR|IT|CY|LV|LT|LU|HU|MT|NL|AT|PL|PT|RO|SI|SK|FI|SE|UK|HR') | |
183 | + | |
184 | +data <- compact_list(data) | |
185 | + | |
186 | +data$country <- substrRight(as.character(data$c),2) | |
187 | + | |
188 | +names(data) <- c("value", "year", "indicator", "country") | |
189 | + | |
190 | +data$indicator <- as.character(data$indicator) | |
191 | + | |
192 | +data$value <- as.numeric(data$value) | |
193 | +#data_rd$indicator <- "Total" | |
194 | + | |
195 | +data$indicator <- drop_ini_char(data$indicator,17) | |
196 | +data$indicator <- drop_last_char(data$indicator,10) | |
197 | + | |
198 | +data$indicator <- as.factor(data$indicator) | |
199 | + | |
200 | +berd_all <- data | |
201 | + | |
202 | +#############################################################à | |
203 | +#############################################################à | |
204 | +#############################################################à | |
205 | +#############################################################à | |
206 | + | |
207 | + | |
208 | +data <- getTimeSeries('EUROSTAT', | |
209 | + 'rd_e_berdindr2.A.C10_C11|C17|C19|C20|C22|C23|C25|C26|C27|C28|C29|C31|C32.MIO_NAC.BE|BG|CZ|DK|DE|EE|IE|EL|ES|FR|IT|CY|LV|LT|LU|HU|MT|NL|AT|PL|PT|RO|SI|SK|FI|SE|UK|HR') | |
210 | + | |
211 | + | |
212 | +data <- compact_list(data) | |
213 | + | |
214 | +data$country <- substrRight(as.character(data$c),2) | |
215 | + | |
216 | +names(data) <- c("value", "year", "indicator", "country") | |
217 | + | |
218 | +data$indicator <- as.character(data$indicator) | |
219 | + | |
220 | +data$value <- as.numeric(data$value) | |
221 | + | |
222 | + | |
223 | +data$indicator <- drop_ini_char(data$indicator,17) | |
224 | +data$indicator <- drop_last_char(data$indicator,11) | |
225 | + | |
226 | +data$indicator <- as.factor(data$indicator) | |
227 | + | |
228 | + | |
229 | +berd_sect <- data | |
230 | + | |
231 | +#############################################################à | |
232 | +#############################################################à | |
233 | +#############################################################à | |
234 | + | |
235 | + | |
236 | + | |
237 | +## data_an <- getTimeSeries('OECD','ANBERD_REV4.AUT_MA|BEL_MA|CZE_MA|DEU_MA|DNK_MA|ESP_MA|EST_MA|FIN_MA|FRA_MA|GBR_MA|HUN_MA|ITA_MA|NLD_MA|POL_MA|PRT_MA|ROU_MA|SVK_MA|SVN_MA.NATCUR.DTOTAL|D01T03|D05T09|D10T33|D35T39|D41T43|D45T99') | |
238 | + | |
239 | + | |
240 | +data <- getTimeSeries('OECD','ANBERD_REV4.AUT_MA|BEL_MA|CZE_MA|DEU_MA|DNK_MA|ESP_MA|EST_MA|FIN_MA|FRA_MA|GBR_MA|HUN_MA|ITA_MA|NLD_MA|POL_MA|PRT_MA|ROU_MA|SVK_MA|SVN_MA.NATCUR.DTOTAL|D19|D10T33') | |
241 | + | |
242 | +data <- compact_list(data) | |
243 | + | |
244 | +data$country <- drop_ini_char(as.character(data$c),12) | |
245 | +data$country <- substrLeft(as.character(data$country),3) | |
246 | +data$country <- replace_country_code(data$country) | |
247 | + | |
248 | + | |
249 | +names(data) <- c("value", "year", "indicator", "country") | |
250 | + | |
251 | +data$indicator <- as.character(data$indicator) | |
252 | + | |
253 | +data$value <- as.numeric(data$value) | |
254 | +data$indicator <- drop_ini_char(data$indicator,26) | |
255 | + | |
256 | +data$indicator <- as.character(data$indicator) | |
257 | + | |
258 | +anberd_sect <- data | |
259 | + | |
260 | + | |
261 | +print("So far so good") |
@@ -0,0 +1,223 @@ | ||
1 | +source("download-data-rjsdmx.R") | |
2 | +library(RColorBrewer) | |
3 | +###################################### | |
4 | + | |
5 | +sel_country <- "FI" | |
6 | + | |
7 | +temp <- berd_all | |
8 | + | |
9 | +sel <- which(temp$country==sel_country) | |
10 | + | |
11 | +temp <- temp[sel, ] | |
12 | + | |
13 | +sel <- which(temp$year>2004) | |
14 | +temp <- temp[sel, ] | |
15 | + | |
16 | +temp <- temp[complete.cases(temp), ] | |
17 | + | |
18 | +lbls <- levels(temp$indicator) | |
19 | + | |
20 | + | |
21 | +title_exp <- paste(sel_country, ": R&D Intensity per sector", sep="") | |
22 | + | |
23 | +# see http://bit.ly/1uhCvjk to understand how to create a large palette | |
24 | + | |
25 | +colourCount = length(lbls) | |
26 | +getPalette = colorRampPalette(brewer.pal(9, "Set1")) | |
27 | +mypal <- getPalette(colourCount) | |
28 | +mypal[10] <- "black" | |
29 | + | |
30 | + | |
31 | + | |
32 | +gpl <- ggplot(temp, aes(x=year, y=value , | |
33 | + colour=indicator | |
34 | + )) + | |
35 | + | |
36 | + | |
37 | +my_ggplot_theme(c(0.13, 0.8))+ | |
38 | +theme(legend.position = 'right')+ | |
39 | +geom_point(size=2) + | |
40 | +geom_line()+ | |
41 | + | |
42 | + | |
43 | +scale_colour_manual("", breaks=lbls, values=mypal) + | |
44 | + | |
45 | +#scale_colour_brewer(mypal)+ | |
46 | + | |
47 | + | |
48 | +# scale_color_brewer(palette="Spectral")+ | |
49 | + | |
50 | +scale_y_continuous(breaks=pretty_breaks(n=5))+ | |
51 | + | |
52 | +scale_x_continuous(breaks=integer_breaks(n=5))+ | |
53 | + | |
54 | +labs(title=title_exp)+ | |
55 | +theme(plot.title = element_text(lineheight=.8, size=24, face="bold", vjust=1))+ | |
56 | + | |
57 | + | |
58 | + xlab("Year")+ | |
59 | + ylab("% GDP") | |
60 | + | |
61 | + | |
62 | + | |
63 | +fname <- paste(sel_country, "_berd_intensity.pdf", sep="") | |
64 | + | |
65 | +ggsave(fname, gpl, width=10,height=5) | |
66 | + | |
67 | +fname <- paste(sel_country, "_berd_intensity.png", sep="") | |
68 | +ggsave(fname, gpl, width=10,height=5) | |
69 | + | |
70 | +#################################################################à | |
71 | + | |
72 | +temp <- berd_all | |
73 | + | |
74 | +sel <- which(temp$country==sel_country) | |
75 | + | |
76 | +temp <- temp[sel, ] | |
77 | + | |
78 | +sel <- which(temp$year>2004) | |
79 | +temp <- temp[sel, ] | |
80 | + | |
81 | +temp <- temp[complete.cases(temp), ] | |
82 | + | |
83 | + | |
84 | +sel <- which(temp$indicator=="TOTAL") | |
85 | +temp <- temp[sel, ] | |
86 | + | |
87 | +lbls <- levels(temp$indicator) | |
88 | + | |
89 | + | |
90 | +title_exp <- paste(sel_country, ": R&D Intensity per sector", sep="") | |
91 | + | |
92 | + | |
93 | + | |
94 | + | |
95 | +gpl <- ggplot(temp, aes(x=year, y=value, | |
96 | + colour=indicator)) + | |
97 | + | |
98 | + | |
99 | +my_ggplot_theme(c(0.13, 0.8))+ | |
100 | +theme(legend.position = 'right')+ | |
101 | +geom_point(size=2) + | |
102 | +geom_line()+ | |
103 | + | |
104 | + | |
105 | +scale_colour_manual("", breaks=lbls, values="black") + | |
106 | + | |
107 | +#scale_colour_brewer(mypal)+ | |
108 | + | |
109 | + | |
110 | +# scale_color_brewer(palette="Spectral")+ | |
111 | + | |
112 | +scale_y_continuous(breaks=pretty_breaks(n=5))+ | |
113 | + | |
114 | +scale_x_continuous(breaks=integer_breaks(n=5))+ | |
115 | + | |
116 | +labs(title=title_exp)+ | |
117 | +theme(plot.title = element_text(lineheight=.8, size=24, face="bold", vjust=1))+ | |
118 | + | |
119 | + | |
120 | + xlab("Year")+ | |
121 | + ylab("% GDP") | |
122 | + | |
123 | + | |
124 | + | |
125 | +fname <- paste(sel_country, "_berd_total_intensity.pdf", sep="") | |
126 | + | |
127 | +ggsave(fname, gpl, width=10,height=5) | |
128 | + | |
129 | +fname <- paste(sel_country, "_berd_total_intensity.png", sep="") | |
130 | +ggsave(fname, gpl, width=10,height=5) | |
131 | + | |
132 | + | |
133 | + | |
134 | + | |
135 | + | |
136 | + | |
137 | + | |
138 | + | |
139 | + | |
140 | + | |
141 | + | |
142 | + | |
143 | + | |
144 | + | |
145 | + | |
146 | +################################################## | |
147 | + | |
148 | +temp <- berd_sect | |
149 | + | |
150 | +sel <- which(temp$country==sel_country) | |
151 | + | |
152 | +temp <- temp[sel, ] | |
153 | + | |
154 | +sel <- which(temp$year>2004) | |
155 | +temp <- temp[sel, ] | |
156 | + | |
157 | + | |
158 | +temp <- temp[complete.cases(temp), ] | |
159 | + | |
160 | + | |
161 | +lbls <- levels(temp$indicator) | |
162 | + | |
163 | + | |
164 | + | |
165 | +title_exp <- paste(sel_country, ": R&D Intensity per sector", sep="") | |
166 | + | |
167 | +# see http://bit.ly/1uhCvjk to understand how to create a large palette | |
168 | + | |
169 | +colourCount = length(lbls) | |
170 | +getPalette = colorRampPalette(brewer.pal(9, "Set1")) | |
171 | +mypal <- getPalette(colourCount) | |
172 | +mypal[10] <- "black" | |
173 | + | |
174 | + | |
175 | + | |
176 | +gpl <- ggplot(temp, aes(x=year, y=value , | |
177 | + colour=indicator | |
178 | + )) + | |
179 | + | |
180 | + | |
181 | +my_ggplot_theme(c(0.13, 0.8))+ | |
182 | +theme(legend.position = 'right')+ | |
183 | +geom_point(size=2) + | |
184 | +geom_line()+ | |
185 | + | |
186 | + | |
187 | + | |
188 | + | |
189 | + | |
190 | +scale_colour_manual("", breaks=lbls, values=mypal) + | |
191 | + | |
192 | +#scale_colour_brewer(mypal)+ | |
193 | + | |
194 | + | |
195 | +# scale_color_brewer(palette="Spectral")+ | |
196 | + | |
197 | +scale_y_continuous(breaks=pretty_breaks(n=5))+ | |
198 | + | |
199 | +scale_x_continuous(breaks=integer_breaks(n=5))+ | |
200 | + | |
201 | +labs(title=title_exp)+ | |
202 | +theme(plot.title = element_text(lineheight=.8, size=24, face="bold", vjust=1))+ | |
203 | + | |
204 | + | |
205 | + xlab("Year")+ | |
206 | + ylab("Millions National Currency") | |
207 | + | |
208 | + | |
209 | + | |
210 | +fname <- paste(sel_country, "_berd_sector_intensity.pdf", sep="") | |
211 | + | |
212 | +ggsave(fname, gpl, width=10,height=5) | |
213 | + | |
214 | +fname <- paste(sel_country, "_berd_sector_intensity.png", sep="") | |
215 | +ggsave(fname, gpl, width=10,height=5) | |
216 | + | |
217 | + | |
218 | + | |
219 | + | |
220 | + | |
221 | + | |
222 | + | |
223 | +print("So far so good") |