修订版 | 270df40bc2f57d6291010c6a0257bb13a0b6667c (tree) |
---|---|
时间 | 2017-08-23 00:03:11 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
I cleaned up the code and added an aggregation by NACE code. I am also using the cowplot library to save ggplot objects to PDF or PNG.
@@ -6,6 +6,8 @@ | ||
6 | 6 | library(reshape2) |
7 | 7 | library(magrittr) |
8 | 8 | ## library(ggthemes) |
9 | +library(cowplot) | |
10 | + | |
9 | 11 | library(haven) |
10 | 12 | library(scales) |
11 | 13 | library(ggthemes) |
@@ -13,7 +15,6 @@ | ||
13 | 15 | library(ecp) |
14 | 16 | library(ggthemes) |
15 | 17 | |
16 | - | |
17 | 18 | options( scipen = 16 ) |
18 | 19 | |
19 | 20 | source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R") |
@@ -29,7 +30,7 @@ | ||
29 | 30 | |
30 | 31 | |
31 | 32 | |
32 | -df <- read_dta("eurofundata.dta") | |
33 | +df <- read_dta("eurofundata.dta") %>% as.tibble %>% mutate(nace2digits= substrLeft(Nacecode, 2) ) | |
33 | 34 | |
34 | 35 | |
35 | 36 | df_table_trade <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% group_by(Country, year) %>% summarise(total.count=n()) |
@@ -37,82 +38,22 @@ | ||
37 | 38 | |
38 | 39 | df_table_all<- df %>% group_by(Country, year) %>% summarise(total.count=n()) |
39 | 40 | |
40 | -## test <- df %>% filter(Country=="Cyprus") %>% filter(TypeofRestructuring %in% trade_restructuring) | |
41 | - | |
42 | - | |
43 | -myformula <- paste("NumberEmployed", "~ year+ Country ") %>% as.formula() | |
44 | - | |
45 | - | |
46 | -df_table1 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)} | |
47 | - | |
48 | - | |
49 | - | |
50 | -myformula <- paste("PlannedAVG ", "~ year+ Country ") %>% as.formula() | |
51 | - | |
52 | - | |
53 | -df_table2 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)} | |
54 | - | |
55 | - | |
56 | - | |
57 | - | |
58 | 41 | df_table2bis <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% group_by(Country, year) %>% summarise(avg=sum(PlannedAVG, na.rm=T)) |
59 | 42 | |
60 | 43 | |
61 | - | |
62 | - | |
63 | -df_table3 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) | |
64 | - | |
65 | - | |
66 | - | |
67 | - | |
68 | - | |
69 | - | |
70 | - | |
71 | -myformula <- paste("PlannedAVG ", "~ year+ Country ") %>% as.formula() | |
72 | - | |
73 | - | |
74 | -df_table4 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)} %>% mutate(type="Trade") %>% filter(Country != "World") | |
75 | - | |
76 | - | |
77 | - | |
78 | -df_table5 <- df %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)} %>% mutate(type="All") %>% filter(Country != "World") | |
79 | - | |
80 | - | |
81 | - | |
82 | -### a better way to do this aggregation (and for several variables in one go) | |
83 | -### is to use group_by and summarise. NB: I treat the missing data as zero. | |
84 | - | |
44 | +df_table3 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% group_by(Country, year, nace2digits ) %>% summarise(nace2sum=sum(PlannedAVG, na.rm=T)) | |
85 | 45 | |
86 | 46 | |
87 | 47 | df_table4bis <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% group_by(Country, year) %>% summarise(avg=sum(PlannedAVG, na.rm=T),max=sum(PlannedJobReductionsmax, na.rm=T), min=sum(PlannedJobReductionsmin, na.rm=T), number_emp=sum(NumberEmployed, na.rm=T) ) %>% filter(Country != "World")%>% mutate(type="Trade") |
88 | 48 | |
89 | - | |
90 | - | |
91 | - | |
92 | - | |
93 | - | |
94 | - | |
95 | 49 | df_table5bis <- df %>% group_by(Country, year) %>% summarise(avg=sum(PlannedAVG, na.rm=T),max=sum(PlannedJobReductionsmax, na.rm=T), min=sum(PlannedJobReductionsmin, na.rm=T), number_emp=sum(NumberEmployed, na.rm=T) ) %>% filter(Country != "World")%>% mutate(type="All") |
96 | 50 | |
97 | 51 | |
98 | - | |
99 | 52 | df_tot <- rbind(df_table4bis,df_table5bis) |
100 | 53 | |
101 | 54 | |
102 | 55 | ## There is little difference between min, max and average, so there is no point in adding the min and max |
103 | 56 | |
104 | -## myformula <- paste(" . ~ year+ Country ") %>% as.formula() | |
105 | - | |
106 | - | |
107 | -## df_table6 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% select(c(PlannedAVG,PlannedJobReductionsmax, PlannedJobReductionsmin, year, Country )) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=F)} %>% filter(Country != "World") | |
108 | - | |
109 | - | |
110 | - | |
111 | - | |
112 | -## df_table6bis <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% select(c(PlannedAVG, year, Country )) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)} %>% filter(Country != "World") | |
113 | - | |
114 | - | |
115 | - | |
116 | 57 | |
117 | 58 | |
118 | 59 | if (plot_loop == 1){ |
@@ -177,28 +118,7 @@ | ||
177 | 118 | ggsave("synoptic-trade2.pdf", gpl, width=15,height=15) |
178 | 119 | |
179 | 120 | |
180 | - | |
181 | - | |
182 | - | |
183 | - | |
184 | - | |
185 | -## gpl <- ggplot(df_table3, aes(x = PlannedAVG, y = year, group = year)) + | |
186 | -## ## geom_joy(scale = 10, size = 0.25, rel_min_height = 0.03) + | |
187 | -## ## theme_joy() + | |
188 | -## geom_joy()+ | |
189 | 121 | |
190 | -## ## scale_x_continuous(limits=c(1, 200), expand = c(0.01, 0)) + | |
191 | -## ## scale_y_reverse(breaks=c(2000, 1980, 1960, 1940, 1920, 1900), expand = c(0.01, 0)) | |
192 | -## coord_cartesian(xlim=c(0,600))+ | |
193 | - | |
194 | -## scale_x_continuous() + | |
195 | -## scale_y_reverse() | |
196 | - | |
197 | -## ggsave("joyplot_average.pdf", gpl, width=10,height=5) | |
198 | - | |
199 | - | |
200 | - | |
201 | - | |
202 | 122 | lbls <- unique(df_tot$type) |
203 | 123 | |
204 | 124 |
@@ -276,10 +196,13 @@ | ||
276 | 196 | |
277 | 197 | countries <- unique(df_tot$Country) |
278 | 198 | |
199 | +## countries <- "Germany" | |
200 | + | |
279 | 201 | |
280 | 202 | for (mycountry in countries){ |
281 | 203 | |
282 | - | |
204 | + print("mycountry is, ") | |
205 | + print(mycountry) | |
283 | 206 | |
284 | 207 | temp <- df_tot %>% filter(Country == mycountry) |
285 | 208 |
@@ -320,15 +243,87 @@ | ||
320 | 243 | ylab("Average Number Employees") |
321 | 244 | |
322 | 245 | |
323 | -fname <- paste(mycountry, "_single.pdf") | |
246 | +fname <- paste(mycountry, "_single.pdf", sep="") | |
324 | 247 | |
325 | 248 | ggsave(fname, gpl, width=10,height=5) |
326 | 249 | |
327 | 250 | |
328 | -fname <- paste(mycountry, "_single.png") | |
251 | +fname <- paste(mycountry, "_single.png", sep="") | |
329 | 252 | |
330 | 253 | ggsave(fname, gpl, width=10,height=5) |
331 | 254 | |
255 | + | |
256 | + | |
257 | +temp2 <- df_table3 %>% filter(Country==mycountry) | |
258 | + | |
259 | +if (nrow(temp2)>0){ | |
260 | + | |
261 | + gpl <- ggplot(temp2, aes(x=nace2digits, y=nace2sum, | |
262 | + ## colour=indicator, | |
263 | + ## fill=nace2digits | |
264 | + ## group=country | |
265 | + )) + | |
266 | + | |
267 | + | |
268 | + | |
269 | +## theme(legend.position = 'right')+ | |
270 | +## geom_point(size=2, col="black") + | |
271 | +## geom_line(col="black")+ | |
272 | + | |
273 | +## geom_point(size=2) + | |
274 | + | |
275 | +geom_bar(position="dodge", stat="identity", alpha=1, fill="blue" | |
276 | + ) + | |
277 | + | |
278 | + | |
279 | + | |
280 | +#scale_y_continuous(limits=c(0.3,0.9),breaks=seq(0.3, 0.9, by=0.3))+ | |
281 | +#scale_y_continuous(breaks=pretty_breaks(n=5))+ | |
282 | +scale_y_continuous(breaks=pretty_breaks(n=5))+ | |
283 | + | |
284 | +## scale_x_continuous(breaks=integer_breaks(n=5))+ | |
285 | + ## scale_x_continuous(breaks=data_g_temp$year | |
286 | + ## )+ | |
287 | + | |
288 | +my_ggplot_theme(c(0.13,0.8))+ | |
289 | +theme(legend.position = 'right')+ | |
290 | + | |
291 | +facet_wrap( ~ year, ncol = 2, scales = "fixed" )+ | |
292 | + | |
293 | +## scale_fill_gdocs(NULL, breaks=lbls)+ | |
294 | + | |
295 | +labs(title=mycountry)+ | |
296 | +theme(plot.title = element_text(lineheight=.8, size=24, face="bold", vjust=1))+ | |
297 | +## theme(legend.text = element_text(vjust=1,lineheight=1 ))+ | |
298 | +## theme(legend.title = element_text(colour="black", size=16, face="bold"))+ | |
299 | + | |
300 | + | |
301 | +theme(legend.position = "none")+ | |
302 | + | |
303 | + | |
304 | + | |
305 | + | |
306 | +## scale_fill_manual("", breaks=lbls, labels=c("Indirect Funding", "Funding from government"),values=col_seq) + | |
307 | + | |
308 | +## scale_color_manual("", breaks=lbls, labels=c("Indirect Funding", "Funding from government"), values=col_seq) + | |
309 | + | |
310 | + | |
311 | + | |
312 | + | |
313 | +xlab("NACE Division")+ | |
314 | + ylab("Number of Employees") | |
315 | + | |
316 | +fname <- paste(mycountry, "_nace_division.pdf", sep="") | |
317 | + | |
318 | + | |
319 | +save_plot(fname, gpl,base_height=14, base_aspect_ratio=2.5) | |
320 | + | |
321 | + | |
322 | +fname <- paste(mycountry, "_nace_division.png", sep="") | |
323 | + | |
324 | +save_plot(fname, gpl,base_height=14, base_aspect_ratio=2.5) | |
325 | + | |
326 | +} | |
332 | 327 | |
333 | 328 | } |
334 | 329 | } |