• R/O
  • SSH

提交

标签
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

修订版: 270df40bc2f57d6291010c6a0257bb13a0b6667c (tree)
时间: 2017-08-23 00:03:11
作者: Lorenzo Isella <lorenzo.isella@gmai...>
Committer: Lorenzo Isella

Log Message

I cleaned up the code and added an aggregation by NACE code. I am also using the cowplot library to save ggplot objects to PDF or PNG.

更改概述

差异

diff -r eec95040918c -r 270df40bc2f5 R-codes/eurofund.R
--- a/R-codes/eurofund.R Tue Aug 22 15:36:48 2017 +0200
+++ b/R-codes/eurofund.R Tue Aug 22 17:03:11 2017 +0200
@@ -6,6 +6,8 @@
66 library(reshape2)
77 library(magrittr)
88 ## library(ggthemes)
9+library(cowplot)
10+
911 library(haven)
1012 library(scales)
1113 library(ggthemes)
@@ -13,7 +15,6 @@
1315 library(ecp)
1416 library(ggthemes)
1517
16-
1718 options( scipen = 16 )
1819
1920 source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
@@ -29,7 +30,7 @@
2930
3031
3132
32-df <- read_dta("eurofundata.dta")
33+df <- read_dta("eurofundata.dta") %>% as.tibble %>% mutate(nace2digits= substrLeft(Nacecode, 2) )
3334
3435
3536 df_table_trade <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% group_by(Country, year) %>% summarise(total.count=n())
@@ -37,82 +38,22 @@
3738
3839 df_table_all<- df %>% group_by(Country, year) %>% summarise(total.count=n())
3940
40-## test <- df %>% filter(Country=="Cyprus") %>% filter(TypeofRestructuring %in% trade_restructuring)
41-
42-
43-myformula <- paste("NumberEmployed", "~ year+ Country ") %>% as.formula()
44-
45-
46-df_table1 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)}
47-
48-
49-
50-myformula <- paste("PlannedAVG ", "~ year+ Country ") %>% as.formula()
51-
52-
53-df_table2 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)}
54-
55-
56-
57-
5841 df_table2bis <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% group_by(Country, year) %>% summarise(avg=sum(PlannedAVG, na.rm=T))
5942
6043
61-
62-
63-df_table3 <- df %>% filter(TypeofRestructuring %in% trade_restructuring)
64-
65-
66-
67-
68-
69-
70-
71-myformula <- paste("PlannedAVG ", "~ year+ Country ") %>% as.formula()
72-
73-
74-df_table4 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)} %>% mutate(type="Trade") %>% filter(Country != "World")
75-
76-
77-
78-df_table5 <- df %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)} %>% mutate(type="All") %>% filter(Country != "World")
79-
80-
81-
82-### a better way to do this aggregation (and for several variables in one go)
83-### is to use group_by and summarise. NB: I treat the missing data as zero.
84-
44+df_table3 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% group_by(Country, year, nace2digits ) %>% summarise(nace2sum=sum(PlannedAVG, na.rm=T))
8545
8646
8747 df_table4bis <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% group_by(Country, year) %>% summarise(avg=sum(PlannedAVG, na.rm=T),max=sum(PlannedJobReductionsmax, na.rm=T), min=sum(PlannedJobReductionsmin, na.rm=T), number_emp=sum(NumberEmployed, na.rm=T) ) %>% filter(Country != "World")%>% mutate(type="Trade")
8848
89-
90-
91-
92-
93-
94-
9549 df_table5bis <- df %>% group_by(Country, year) %>% summarise(avg=sum(PlannedAVG, na.rm=T),max=sum(PlannedJobReductionsmax, na.rm=T), min=sum(PlannedJobReductionsmin, na.rm=T), number_emp=sum(NumberEmployed, na.rm=T) ) %>% filter(Country != "World")%>% mutate(type="All")
9650
9751
98-
9952 df_tot <- rbind(df_table4bis,df_table5bis)
10053
10154
10255 ## There is little difference between min, max and average, so there is no point in adding the min and max
10356
104-## myformula <- paste(" . ~ year+ Country ") %>% as.formula()
105-
106-
107-## df_table6 <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% select(c(PlannedAVG,PlannedJobReductionsmax, PlannedJobReductionsmin, year, Country )) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=F)} %>% filter(Country != "World")
108-
109-
110-
111-
112-## df_table6bis <- df %>% filter(TypeofRestructuring %in% trade_restructuring) %>% select(c(PlannedAVG, year, Country )) %>% { aggregate(myformula, data=. , FUN= sum, na.rm=T)} %>% filter(Country != "World")
113-
114-
115-
11657
11758
11859 if (plot_loop == 1){
@@ -177,28 +118,7 @@
177118 ggsave("synoptic-trade2.pdf", gpl, width=15,height=15)
178119
179120
180-
181-
182-
183-
184-
185-## gpl <- ggplot(df_table3, aes(x = PlannedAVG, y = year, group = year)) +
186-## ## geom_joy(scale = 10, size = 0.25, rel_min_height = 0.03) +
187-## ## theme_joy() +
188-## geom_joy()+
189121
190-## ## scale_x_continuous(limits=c(1, 200), expand = c(0.01, 0)) +
191-## ## scale_y_reverse(breaks=c(2000, 1980, 1960, 1940, 1920, 1900), expand = c(0.01, 0))
192-## coord_cartesian(xlim=c(0,600))+
193-
194-## scale_x_continuous() +
195-## scale_y_reverse()
196-
197-## ggsave("joyplot_average.pdf", gpl, width=10,height=5)
198-
199-
200-
201-
202122 lbls <- unique(df_tot$type)
203123
204124
@@ -276,10 +196,13 @@
276196
277197 countries <- unique(df_tot$Country)
278198
199+## countries <- "Germany"
200+
279201
280202 for (mycountry in countries){
281203
282-
204+ print("mycountry is, ")
205+ print(mycountry)
283206
284207 temp <- df_tot %>% filter(Country == mycountry)
285208
@@ -320,15 +243,87 @@
320243 ylab("Average Number Employees")
321244
322245
323-fname <- paste(mycountry, "_single.pdf")
246+fname <- paste(mycountry, "_single.pdf", sep="")
324247
325248 ggsave(fname, gpl, width=10,height=5)
326249
327250
328-fname <- paste(mycountry, "_single.png")
251+fname <- paste(mycountry, "_single.png", sep="")
329252
330253 ggsave(fname, gpl, width=10,height=5)
331254
255+
256+
257+temp2 <- df_table3 %>% filter(Country==mycountry)
258+
259+if (nrow(temp2)>0){
260+
261+ gpl <- ggplot(temp2, aes(x=nace2digits, y=nace2sum,
262+ ## colour=indicator,
263+ ## fill=nace2digits
264+ ## group=country
265+ )) +
266+
267+
268+
269+## theme(legend.position = 'right')+
270+## geom_point(size=2, col="black") +
271+## geom_line(col="black")+
272+
273+## geom_point(size=2) +
274+
275+geom_bar(position="dodge", stat="identity", alpha=1, fill="blue"
276+ ) +
277+
278+
279+
280+#scale_y_continuous(limits=c(0.3,0.9),breaks=seq(0.3, 0.9, by=0.3))+
281+#scale_y_continuous(breaks=pretty_breaks(n=5))+
282+scale_y_continuous(breaks=pretty_breaks(n=5))+
283+
284+## scale_x_continuous(breaks=integer_breaks(n=5))+
285+ ## scale_x_continuous(breaks=data_g_temp$year
286+ ## )+
287+
288+my_ggplot_theme(c(0.13,0.8))+
289+theme(legend.position = 'right')+
290+
291+facet_wrap( ~ year, ncol = 2, scales = "fixed" )+
292+
293+## scale_fill_gdocs(NULL, breaks=lbls)+
294+
295+labs(title=mycountry)+
296+theme(plot.title = element_text(lineheight=.8, size=24, face="bold", vjust=1))+
297+## theme(legend.text = element_text(vjust=1,lineheight=1 ))+
298+## theme(legend.title = element_text(colour="black", size=16, face="bold"))+
299+
300+
301+theme(legend.position = "none")+
302+
303+
304+
305+
306+## scale_fill_manual("", breaks=lbls, labels=c("Indirect Funding", "Funding from government"),values=col_seq) +
307+
308+## scale_color_manual("", breaks=lbls, labels=c("Indirect Funding", "Funding from government"), values=col_seq) +
309+
310+
311+
312+
313+xlab("NACE Division")+
314+ ylab("Number of Employees")
315+
316+fname <- paste(mycountry, "_nace_division.pdf", sep="")
317+
318+
319+save_plot(fname, gpl,base_height=14, base_aspect_ratio=2.5)
320+
321+
322+fname <- paste(mycountry, "_nace_division.png", sep="")
323+
324+save_plot(fname, gpl,base_height=14, base_aspect_ratio=2.5)
325+
326+}
332327
333328 }
334329 }