Rev. | ea841657c41a239d61d8aae3aba87455e92284d9 |
---|---|
大小 | 14,040 字节 |
时间 | 2020-04-01 22:57:30 |
作者 | Lorenzo Isella |
Log Message | Code to read the bulk FDI data now that 2018 is available and we have
|
rm(list=ls())
library(tidyverse)
library(magrittr)
library(openxlsx)
source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
do_nace <- 0
save_by_geo <- 0
read_nace_RDS <- 0
year_start <- 2009
include_income <- 0
calculate_brexit <- 0
## %>% gather(year, value, c(`2016`, `2015`, `2014`, `2013`))
## %>% filter(complete.cases(.))
## top_nace_single <- c("TOTAL", "A", "B", "C", "E", "F", "G","H","I","J","K","L","M","N",
## "Q","R","S","FDI", "D35","P85" )
## top_nace_aggregates <- c("G-U","O_T_U")
## I do not need this any longer
## df_geo_ini <- read_tsv("bop_fdi6_geo.tsv")
## df_geo <- df_geo_ini %>% separate("currency,indic_bp,fdi_item,partner,geo\\time",c("currency","indic_bp","fdi_item","partner","geo") ,sep="," ) %>% gather(year, value, c(`2016`, `2015`, `2014`, `2013`)) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN")) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT")) %>% mutate(direction=paste(fdi_item,"_", indic_bp, sep="")) %>% select(year,geo,partner, direction, value )%>% {.$value <- as.numeric(.$value)
## .} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
## .} %>% spread(direction, value)%>% arrange(geo, year)
df_pos_ini <- read_tsv("bop_fdi6_pos.tsv")
df_pos <- df_pos_ini %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>% gather(year, value, c(`2018`,`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter( nace_r2=="FDI", stk_flow %in% c("NI", "NO"), fdi_item %in% c("DI__D__F", "DO__D__F" ) ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT") )%>% mutate(direction=paste(fdi_item,"_STOCKS", sep=""))%>% select(year,geo,partner, direction, entity, value )%>% {.$value <- as.numeric(.$value)
.} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
.} %>% distinct() %>% spread(direction, value) %>% arrange(geo, year)
df_flow_ini <- read_tsv("bop_fdi6_flow.tsv")
df_flow <- df_flow_ini %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>% gather(year, value, c(`2018`,`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter( nace_r2=="FDI", stk_flow %in% c("NI", "NO"), fdi_item %in% c("DI__D__F", "DO__D__F" ) ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT") )%>% mutate(direction=paste(fdi_item,"_FLOWS", sep=""))%>% select(year,geo,partner, direction, entity, value )%>% {.$value <- as.numeric(.$value)
.} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
.} %>% distinct() %>% spread(direction, value) %>% arrange(geo, year)
if (include_income==1){
df_inc_ini <- read_tsv("bop_fdi6_inc.tsv")
df_inc <- df_inc_ini %>%
separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>%
gather(year, value, c(`2018`,`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter( nace_r2=="FDI", stk_flow %in% c("II", "IO"), fdi_item %in% c("DI__D4P__D__F", "DO__D4P__D__F" ) ) %>%
mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D4P__D__F", "OUT"))%>%
mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D4P__D__F", "IN") )%>%
mutate(direction=paste(fdi_item,"_INCOME", sep=""))%>%
select(year,geo,partner, direction, entity, value )%>%
{.$value <- as.numeric(.$value)
.} %>%
filter(complete.cases(.))%>%
{.$year <- as.numeric(.$year)
.} %>%
distinct() %>%
{.=.[-c(9024, 9026, 14164, 16053), ]
.} %>%
spread(direction, value) %>%
arrange(geo, year)
}
print("OK here at least")
##Two alternative and equivalent ways to treat df_main. Both work, but one is
## now a comment.
## df_main <- read_tsv("bop_fdi_main.tsv") %>% separate("indic_bp,geo,partner,post\\time", c("indic_bp","geo","partner","post"), sep=",") %>% gather(year, value, c(`2012`, `2011`, `2010`, `2009` ,`2008` ,`2007` ,`2006`, `2005` ,
## `2004` , `2003` , `2002` , `2001` , `2000` ,
## `1999` , `1998` , `1997`, `1996` , `1995`, `1994` , `1993` , `1992`)) %>% filter(indic_bp %in% c("FLOWS", "INCOME", "STOCKS") ) %>% {.$year <- as.numeric(.$year)
## .} %>% filter(year>year_start) %>% mutate(value=replace(value, grep(" d", value),drop_last_char(value[grep(" d", value)], 2))) %>% {.$value <- as.numeric(.$value)
## .} %>% filter(complete.cases(.)) %>% mutate(indic_bp=replace(indic_bp, post=="505", paste("OUT_", indic_bp[post=="505"], sep="")))%>% mutate(indic_bp=replace(indic_bp, post=="555", paste("IN_", indic_bp[post=="555"], sep=""))) %>% select(-post) %>% spread(indic_bp, value) %>% mutate(entity="TOTAL") %>% select(year, geo, partner, entity, IN_STOCKS, OUT_STOCKS, IN_FLOWS, OUT_FLOWS, IN_INCOME, OUT_INCOME)
df_tot <- inner_join(x=df_pos,y=df_flow, by=c("year", "geo" ,"partner", "entity"))
if (include_income==1){
df_main <- read_tsv("bop_fdi_main.tsv") %>% separate("indic_bp,geo,partner,post\\time", c("indic_bp","geo","partner","post"), sep=",") %>% gather(year, value, c(`2012`, `2011`, `2010`, `2009` ,`2008` ,`2007` ,`2006`, `2005` ,
`2004` , `2003` , `2002` , `2001` , `2000` ,
`1999` , `1998` , `1997`, `1996` , `1995`, `1994` , `1993` , `1992`)) %>% filter(indic_bp %in% c("FLOWS", "INCOME", "STOCKS") ) %>% {.$year <- as.numeric(.$year)
.} %>% filter(year>=year_start) %>% mutate(value=replace(value, grep(" d", value),drop_last_char(value[grep(" d", value)], 2))) %>% {.$value <- as.numeric(.$value)
.} %>% filter(complete.cases(.)) %>% mutate(post=replace(post, post=="505", "OUT_")) %>% mutate(post=replace(post, post=="555", "IN_")) %>% mutate(indic_bp=paste(post, indic_bp, sep="")) %>% select(-post) %>% spread(indic_bp, value) %>% mutate(entity="TOTAL") %>% select(year, geo, partner, entity, IN_STOCKS, OUT_STOCKS, IN_FLOWS, OUT_FLOWS, IN_INCOME, OUT_INCOME)
write_csv(df_main, "df_main.csv")
df_tot <- df_tot %>% inner_join(y=df_inc,by=c("year", "geo" ,"partner", "entity")) %>% rbind(df_main) %>% arrange(year, geo)
}
write_csv(df_tot, "update_FDI.csv")
saveRDS(df_tot, "update_FDI.RDS")
write.xlsx(df_tot, file = "update_FDI.xlsx",
sheetName = "data", append = FALSE, row.names=F,keepNA = TRUE)
###########################################################################
###########################################################################
###########################################################################
###########################################################################
###########################################################################
if (calculate_brexit==1){
###now we get ready for the EU27!
eu28 <- df_tot %>%
filter(geo=="EU28")
uk <- df_tot %>%
filter(geo=="UK")
eu27 <- eu28 %>%
left_join(y=uk, by=c("year", "partner", "entity")) %>%
mutate(IN_STOCKS=IN_STOCKS.x-IN_STOCKS.y,
OUT_STOCKS=OUT_STOCKS.x-OUT_STOCKS.y,
IN_FLOWS=IN_FLOWS.x-IN_FLOWS.y,
OUT_FLOWS=OUT_FLOWS.x-OUT_FLOWS.y## ,
## IN_INCOME=IN_INCOME.x-IN_INCOME.y,
## OUT_INCOME=OUT_INCOME.x-OUT_INCOME.y,
) %>%
filter(partner %!in% c("UK", "WRL_REST"),
!grepl("EXT", partner)) %>%
select( year, geo.x , partner, entity, IN_STOCKS, OUT_STOCKS, IN_FLOWS, OUT_FLOWS## , IN_INCOME,OUT_INCOME
) %>%
rename("geo"="geo.x") %>%
mutate(geo="EU27B")
eu28_ext_eu28 <- df_tot %>%
filter(geo=="EU28", partner=="EXT_EU28") %>%
arrange(year, entity)
uk_ext_eu28 <- df_tot %>%
filter(geo=="UK", partner=="EXT_EU28")%>%
arrange(year, entity)
uk_ext_eu27 <- uk_ext_eu28 %>%
mutate(partner="EXT_EU27B")
eu28_uk <- df_tot %>%
filter(geo=="EU28", partner=="UK")%>%
arrange(year, entity)
eu27_extra_eu27 <- eu28_ext_eu28 %>%
mutate(IN_STOCKS=IN_STOCKS+eu28_uk$IN_STOCKS,
OUT_STOCKS=OUT_STOCKS+eu28_uk$OUT_STOCKS,
IN_FLOWS=IN_FLOWS+eu28_uk$IN_FLOWS## ,
## IN_INCOME=IN_INCOME+eu28_uk$IN_INCOME,
## OUT_INCOME=OUT_INCOME+eu28_uk$OUT_INCOME
) %>%
left_join(y=uk_ext_eu28, by=c("year", "partner", "entity")) %>%
mutate(IN_STOCKS=IN_STOCKS.x-IN_STOCKS.y,
OUT_STOCKS=OUT_STOCKS.x-OUT_STOCKS.y,
IN_FLOWS=IN_FLOWS.x-IN_FLOWS.y,
OUT_FLOWS=OUT_FLOWS.x-OUT_FLOWS.y## ,
## IN_INCOME=IN_INCOME.x-IN_INCOME.y,
## OUT_INCOME=OUT_INCOME.x-OUT_INCOME.y,
) %>%
select( year, geo.x , partner, entity, IN_STOCKS, OUT_STOCKS, IN_FLOWS, OUT_FLOWS## , IN_INCOME, OUT_INCOME
) %>%
rename("geo"="geo.x") %>%
mutate(geo="EU27B", partner="EXT_EU27B")
brexit_extra <- bind_rows(eu27, eu27_extra_eu27,uk_ext_eu27 )
write.xlsx(brexit_extra, file = "brexit_extra.xlsx",
sheetName = "data", append = FALSE, row.names=F,keepNA = TRUE)
write_csv(brexit_extra, "brexit_extra.csv")
saveRDS(brexit_extra, "brexit_extra.RDS")
df_tot_fin <- bind_rows(df_tot, eu27, eu27_extra_eu27,uk_ext_eu27 )
saveRDS(df_tot_fin, "update_FDI_brexit.RDS")
write_csv(df_tot_fin, "update_FDI_brexit.csv")
write.xlsx(df_tot_fin, file = "update_FDI_brexit.xlsx",
sheetName = "data", append = FALSE, row.names=F,keepNA = TRUE)
}
##########################################################################
##########################################################################
##########################################################################
if (do_nace==1){
nomenclature <- read_csv("nace_nomenclature_extended.csv")
if (read_nace_RDS==0){
## there is a problem with one line in the data after the filtering (734475) because it contains a net FDI inward coupled with a direct investment abroad (an impossible situation!!!!). It must be a typo in the data set.
df_pos_nace <- df_pos_ini %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>% gather(year, value, c(`2017`,`2016`, `2015`, `2014`, `2013`))%>% filter( stk_flow %in% c("NI", "NO"), fdi_item %in% c("DI__D__F", "DO__D__F" ) ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT") ) %>% mutate(direction=paste(fdi_item,"_STOCKS", sep="")) %>% select(year,geo,partner, direction,nace_r2, entity, value )%>% {.$value <- as.numeric(.$value)
.} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
.} %>% distinct() %>% slice(-c( 1060974, 1061924 )) %>%
spread(direction, value) %>% arrange(geo, year)
df_flow_nace <- df_flow_ini %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>% gather(year, value, c(`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter( stk_flow %in% c("NI", "NO"), fdi_item %in% c("DI__D__F", "DO__D__F" ) ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT") )%>% mutate(direction=paste(fdi_item,"_FLOWS", sep=""))%>% select(year,geo,partner, direction, nace_r2, entity, value )%>% {.$value <- as.numeric(.$value)
.} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
.} %>% distinct() %>% spread(direction, value) %>% arrange(geo, year)
## df_inc_nace <- df_inc_ini %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",") %>% gather(year, value, c(`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter( stk_flow %in% c("II", "IO"), fdi_item %in% c("DI__D4P__D__F", "DO__D4P__D__F" ) ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D4P__D__F", "OUT"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D4P__D__F", "IN") )%>% mutate(direction=paste(fdi_item,"_INCOME", sep=""))%>% select(year,geo,partner, direction,nace_r2, entity, value )%>% {.$value <- as.numeric(.$value)
## .} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
## .} %>% distinct() %>% spread(direction, value) %>% arrange(geo, year)
df_tot_nace <- inner_join(x=df_pos_nace,y=df_flow_nace, by=c("year", "geo" ,"partner", "entity", "nace_r2")) ## %>% inner_join(y=df_inc_nace,by=c("year", "geo" ,"partner", "entity", "nace_r2"))
## df_tot_nace <- df_tot_nace %>%
write_csv(df_tot_nace, "FDI_by_nace.csv")
saveRDS(df_tot_nace, "FDI_by_nace.RDS")
## df_tot_nace <- df_tot_nace %>% replace(is.na(.), 0) %>% mutate(mysum=abs(IN_STOCKS)+abs(OUT_STOCKS)+abs(IN_FLOWS)+abs(OUT_FLOWS)## +abs(IN_INCOME)+abs(OUT_INCOME)
## ) %>% filter(mysum!=0) %>% select(-c(mysum)) %>% inner_join(y=nomenclature, by=c("nace_r2"="NACE_R2")) %>% arrange(year, geo, partner, nace_r2)
missing_values <- rowSums(is.na(df_tot_nace))
df_tot_nace <- df_tot_nace %>%
filter(missing_values!=4) %>%
inner_join(y=nomenclature, by=c("nace_r2"="NACE_R2")) %>% arrange(year, geo, partner, nace_r2)
saveRDS(df_tot_nace, "FDI_by_nace_filtered.RDS")
write_csv(df_tot_nace, "FDI_by_nace_filtered.csv")
} else if (read_nace_RDS ==1){
df_tot_nace <- readRDS("FDI_by_nace_filtered.RDS")
}
if (save_by_geo==1){
geo_list <- df_tot_nace$geo %>% unique
for (country in geo_list ){
data_slice <- df_tot_nace %>% filter(geo==country)
fname <- paste(country, "_FDI_by_nace.csv", sep="")
write_csv(data_slice,fname)
}
}
} else {
print("Skipping NACE breakdown")
}
print("So far so good")