• R/O
  • SSH

标签
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

File Info

Rev. ea841657c41a239d61d8aae3aba87455e92284d9
大小 14,040 字节
时间 2020-04-01 22:57:30
作者 Lorenzo Isella
Log Message

Code to read the bulk FDI data now that 2018 is available and we have
the Brexit data by default.

Content

rm(list=ls())

library(tidyverse)
library(magrittr)

library(openxlsx)

source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")

do_nace <- 0

save_by_geo <- 0

read_nace_RDS <- 0

year_start <- 2009

include_income <- 0

calculate_brexit <- 0


## %>% gather(year, value, c(`2016`, `2015`, `2014`, `2013`))

## %>% filter(complete.cases(.)) 


## top_nace_single <- c("TOTAL", "A", "B", "C", "E", "F", "G","H","I","J","K","L","M","N",
##               "Q","R","S","FDI", "D35","P85" )

## top_nace_aggregates <- c("G-U","O_T_U")



## I do not need this any longer

## df_geo_ini <- read_tsv("bop_fdi6_geo.tsv") 

## df_geo <- df_geo_ini  %>% separate("currency,indic_bp,fdi_item,partner,geo\\time",c("currency","indic_bp","fdi_item","partner","geo") ,sep="," ) %>% gather(year, value, c(`2016`, `2015`, `2014`, `2013`)) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN")) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT")) %>% mutate(direction=paste(fdi_item,"_", indic_bp, sep="")) %>% select(year,geo,partner, direction, value )%>% {.$value <- as.numeric(.$value)
##     .} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
##     .}  %>% spread(direction, value)%>% arrange(geo, year)



df_pos_ini <- read_tsv("bop_fdi6_pos.tsv") 

df_pos <- df_pos_ini  %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>% gather(year, value, c(`2018`,`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter( nace_r2=="FDI", stk_flow %in% c("NI", "NO"), fdi_item %in% c("DI__D__F", "DO__D__F" )   ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT") )%>% mutate(direction=paste(fdi_item,"_STOCKS", sep=""))%>% select(year,geo,partner, direction, entity, value )%>% {.$value <- as.numeric(.$value)
    .} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
        .} %>% distinct() %>% spread(direction, value) %>% arrange(geo, year)



df_flow_ini <- read_tsv("bop_fdi6_flow.tsv") 

df_flow <- df_flow_ini %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>% gather(year, value, c(`2018`,`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter( nace_r2=="FDI", stk_flow %in% c("NI", "NO"), fdi_item %in% c("DI__D__F", "DO__D__F" )   ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT") )%>% mutate(direction=paste(fdi_item,"_FLOWS", sep=""))%>% select(year,geo,partner, direction, entity, value )%>% {.$value <- as.numeric(.$value)
    .} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
        .} %>% distinct() %>% spread(direction, value) %>% arrange(geo, year)




if (include_income==1){

df_inc_ini <- read_tsv("bop_fdi6_inc.tsv")  

df_inc <- df_inc_ini  %>%
    separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>%
    gather(year, value, c(`2018`,`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter( nace_r2=="FDI", stk_flow %in% c("II", "IO"), fdi_item %in% c("DI__D4P__D__F", "DO__D4P__D__F" )   ) %>%
    mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D4P__D__F", "OUT"))%>%
    mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D4P__D__F", "IN") )%>%
    mutate(direction=paste(fdi_item,"_INCOME", sep=""))%>%
    select(year,geo,partner, direction, entity, value )%>%
    {.$value <- as.numeric(.$value)
        .} %>%
    filter(complete.cases(.))%>%
    {.$year <- as.numeric(.$year)
        .} %>%
    distinct() %>%
    {.=.[-c(9024, 9026, 14164, 16053), ]
    .} %>% 
    spread(direction, value) %>%
    arrange(geo, year)

}


print("OK here at least")


##Two alternative and equivalent ways to treat df_main. Both work, but one is
## now a comment.


## df_main <- read_tsv("bop_fdi_main.tsv")  %>% separate("indic_bp,geo,partner,post\\time", c("indic_bp","geo","partner","post"), sep=",") %>% gather(year, value, c(`2012`, `2011`, `2010`, `2009` ,`2008` ,`2007` ,`2006`, `2005` ,
##    `2004` , `2003` , `2002` , `2001` , `2000` ,
##    `1999` , `1998` , `1997`, `1996` , `1995`,   `1994` , `1993` , `1992`)) %>% filter(indic_bp %in% c("FLOWS", "INCOME", "STOCKS") ) %>% {.$year <- as.numeric(.$year)
##        .} %>% filter(year>year_start) %>% mutate(value=replace(value, grep(" d", value),drop_last_char(value[grep(" d", value)], 2))) %>% {.$value <- as.numeric(.$value)
##            .}  %>% filter(complete.cases(.))  %>% mutate(indic_bp=replace(indic_bp, post=="505", paste("OUT_", indic_bp[post=="505"], sep="")))%>% mutate(indic_bp=replace(indic_bp, post=="555", paste("IN_", indic_bp[post=="555"], sep=""))) %>% select(-post) %>% spread(indic_bp, value) %>% mutate(entity="TOTAL") %>% select(year, geo, partner, entity, IN_STOCKS, OUT_STOCKS, IN_FLOWS, OUT_FLOWS, IN_INCOME, OUT_INCOME)





df_tot <- inner_join(x=df_pos,y=df_flow, by=c("year", "geo" ,"partner", "entity"))


if (include_income==1){

df_main <- read_tsv("bop_fdi_main.tsv")  %>% separate("indic_bp,geo,partner,post\\time", c("indic_bp","geo","partner","post"), sep=",") %>% gather(year, value, c(`2012`, `2011`, `2010`, `2009` ,`2008` ,`2007` ,`2006`, `2005` ,
   `2004` , `2003` , `2002` , `2001` , `2000` ,
   `1999` , `1998` , `1997`, `1996` , `1995`,   `1994` , `1993` , `1992`)) %>% filter(indic_bp %in% c("FLOWS", "INCOME", "STOCKS") ) %>% {.$year <- as.numeric(.$year)
       .} %>% filter(year>=year_start) %>% mutate(value=replace(value, grep(" d", value),drop_last_char(value[grep(" d", value)], 2))) %>% {.$value <- as.numeric(.$value)
           .}  %>% filter(complete.cases(.))  %>% mutate(post=replace(post, post=="505", "OUT_")) %>% mutate(post=replace(post, post=="555", "IN_")) %>% mutate(indic_bp=paste(post, indic_bp, sep="")) %>% select(-post) %>% spread(indic_bp, value) %>% mutate(entity="TOTAL") %>% select(year, geo, partner, entity, IN_STOCKS, OUT_STOCKS, IN_FLOWS, OUT_FLOWS, IN_INCOME, OUT_INCOME)



write_csv(df_main, "df_main.csv")


    df_tot <- df_tot %>% inner_join(y=df_inc,by=c("year", "geo" ,"partner", "entity")) %>% rbind(df_main) %>% arrange(year, geo)

}

write_csv(df_tot, "update_FDI.csv")

saveRDS(df_tot, "update_FDI.RDS")




write.xlsx(df_tot, file = "update_FDI.xlsx",
      sheetName = "data", append = FALSE, row.names=F,keepNA = TRUE)



###########################################################################
###########################################################################
###########################################################################
###########################################################################
###########################################################################


if (calculate_brexit==1){

###now we get ready for the EU27!


eu28 <- df_tot %>%
    filter(geo=="EU28")

uk <- df_tot %>%
    filter(geo=="UK")


eu27 <- eu28 %>%
    left_join(y=uk, by=c("year", "partner", "entity"))  %>%
    mutate(IN_STOCKS=IN_STOCKS.x-IN_STOCKS.y,
           OUT_STOCKS=OUT_STOCKS.x-OUT_STOCKS.y,
           IN_FLOWS=IN_FLOWS.x-IN_FLOWS.y,
           OUT_FLOWS=OUT_FLOWS.x-OUT_FLOWS.y## ,
           ## IN_INCOME=IN_INCOME.x-IN_INCOME.y,
           ## OUT_INCOME=OUT_INCOME.x-OUT_INCOME.y,
           ) %>%
    filter(partner %!in%  c("UK", "WRL_REST"),
           !grepl("EXT", partner)) %>%
    select(     year, geo.x ,  partner, entity, IN_STOCKS, OUT_STOCKS, IN_FLOWS, OUT_FLOWS## , IN_INCOME,OUT_INCOME
           ) %>%
    rename("geo"="geo.x") %>%
    mutate(geo="EU27B")



eu28_ext_eu28 <- df_tot %>%
    filter(geo=="EU28", partner=="EXT_EU28") %>%
    arrange(year, entity)


uk_ext_eu28 <- df_tot %>%
    filter(geo=="UK", partner=="EXT_EU28")%>%
    arrange(year, entity)

uk_ext_eu27 <- uk_ext_eu28 %>%
    mutate(partner="EXT_EU27B")


eu28_uk <- df_tot %>%
    filter(geo=="EU28", partner=="UK")%>%
    arrange(year, entity)


eu27_extra_eu27 <- eu28_ext_eu28 %>%
    mutate(IN_STOCKS=IN_STOCKS+eu28_uk$IN_STOCKS,
           OUT_STOCKS=OUT_STOCKS+eu28_uk$OUT_STOCKS,
           IN_FLOWS=IN_FLOWS+eu28_uk$IN_FLOWS## ,
           ## IN_INCOME=IN_INCOME+eu28_uk$IN_INCOME,
           ## OUT_INCOME=OUT_INCOME+eu28_uk$OUT_INCOME
           ) %>%
    left_join(y=uk_ext_eu28, by=c("year", "partner", "entity"))  %>%
    mutate(IN_STOCKS=IN_STOCKS.x-IN_STOCKS.y,
           OUT_STOCKS=OUT_STOCKS.x-OUT_STOCKS.y,
           IN_FLOWS=IN_FLOWS.x-IN_FLOWS.y,
           OUT_FLOWS=OUT_FLOWS.x-OUT_FLOWS.y## ,
           ## IN_INCOME=IN_INCOME.x-IN_INCOME.y,
           ## OUT_INCOME=OUT_INCOME.x-OUT_INCOME.y,
           ) %>%
    select(     year, geo.x ,  partner, entity, IN_STOCKS, OUT_STOCKS, IN_FLOWS, OUT_FLOWS## , IN_INCOME, OUT_INCOME
           ) %>%
    rename("geo"="geo.x") %>%
    mutate(geo="EU27B", partner="EXT_EU27B")



brexit_extra <- bind_rows(eu27, eu27_extra_eu27,uk_ext_eu27 )

write.xlsx(brexit_extra, file = "brexit_extra.xlsx",
      sheetName = "data", append = FALSE, row.names=F,keepNA = TRUE)

write_csv(brexit_extra,  "brexit_extra.csv")
saveRDS(brexit_extra,  "brexit_extra.RDS")

    
    
df_tot_fin <- bind_rows(df_tot, eu27, eu27_extra_eu27,uk_ext_eu27 )

saveRDS(df_tot_fin, "update_FDI_brexit.RDS")

write_csv(df_tot_fin,  "update_FDI_brexit.csv")

    
write.xlsx(df_tot_fin, file = "update_FDI_brexit.xlsx",
      sheetName = "data", append = FALSE, row.names=F,keepNA = TRUE)


    
    
}

##########################################################################
##########################################################################
##########################################################################

if (do_nace==1){


nomenclature <- read_csv("nace_nomenclature_extended.csv")


if (read_nace_RDS==0){
    
## there is a problem with one line in the data after the filtering (734475) because it contains a net FDI inward coupled with a direct investment abroad (an impossible situation!!!!). It must be a typo in the data set.

df_pos_nace <- df_pos_ini  %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>% gather(year, value, c(`2017`,`2016`, `2015`, `2014`, `2013`))%>% filter( stk_flow %in% c("NI", "NO"), fdi_item %in% c("DI__D__F", "DO__D__F" )   ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT") ) %>% mutate(direction=paste(fdi_item,"_STOCKS", sep="")) %>% select(year,geo,partner, direction,nace_r2, entity, value )%>% {.$value <- as.numeric(.$value)
    .} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
        .} %>% distinct() %>%  slice(-c(  1060974, 1061924  )) %>%
    spread(direction, value) %>% arrange(geo, year)



df_flow_nace <- df_flow_ini %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",")%>% gather(year, value, c(`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter(  stk_flow %in% c("NI", "NO"), fdi_item %in% c("DI__D__F", "DO__D__F" )   ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D__F", "IN"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D__F", "OUT") )%>% mutate(direction=paste(fdi_item,"_FLOWS", sep=""))%>% select(year,geo,partner, direction, nace_r2, entity, value )%>% {.$value <- as.numeric(.$value)
    .} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
        .} %>% distinct() %>% spread(direction, value) %>% arrange(geo, year)





## df_inc_nace <- df_inc_ini  %>% separate("currency,nace_r2,stk_flow,entity,fdi_item,partner,geo\\time", c("currency","nace_r2","stk_flow","entity","fdi_item","partner","geo"), sep=",") %>% gather(year, value, c(`2017`,`2016`, `2015`, `2014`, `2013`)) %>% filter(  stk_flow %in% c("II", "IO"), fdi_item %in% c("DI__D4P__D__F", "DO__D4P__D__F" )   ) %>% mutate(fdi_item=replace(fdi_item, fdi_item=="DO__D4P__D__F", "OUT"))%>% mutate(fdi_item=replace(fdi_item, fdi_item=="DI__D4P__D__F", "IN") )%>% mutate(direction=paste(fdi_item,"_INCOME", sep=""))%>% select(year,geo,partner, direction,nace_r2, entity, value )%>% {.$value <- as.numeric(.$value)
##     .} %>% filter(complete.cases(.)) %>% {.$year <- as.numeric(.$year)
##         .} %>% distinct() %>% spread(direction, value) %>% arrange(geo, year)


df_tot_nace <- inner_join(x=df_pos_nace,y=df_flow_nace, by=c("year", "geo" ,"partner", "entity", "nace_r2")) ## %>% inner_join(y=df_inc_nace,by=c("year", "geo" ,"partner", "entity", "nace_r2")) 


## df_tot_nace <- df_tot_nace %>% 
write_csv(df_tot_nace, "FDI_by_nace.csv")


saveRDS(df_tot_nace, "FDI_by_nace.RDS")

## df_tot_nace <- df_tot_nace %>%  replace(is.na(.), 0) %>% mutate(mysum=abs(IN_STOCKS)+abs(OUT_STOCKS)+abs(IN_FLOWS)+abs(OUT_FLOWS)## +abs(IN_INCOME)+abs(OUT_INCOME)
##                                                                 )  %>% filter(mysum!=0) %>% select(-c(mysum))  %>% inner_join(y=nomenclature, by=c("nace_r2"="NACE_R2"))  %>% arrange(year, geo, partner, nace_r2)


    missing_values <- rowSums(is.na(df_tot_nace))
    
   
    df_tot_nace <- df_tot_nace %>%
        filter(missing_values!=4) %>% 
        inner_join(y=nomenclature, by=c("nace_r2"="NACE_R2"))  %>% arrange(year, geo, partner, nace_r2)



    

    
saveRDS(df_tot_nace, "FDI_by_nace_filtered.RDS")

    
write_csv(df_tot_nace, "FDI_by_nace_filtered.csv")

} else if (read_nace_RDS ==1){

df_tot_nace <- readRDS("FDI_by_nace_filtered.RDS")


}
    
    if (save_by_geo==1){

geo_list <- df_tot_nace$geo %>% unique

        for (country in geo_list ){


data_slice <- df_tot_nace %>% filter(geo==country) 

fname <- paste(country, "_FDI_by_nace.csv", sep="")
            
write_csv(data_slice,fname)
        }

        

    }

    
    
  

} else {

print("Skipping NACE breakdown")
}

print("So far so good")