Rev. | ebacf24ce5be0dfe702c6151835269225177a366 |
---|---|
大小 | 3,301 字节 |
时间 | 2020-05-12 23:42:38 |
作者 | Lorenzo Isella |
Log Message | A trivial correction. |
rm(list=ls())
library(tidyverse)
library(janitor)
library(openxlsx)
library(scales)
library(tibbletime)
library(rlang)
## library(viridis)
## library(Cairo)
## library(patchwork)
## library(gridExtra)
## library(grid)
## library(magrittr)
source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
## Two-letter reporter codes that are summed into the "EU27" aggregate row
## further down (they are matched against `reporterid`).
## NOTE(review): Greece appears here as "GR"; confirm that this is the code
## actually used in the input file.
iso_eu27 <- c(
  "AT", "BE", "BG", "CY", "CZ", "DK", "EE", "FI", "FR",
  "DE", "GR", "HU", "IE", "IT", "LV", "LT", "LU", "MT",
  "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE", "HR"
)

## Length, in weeks, of the short window (`seq2`) that ends at the last
## retained week.
aver_on_latest_weeks <- 4

## How many of the most recent weeks of the latest year are discarded before
## any aggregation takes place.
remove_last_weeks <- 3
## Load the raw extraction: a semicolon-separated text file in which empty
## fields are read as NA. Column names are normalised with clean_names(),
## the `iq` column is dropped, `year` and `week` are coerced to integer and
## `iv` is divided by 1e6.
## Alternative input kept for reference:
## df <- readRDS("cleaned_dataset.RDS")
df <- read_delim("cdc_hs2.txt", delim = ";", na = "") %>%
  clean_names() %>%
  select(-iq) %>%
  mutate(
    year = as.integer(year),
    week = as.integer(week),
    iv = iv / 1e6
  )
## Disabled one-off overrides of single `iv` values (they look like manual
## data corrections); kept for reference. To re-enable, pipe them after the
## mutate() above.
## mutate(iv=if_else(year==2020 & week==10 & reporterid=="ES" & partnerid=="NG" & productid=="27", iv/1e3, iv )) %>%
## mutate(iv=if_else(year==2020 & week==8 & reporterid=="GB" & partnerid=="US" & productid=="85",328 , iv )) %>%
## mutate(iv=if_else(year==2020 & week==2 & reporterid=="MT" & partnerid=="CN" & productid=="88",0.923 , iv ))
## Last week kept for the analysis: the highest week number present in the
## most recent year, minus the `remove_last_weeks` trailing weeks.
## The subtraction is written explicitly instead of relying on how magrittr
## rewrites `x %>% - n`. local() keeps the temporary vector out of the
## global environment.
week_max <- local({
  latest_year_weeks <- df %>%
    filter(year == max(year)) %>%
    pull(week)
  max(as.integer(latest_year_weeks)) - remove_last_weeks
})

## Fail early with a clear message. An NA/empty `year` or `week`, or a latest
## year with too few weeks, would otherwise surface later as an obscure error
## when the week sequences are built (or silently disable the trimming).
if (!is.finite(week_max) || week_max < 1) {
  stop(
    "Cannot determine the last usable week: check `year`/`week` for ",
    "missing values and that the latest year has more than ",
    remove_last_weeks, " weeks of data.",
    call. = FALSE
  )
}
## Keep only weeks up to the cutoff and drop the rows whose partner code is
## "EU".
df <- df %>%
  filter(week <= week_max, partnerid != "EU")

## Week windows used for the two aggregations:
##   seq1 - from week 1 up to the last retained week (l2);
##   seq2 - the final `aver_on_latest_weeks` weeks of that span (l1 to l2).
l2 <- max(df$week)
l1 <- l2 - aver_on_latest_weeks + 1
seq1 <- 1:l2
seq2 <- l1:l2
## Aggregate `iv` over a set of weeks and append the derived aggregate rows.
##
## Arguments:
##   data  - data frame with the columns year, week, reporterid, partnerid,
##           productid and iv.
##   weeks - vector of week numbers to keep.
##
## Returns an ungrouped tibble with the columns year, reporterid, partnerid,
## productid, iv and period, where `period` is the label "<first>-<last>"
## built from range(weeks).
##
## Reads the global `iso_eu27` to decide which reporters make up "EU27".
aggregate_weeks <- function(data, weeks) {
  period_label <- paste0(range(weeks), collapse = "-")

  data %>%
    ## `!!` injects the value so that it can never be confused with a column.
    filter(week %in% !!weeks) %>%
    ## 1. Sum over the selected weeks.
    group_by(year, reporterid, partnerid, productid) %>%
    summarise(iv = sum(iv, na.rm = TRUE)) %>%
    ungroup() %>%
    ## 2. For every year/product/partner, add an "EU27" reporter row holding
    ##    the sum over the reporters listed in `iso_eu27`.
    group_by(year, productid, partnerid) %>%
    group_modify(~ bind_rows(
      .x,
      tibble(
        reporterid = "EU27",
        iv = sum(.x$iv[.x$reporterid %in% iso_eu27], na.rm = TRUE)
      )
    )) %>%
    ungroup() %>%
    select(year, reporterid, partnerid, productid, iv) %>%
    ## 3. For every year/product/reporter (the new "EU27" rows included), add
    ##    a partner row holding the sum over all partners in the group.
    ##    NOTE(review): the label says "EU28" although the reporter aggregate
    ##    is "EU27"; confirm that this is intended.
    group_by(year, productid, reporterid) %>%
    group_modify(~ bind_rows(
      .x,
      tibble(partnerid = "Extra-EU28", iv = sum(.x$iv, na.rm = TRUE))
    )) %>%
    ungroup() %>%
    ## 4. For every year/partner/reporter, add a "total" product row holding
    ##    the sum over all products in the group.
    group_by(year, partnerid, reporterid) %>%
    group_modify(~ bind_rows(
      .x,
      tibble(productid = "total", iv = sum(.x$iv, na.rm = TRUE))
    )) %>%
    ungroup() %>%
    mutate(period = !!period_label)
}

## Aggregates over the long window (week 1 to the last retained week) ...
df1 <- aggregate_weeks(df, seq1)
## ... and over the short window made of the latest weeks only.
df2 <- aggregate_weeks(df, seq2)
## Stack the two window aggregates, order the rows by year and apply
## calc_growth() within each partner/reporter/product/period group.
## NOTE(review): calc_growth() is not defined in this file (presumably it
## comes from the sourced stat_lib.R) and is expected to derive `iv_growth`
## from `iv`; confirm its exact definition there.
df_out <- bind_rows(df1, df2) %>%
  arrange(year) %>%
  group_by(partnerid, reporterid, productid, period) %>%
  calc_growth(iv, iv_growth) %>%
  ungroup()

## Persist the result and emit a simple completion marker.
saveRDS(df_out, file = "./CDC_extraction/database_aggregated.RDS")
print("So far so good")