Rev. | 0d021c7ee5d87db030a379761fb7c41a784bd956 |
---|---|
大小 | 2,339 字节 |
时间 | 2024-07-03 23:43:48 |
作者 | Lorenzo Isella |
Log Message | I added a new variable to the final output. |
## Clear the workspace before running the pipeline.
## NOTE(review): rm(list=ls()) also removes any objects the user already had
## in the environment where this script is run -- confirm this is intended.
rm(list=ls())
## Packages attached for the rest of the script.
library(tidyverse)
library(janitor)
library(openxlsx)
## Project helper file, loaded from a hard-coded absolute path.
## NOTE(review): presumably this is where su(), recode_many(), %!in% and
## save_excel() come from, since none of them is defined in this script --
## confirm, and consider a relative or configurable path.
source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
## Load the scoreboard and keep only the rows with a strictly positive aid
## element, from expenditure year 2012 onwards.
## Dropping the entries with zero associated expenditure is still
## to be discussed.
df_ini <- readRDS("../scoreboard.RDS") |>
  filter(aid_element_eur > 0) |>
  filter(expenditure_year >= 2012)
## Build the minimally aggregated data set: total the EUR aid element for
## every combination of year, member state, instrument, objective, case
## number, type of aid, case type, sector codes/names and the covid/ukraine
## flags, then keep only the combinations with a positive total.
df_disagg <- df_ini |>
  summarise(
    ## TRUE rather than T: T is an ordinary variable that can be reassigned.
    expenditure_eur = sum(aid_element_eur, na.rm = TRUE),
    ## Aggregation in national currency is currently disabled:
    ## expenditure_national_currency = sum(aid_element, na.rm = TRUE),
    .by = c(
      expenditure_year,
      member_state_2_letter_codes,
      aid_instrument,
      scoreboard_objective,
      sa_case_number,
      type_of_aid,
      case_type,
      all_sec,
      all_sector_names,
      covid,
      ukraine
    )
  ) |>
  ## Disabled alternative condition: | expenditure_national_currency > 0
  filter(expenditure_eur > 0) |>
  arrange(member_state_2_letter_codes, expenditure_year, sa_case_number) |>
  ## Move the three identifying columns to the front, keep the rest in order.
  select(
    member_state_2_letter_codes,
    sa_case_number,
    expenditure_year,
    everything()
  ) |>
  ## NOTE(review): the total is exposed as "expenditure_in_million_EUR" but no
  ## scaling is applied in this script -- confirm that aid_element_eur is
  ## already expressed in million EUR.
  rename(
    "member_state" = "member_state_2_letter_codes",
    "state_aid_case_number" = "sa_case_number",
    "year_of_expenditure" = "expenditure_year",
    "expenditure_in_million_EUR" = "expenditure_eur"
  )
### Part of the data set to anonymize: ad hoc cases and individual
### applications within a scheme.
df_to_anonim <- df_disagg |>
  filter(
    type_of_aid %in% c("Ad Hoc Case", "Individual Application within scheme")
  )

## Case numbers that need a replacement.
## NOTE(review): su() is defined outside this script; presumably it returns
## the (sorted) unique values -- confirm.
cases_to_anonim <- df_to_anonim$state_aid_case_number |>
  su()
nn <- length(cases_to_anonim)

## Create one fake case number per case: "fake1", "fake2", ...
## seq_len() gives an empty sequence when nn is 0 (1:nn would give c(1, 0)),
## and sprintf() returns character(0) for empty input, so `fakes` always has
## the same length as `cases_to_anonim`.
fakes <- sprintf("fake%d", seq_len(nn))

## Create the anonymized data set by swapping each real case number for its
## fake counterpart.
## NOTE(review): recode_many() is defined outside this script; it is assumed
## to map the i-th element of cases_to_anonim to the i-th element of fakes.
df_anonim <- df_to_anonim |>
  mutate(
    state_aid_case_number = recode_many(
      state_aid_case_number,
      cases_to_anonim,
      fakes
    )
  )
## Assemble the obfuscated data set: take every row of the disaggregated
## data whose type of aid is not one of the two anonymized types, append the
## anonymized rows, restore a stable ordering and give the flag/sector
## columns their final names.
df_disagg_obfuscated <- df_disagg |>
  filter(
    type_of_aid %!in% c("Ad Hoc Case", "Individual Application within scheme")
  ) |>
  bind_rows(y = df_anonim) |>
  arrange(
    member_state,
    year_of_expenditure,
    state_aid_case_number,
    aid_instrument,
    scoreboard_objective,
    type_of_aid,
    case_type
  ) |>
  rename(
    TCF = covid,
    TCTF = ukraine,
    NACE = all_sec,
    NACE_description = all_sector_names
  )

## Write the result to an Excel workbook and report completion.
save_excel(df_disagg_obfuscated, "disaggregated_data_for_research.xlsx")
print("So far so good")