OSDN > Developer > larry77 > Chamber > myprojects-hg-reborn > 文件详情

myprojects-hg-reborn
Fork

(Original repository, No fork origin)

File Info

Rev.	0d021c7ee5d87db030a379761fb7c41a784bd956
大小	2,339 字节
时间	2024-07-03 23:43:48
作者	Lorenzo Isella
Log Message	I added a new variable to the final output.

Content

Export as raw format

## Start from an empty workspace: removes every object returned by ls()
## in the environment the script is run from.
## NOTE(review): this also wipes the caller's objects when the script is
## source()d from an interactive session -- consider running the script in
## a fresh R process instead.
rm(list=ls())

## dplyr verbs (filter, summarise, arrange, ...) come from the tidyverse.
library(tidyverse)

library(janitor)
library(openxlsx)


## Project helper library, loaded from an absolute, machine-specific path.
## NOTE(review): su(), recode_many(), `%!in%` and save_excel() are used
## below but not defined in this file; presumably they are provided by
## stat_lib.R -- confirm.
source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")


## Load the scoreboard and keep only the rows with a strictly positive aid
## element (in EUR) and an expenditure year of 2012 or later.
## Dropping the entries with zero associated expenditure is a choice that
## is still to be discussed.
df_ini <- filter(
    readRDS("../scoreboard.RDS"),
    aid_element_eur > 0,
    expenditure_year >= 2012
)

## Build the minimally aggregated data set: one row per combination of the
## grouping columns listed in `.by`, with the aid element summed in EUR.
## (The analogous sum of the national-currency amount, `aid_element`, is
## deliberately left out, as it was commented out in the original code.)
df_disagg <- df_ini |>
    summarise(
        ## TRUE rather than T: T is an ordinary variable that can be reassigned
        expenditure_eur = sum(aid_element_eur, na.rm = TRUE),
        .by = c(expenditure_year, member_state_2_letter_codes, aid_instrument,
                scoreboard_objective, sa_case_number, type_of_aid, case_type,
                all_sec, all_sector_names, covid, ukraine)
    ) |>
    ## keep only the groups whose total is strictly positive
    filter(expenditure_eur > 0) |>
    arrange(member_state_2_letter_codes, expenditure_year, sa_case_number) |>
    ## identifying columns first, all remaining columns after them
    select(member_state_2_letter_codes, sa_case_number, expenditure_year,
           everything()) |>
    ## NOTE(review): the total is renamed to "expenditure_in_million_EUR" but
    ## no rescaling happens in this script -- confirm that aid_element_eur is
    ## already expressed in million EUR.
    rename("member_state" = "member_state_2_letter_codes",
           "state_aid_case_number" = "sa_case_number",
           "year_of_expenditure" = "expenditure_year",
           "expenditure_in_million_EUR" = "expenditure_eur")



### Part of the dataset to anonymize: the rows whose type of aid is an
### ad hoc case or an individual application within a scheme.
df_to_anonim <- df_disagg |>
    filter(type_of_aid %in% c("Ad Hoc Case",
                              "Individual Application within scheme"))

## Case numbers that need a replacement.
## NOTE(review): su() is not defined in this file; presumably it returns
## the sorted unique values of its argument -- confirm.
cases_to_anonim <- df_to_anonim$state_aid_case_number |>
    su()

nn <- length(cases_to_anonim)

## Create the fake case numbers "fake1", "fake2", ..., one per element of
## cases_to_anonim. seq_len() gives an empty vector when nn is 0, whereas
## 1:nn would count down (1, 0) and produce two spurious labels.
fakes <- paste0("fake", seq_len(nn))

## Create the anonymized dataset by replacing the real case numbers.
## NOTE(review): recode_many() is not defined in this file; presumably it
## maps the i-th element of cases_to_anonim to the i-th element of fakes
## -- confirm.
df_anonim <- df_to_anonim |>
    mutate(state_aid_case_number = recode_many(state_aid_case_number,
                                               cases_to_anonim,
                                               fakes))



## Replace part of the disaggregated data set: keep the rows that were not
## selected for anonymization, append the anonymized rows (df_anonim), then
## sort the result and give the remaining columns their output names.
## NOTE(review): `%!in%` is not defined in this file; presumably it is the
## negation of `%in%` -- confirm. The two aid types listed here must stay
## identical to those used to build df_to_anonim, otherwise rows are lost
## or duplicated.
df_disagg_obfuscated <- df_disagg |>
    filter(type_of_aid %!in% c("Ad Hoc Case",
                               "Individual Application within scheme")) |>
    ## passed unnamed: bind_rows() takes its data frames through `...` and
    ## only uses argument names as labels when `.id` is supplied
    bind_rows(df_anonim) |>
    arrange(member_state, year_of_expenditure, state_aid_case_number,
            aid_instrument, scoreboard_objective, type_of_aid, case_type) |>
    rename("TCF" = "covid",
           "TCTF" = "ukraine",
           "NACE" = "all_sec",
           "NACE_description" = "all_sector_names")
    

## Export the obfuscated data set.
## NOTE(review): save_excel() is not defined in this file; presumably it
## writes the data frame to the given .xlsx file (relative to the working
## directory) -- confirm.
save_excel(df_disagg_obfuscated, "disaggregated_data_for_research.xlsx")


## Marker printed once every step above has completed without error.
print("So far so good")

myprojects-hg-reborn Fork

标签

Frequently used words (click to add to your profile)

File Info

Content

myprojects-hg-reborn
Fork