修订版 | 171bb0230e69fb3df7af8ab5f22703244d890119 (tree) |
---|---|
时间 | 2023-01-31 06:19:29 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
I added the option to save/read the ESTAT downloaded data.
@@ -21,10 +21,15 @@ | ||
21 | 21 | ## clean_names |
22 | 22 | |
23 | 23 | |
24 | +read_estat <- 0 | |
25 | + | |
24 | 26 | df_ro_ini <- read_csv("Copy_of_ExportTransparenta_20220919.csv") |> |
25 | 27 | clean_names() |
26 | 28 | |
29 | +repeated_entries <- df_ro_ini |> | |
30 | + get_dupes_short() | |
27 | 31 | |
32 | +save_excel(repeated_entries, "repeated_entries_Romania.xlsx") | |
28 | 33 | |
29 | 34 | ## tam <- read_parquet("tam.parquet") |> |
30 | 35 | ## slice(1:5) |
@@ -63,7 +68,8 @@ | ||
63 | 68 | |
64 | 69 | df_ro_name <- df_ro |> |
65 | 70 | rename_many(df_name$english,ini_names) |> |
66 | - clean_data() |> | |
71 | + clean_data( remove_duplicated_rows=F | |
72 | + ) |> | |
67 | 73 | select(-national_identification_for_other_categories_foreign_undertakings) |> |
68 | 74 | mutate(aid_award_granted_date=dmy(aid_award_granted_date)) |> |
69 | 75 | mutate(year=year(aid_award_granted_date), |
@@ -83,12 +89,21 @@ | ||
83 | 89 | |
84 | 90 | |
85 | 91 | |
86 | - | |
92 | +if (read_estat!=1){ | |
87 | 93 | |
88 | 94 | all_rates_ini <- estat_retrieval(query) |> |
89 | 95 | clean_names() |> |
90 | 96 | select(time_period, obs_value) |> |
91 | 97 | mutate(time_period=as.numeric(time_period)) |
98 | + saveRDS(all_rates_ini, "all_rates.RDS") | |
99 | + | |
100 | + | |
101 | + | |
102 | +} else{ | |
103 | + | |
104 | +all_rates_ini <- readRDS("all_rates.RDS") | |
105 | + | |
106 | +} | |
92 | 107 | |
93 | 108 | |
94 | 109 |
@@ -166,6 +181,8 @@ | ||
166 | 181 | covid <- read_excel("SA-Covid19.xlsx") |> |
167 | 182 | clean_names() |
168 | 183 | |
184 | +nace <- readRDS("../nace_codes/df_nace.RDS") |> | |
185 | + select(-code) | |
169 | 186 | |
170 | 187 | df.out <- df_temp |> |
171 | 188 | rename("aid_award_instrument"="aid_award_instrument_other_english") |> |
@@ -175,8 +192,16 @@ | ||
175 | 192 | granted_value_extended_eur=granted_aid_absolute_eur, |
176 | 193 | is_covid_case=if_else(case_reference %in% covid$case_reference, "Yes", "No")) |> |
177 | 194 | mutate(granted_value_extended_eur=if_else(is_covid_case=="No", granted_value_extended_eur, NA_real_)) |> |
178 | - filter(year!=2022) | |
195 | + filter(year!=2022) |> | |
196 | + mutate(beneficiary_sector=if_else(nchar(beneficiary_sector)==3, | |
197 | + paste("0", beneficiary_sector, sep=""), beneficiary_sector)) |> | |
198 | + left_join(y=nace, by=c("beneficiary_sector"="code2")) |> | |
199 | + mutate(beneficiary_sector=paste(beneficiary_sector, description, | |
200 | + sep=" - ")) |> | |
201 | + select(-description) | |
179 | 202 | |
203 | +## test <- df.out |> | |
204 | +## filter(is.na(description)) | |
180 | 205 | |
181 | 206 | |
182 | 207 | save_excel(df.out, "romania_extra_tam.xlsx") |