修订版 | 4d0a0af349fed4acb9398346163be6281370ab6c (tree) |
---|---|
时间 | 2024-09-18 02:59:18 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
A cleanup of the script and its file management.
@@ -26,14 +26,14 @@ | ||
26 | 26 | ## df_ro_ini <- read_csv("ExportTransparenta_20230403.csv") |> |
27 | 27 | ## clean_names() |
28 | 28 | |
29 | -df_ro_ini <- read_csv("ExportTransparency.csv") |> | |
29 | +df_ro_ini <- read_csv("../input/ExportTransparency.csv") |> | |
30 | 30 | clean_names() |
31 | 31 | |
32 | 32 | |
33 | 33 | repeated_entries <- df_ro_ini |> |
34 | 34 | get_dupes_short() |
35 | 35 | |
36 | -save_excel(repeated_entries, "repeated_entries_Romania.xlsx") | |
36 | +save_excel(repeated_entries, "../output/repeated_entries_Romania.xlsx") | |
37 | 37 | |
38 | 38 | ## tam <- read_parquet("tam.parquet") |> |
39 | 39 | ## slice(1:5) |
@@ -56,29 +56,17 @@ | ||
56 | 56 | clean_names() |> |
57 | 57 | select(-c(text_integral_masura, executanti)) |
58 | 58 | |
59 | -df_name <- read_csv("correspondence_modified.csv") |> | |
59 | +df_name <- read_csv("../input/correspondence_modified.csv") |> | |
60 | 60 | clean_names() |> |
61 | 61 | pattern_to_na("...") |> |
62 | 62 | complete_data() |
63 | 63 | |
64 | -## tam_names <- c("id", "case_reference", "aid_award_created_date", "aid_award_granted_date", | |
65 | -## "aid_award_published_date", "aid_award_reference", "case_title_original", | |
66 | -## "case_title_english", "main_procedure_type_code", "is_co_finance", | |
67 | -## "aid_award_objective", "aid_award_objective_other_english", "aid_award_instrument", | |
68 | -## "aid_award_instrument_other_english", "beneficiary_name", "beneficiary_name_english", | |
69 | -## "national_identification", "national_identification_type", "beneficiary_type", | |
70 | -## "beneficiary_country", "beneficiary_region", "beneficiary_sector", | |
71 | -## "granted_aid_absolute_eur", "nominal_aid_absolute_eur", "granted_range_eur", | |
72 | -## "aid_award_ga_original", "aid_award_ga_english", "aid_award_nuts_code", | |
73 | -## "creator_country", "year", "granted_value_extended_eur", "nominal_value_extended_eur", | |
74 | -## "is_covid_case") | |
75 | - ## names(tam) | |
76 | 64 | |
77 | 65 | |
78 | 66 | ini_names <- names(df_ro) |
79 | 67 | |
80 | 68 | |
81 | -covid <- read_excel("SA-Covid19.xlsx") |> | |
69 | +covid <- read_csv("../../tam_arrow/input/csv_files/SA-Covid19.csv") |> | |
82 | 70 | clean_names() |> |
83 | 71 | filter(member_state_2_letter_code=="RO") |
84 | 72 |
@@ -119,19 +107,19 @@ | ||
119 | 107 | clean_names() |> |
120 | 108 | select(time_period, obs_value) |> |
121 | 109 | mutate(time_period=as.numeric(time_period)) |
122 | - saveRDS(all_rates_ini, "all_rates.RDS") | |
110 | + saveRDS(all_rates_ini, "../input/all_rates.RDS") | |
123 | 111 | |
124 | 112 | |
125 | 113 | |
126 | 114 | } else{ |
127 | 115 | |
128 | -all_rates_ini <- readRDS("all_rates.RDS") | |
116 | +all_rates_ini <- readRDS("../input/all_rates.RDS") | |
129 | 117 | |
130 | 118 | } |
131 | 119 | |
132 | 120 | |
133 | 121 | |
134 | -aid <- read_csv("aid_type_modified.csv") |> | |
122 | +aid <- read_csv("../input/aid_type_modified.csv") |> | |
135 | 123 | complete_data() |> |
136 | 124 | mutate(aid_instrument_rom=tolower(aid_instrument_rom)) |
137 | 125 |
@@ -202,10 +190,10 @@ | ||
202 | 190 | ## slice(6:nrow(.)) |
203 | 191 | |
204 | 192 | |
205 | -covid <- read_excel("SA-Covid19.xlsx") |> | |
206 | - clean_names() | |
193 | +## covid <- read_excel("SA-Covid19.xlsx") |> | |
194 | +## clean_names() | |
207 | 195 | |
208 | -nace <- readRDS("../nace_codes/df_nace.RDS") |> | |
196 | +nace <- readRDS("../input/df_nace.RDS") |> | |
209 | 197 | select(-code) |
210 | 198 | |
211 | 199 | df_nace <- tibble(macro=seq_fixed_width(1:99,2), |
@@ -260,13 +248,19 @@ | ||
260 | 248 | |
261 | 249 | |
262 | 250 | |
263 | -save_excel(df.out, "romania_extra_tam.xlsx") | |
251 | +save_excel(df.out, "../output/romania_extra_tam.xlsx") | |
264 | 252 | |
265 | -write_csv(df.out, "romania_extra_tam.csv.gz") | |
266 | -write_csv(df.out, "romania_extra_tam.csv") | |
253 | +write_csv(df.out, "../output/romania_extra_tam.csv.gz") | |
254 | +write_csv(df.out, "../output/romania_extra_tam.csv") | |
267 | 255 | ## write_dta(df.out, "romania_extra_tam.dta") |
268 | -saveRDS(df.out, "romania_extra_tam.RDS") | |
256 | +saveRDS(df.out, "../output/romania_extra_tam.RDS") | |
269 | 257 | |
270 | -write_parquet(df.out, "romania_extra.parquet") | |
258 | +remove_file("../output/romania_extra.parquet") | |
259 | +write_parquet(df.out, "../output/romania_extra.parquet") | |
260 | + | |
261 | +remove_file("../../tam_arrow/input/parquet-files/romania/romania_extra.parquet") | |
262 | + | |
263 | +write_parquet(df.out, "../../tam_arrow/input/parquet-files/romania/romania_extra.parquet") | |
264 | + | |
271 | 265 | |
272 | 266 | print("So far so good") |