修订版 | 4fb846d53255fe3ad23b90eb85f758a01021a4de (tree) |
---|---|
时间 | 2021-04-21 21:23:02 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
Commiter | Lorenzo Isella |
A code to fix some typos in the ISIS database about the allocated budgets.
@@ -0,0 +1,136 @@ | ||
1 | +rm(list=ls()) | |
2 | + | |
3 | +## last saved on Time-stamp: "2021-04-21 11:50:39 lorenzo" | |
4 | + | |
5 | + | |
6 | +library(tidyverse) | |
7 | +library(janitor) | |
8 | +library(lubridate) | |
9 | +library(openxlsx) | |
10 | +library(stringr) | |
11 | +library(stringi) | |
12 | + | |
13 | +source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R") | |
14 | + | |
15 | +my_sel <- c("SA.54376", | |
16 | + "SA.54375", | |
17 | + "SA.54042", | |
18 | + "SA.23842", | |
19 | + "SA.50760", | |
20 | + "SA.20961", | |
21 | + "SA.24091", | |
22 | + "SA.22989", | |
23 | + "SA.19238", | |
24 | + "SA.20857", | |
25 | + "SA.20875", | |
26 | + "SA.46894", | |
27 | + "SA.40171", | |
28 | + "SA.53525", | |
29 | + "SA.39399", | |
30 | + "SA.47205", | |
31 | + "SA.33134", | |
32 | + "SA.33384", | |
33 | + "SA.32125", | |
34 | + "SA.44840", | |
35 | + "SA.43756") | |
36 | + | |
37 | +df_isis_ini <- read.xlsx("SA-ISIS_full_extraction_DWH-258527.xlsx") %>% | |
38 | + as_tibble | |
39 | + | |
40 | +## df_isis_ini <- readRDS("scoreboard.RDS") | |
41 | + | |
42 | +df_isis <- df_isis_ini %>% | |
43 | + clean_data() %>% | |
44 | + mutate(aid_start_date=excel_numeric_to_date(aid_start_date), | |
45 | + aid_end_date=excel_numeric_to_date(aid_end_date) | |
46 | + ) | |
47 | + | |
48 | +df_isis_non_gber <- df_isis %>% | |
49 | + filter(procedure_type_code!="X", | |
50 | + !is.na(decision_date)) %>% | |
51 | + mutate(decision_date=excel_numeric_to_date(decision_date)) %>% | |
52 | + mutate(case_creation_date=excel_numeric_to_date(case_creation_date)) %>% | |
53 | + mutate(year=year(decision_date)) %>% | |
54 | + distinct(procedure_reference, .keep_all=T) %>% | |
55 | + mutate(primary_law=tolower(primary_law), | |
56 | + secondary_law=tolower(secondary_law)) | |
57 | + | |
58 | +my_filter <- df_isis_non_gber %>% | |
59 | + filter(grepl("environment", secondary_law)) %>% | |
60 | + pull(secondary_law) %>% | |
61 | + unique | |
62 | + | |
63 | +df_isis_non_gber <- df_isis_non_gber %>% | |
64 | + filter(secondary_law %in% c(my_filter)) | |
65 | + | |
66 | +df_isis_non_gber_budget <- df_isis_non_gber %>% | |
67 | + filter(!is.na(overall_aid_amount_eur)) %>% | |
68 | + distinct(case_reference, .keep_all=T) %>% | |
69 | + filter(decision_type_article %in% c("Decision not to raise objections" , | |
70 | + "Corrigendum" | |
71 | + )) | |
72 | + | |
73 | + | |
74 | +non_gber_stat_ms_budget <- df_isis_non_gber_budget %>% | |
75 | + ## mutate(year=year(decision_date)) %>% | |
76 | + group_by(year, member_state_long_name) %>% | |
77 | + summarise(allocated_mio_eur=sum(overall_aid_amount_eur)) %>% | |
78 | + ungroup | |
79 | + | |
80 | + | |
81 | +outliers <- detectOutlier(non_gber_stat_ms_budget$allocated_mio_eur) | |
82 | + | |
83 | + | |
84 | +test_outliers <- non_gber_stat_ms_budget %>% | |
85 | + filter(outliers!="no") %>% | |
86 | + arrange(desc(allocated_mio_eur)) | |
87 | + | |
88 | +save_excel(test_outliers, "outliers_to_check.xlsx") | |
89 | + | |
90 | + | |
91 | +test_outliers2 <- df_isis_non_gber %>% | |
92 | + filter(year %in% test_outliers$year, | |
93 | + member_state_long_name %in% test_outliers$member_state_long_name) %>% | |
94 | + arrange(desc(overall_aid_amount_eur)) | |
95 | + | |
96 | +save_excel(test_outliers2, "outliers_to_check2.xlsx") | |
97 | + | |
98 | + | |
99 | + | |
100 | +df_isis_corrections <- df_isis_non_gber %>% | |
101 | + filter(case_reference %in% my_sel) %>% | |
102 | + select(case_reference,procedure_reference , | |
103 | + case_title, case_creation_date, | |
104 | + overall_aid_amount_eur) %>% | |
105 | + arrange(desc(overall_aid_amount_eur)) %>% | |
106 | + mutate(amount_corrected=c(2042, | |
107 | + 1903, | |
108 | + 93.810483, | |
109 | + 70, | |
110 | + 30, | |
111 | + 12.547515, | |
112 | + 10.174779, | |
113 | + 10, | |
114 | + 7, | |
115 | + 2.630162, | |
116 | + 1, | |
117 | + 78.135/4.4904, | |
118 | + 30.95, | |
119 | + 3e4, | |
120 | + 22.1e3, | |
121 | + 20160, | |
122 | + 19.5, | |
123 | + 10450, | |
124 | + 8350, | |
125 | + 6783, | |
126 | + 6000 | |
127 | + )) %>% | |
128 | + mutate(comments=if_else(overall_aid_amount_eur==amount_corrected, | |
129 | + "further checks needed", "OK")) | |
130 | + | |
131 | + | |
132 | +save_excel(df_isis_corrections, "isis_corrections.xlsx") | |
133 | +write_csv(df_isis_corrections, "isis_corrections.csv") | |
134 | + | |
135 | + | |
136 | +print("So far so good") |