• R/O
  • SSH

提交

标签
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

Commit MetaInfo

修订版4fb846d53255fe3ad23b90eb85f758a01021a4de (tree)
时间2021-04-21 21:23:02
作者Lorenzo Isella <lorenzo.isella@gmai...>
CommiterLorenzo Isella

Log Message

A code to fix some typos in the ISIS database about the allocated budgets.

更改概述

差异

diff -r 85c19e4289cb -r 4fb846d53255 R-codes/correct_isis.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/R-codes/correct_isis.R Wed Apr 21 14:23:02 2021 +0200
@@ -0,0 +1,136 @@
1+rm(list=ls())
2+
3+## last saved on Time-stamp: "2021-04-21 11:50:39 lorenzo"
4+
5+
6+library(tidyverse)
7+library(janitor)
8+library(lubridate)
9+library(openxlsx)
10+library(stringr)
11+library(stringi)
12+
13+source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
14+
15+my_sel <- c("SA.54376",
16+ "SA.54375",
17+ "SA.54042",
18+ "SA.23842",
19+ "SA.50760",
20+ "SA.20961",
21+ "SA.24091",
22+ "SA.22989",
23+ "SA.19238",
24+ "SA.20857",
25+ "SA.20875",
26+ "SA.46894",
27+ "SA.40171",
28+ "SA.53525",
29+ "SA.39399",
30+ "SA.47205",
31+ "SA.33134",
32+ "SA.33384",
33+ "SA.32125",
34+ "SA.44840",
35+ "SA.43756")
36+
37+df_isis_ini <- read.xlsx("SA-ISIS_full_extraction_DWH-258527.xlsx") %>%
38+ as_tibble
39+
40+## df_isis_ini <- readRDS("scoreboard.RDS")
41+
42+df_isis <- df_isis_ini %>%
43+ clean_data() %>%
44+ mutate(aid_start_date=excel_numeric_to_date(aid_start_date),
45+ aid_end_date=excel_numeric_to_date(aid_end_date)
46+ )
47+
48+df_isis_non_gber <- df_isis %>%
49+ filter(procedure_type_code!="X",
50+ !is.na(decision_date)) %>%
51+ mutate(decision_date=excel_numeric_to_date(decision_date)) %>%
52+ mutate(case_creation_date=excel_numeric_to_date(case_creation_date)) %>%
53+ mutate(year=year(decision_date)) %>%
54+ distinct(procedure_reference, .keep_all=T) %>%
55+ mutate(primary_law=tolower(primary_law),
56+ secondary_law=tolower(secondary_law))
57+
58+my_filter <- df_isis_non_gber %>%
59+ filter(grepl("environment", secondary_law)) %>%
60+ pull(secondary_law) %>%
61+ unique
62+
63+df_isis_non_gber <- df_isis_non_gber %>%
64+ filter(secondary_law %in% c(my_filter))
65+
66+df_isis_non_gber_budget <- df_isis_non_gber %>%
67+ filter(!is.na(overall_aid_amount_eur)) %>%
68+ distinct(case_reference, .keep_all=T) %>%
69+ filter(decision_type_article %in% c("Decision not to raise objections" ,
70+ "Corrigendum"
71+ ))
72+
73+
74+non_gber_stat_ms_budget <- df_isis_non_gber_budget %>%
75+ ## mutate(year=year(decision_date)) %>%
76+ group_by(year, member_state_long_name) %>%
77+ summarise(allocated_mio_eur=sum(overall_aid_amount_eur)) %>%
78+ ungroup
79+
80+
81+outliers <- detectOutlier(non_gber_stat_ms_budget$allocated_mio_eur)
82+
83+
84+test_outliers <- non_gber_stat_ms_budget %>%
85+ filter(outliers!="no") %>%
86+ arrange(desc(allocated_mio_eur))
87+
88+save_excel(test_outliers, "outliers_to_check.xlsx")
89+
90+
91+test_outliers2 <- df_isis_non_gber %>%
92+ filter(year %in% test_outliers$year,
93+ member_state_long_name %in% test_outliers$member_state_long_name) %>%
94+ arrange(desc(overall_aid_amount_eur))
95+
96+save_excel(test_outliers2, "outliers_to_check2.xlsx")
97+
98+
99+
100+df_isis_corrections <- df_isis_non_gber %>%
101+ filter(case_reference %in% my_sel) %>%
102+ select(case_reference,procedure_reference ,
103+ case_title, case_creation_date,
104+ overall_aid_amount_eur) %>%
105+ arrange(desc(overall_aid_amount_eur)) %>%
106+ mutate(amount_corrected=c(2042,
107+ 1903,
108+ 93.810483,
109+ 70,
110+ 30,
111+ 12.547515,
112+ 10.174779,
113+ 10,
114+ 7,
115+ 2.630162,
116+ 1,
117+ 78.135/4.4904,
118+ 30.95,
119+ 3e4,
120+ 22.1e3,
121+ 20160,
122+ 19.5,
123+ 10450,
124+ 8350,
125+ 6783,
126+ 6000
127+ )) %>%
128+ mutate(comments=if_else(overall_aid_amount_eur==amount_corrected,
129+ "further checks needed", "OK"))
130+
131+
132+save_excel(df_isis_corrections, "isis_corrections.xlsx")
133+write_csv(df_isis_corrections, "isis_corrections.csv")
134+
135+
136+print("So far so good")