修订版 | cf3decb3c491db93bf7a07d5c93e86779b5f1664 (tree) |
---|---|
时间 | 2022-10-05 16:20:37 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
A script to find info about aid to semiconductors and rare earth elements relying on a number of text tools.
@@ -0,0 +1,129 @@ | ||
1 | +rm(list=ls()) | |
2 | +## NOTE(review): clearing the workspace only matters for interactive runs -- consider dropping it. | |
3 | +library(tidyverse) | |
4 | +library(openxlsx) | |
5 | +library(janitor) | |
6 | +library(stringr) | |
7 | +library(furrr) | |
8 | +library(lubridate) | |
9 | +## Sourced from an absolute path; presumably defines the helpers used below (save_excel(), ...) -- confirm. | |
10 | +source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R") | |
11 | + | |
12 | +## Requested number of parallel workers, passed through return_cores() below. | |
13 | +n_cores <- 3 | |
14 | +## Register the parallel backend: the strategy and its arguments are passed separately to plan(). | |
15 | +plan(multicore, workers=return_cores(n_cores)) | |
16 | + | |
17 | + | |
18 | +## Case list: trim whitespace around the case identifiers, then drop duplicated rows. | |
19 | +## NOTE(review): read_excel() is not attached by library(tidyverse); presumably stat_lib.R loads readxl -- confirm. | |
20 | +df <- read_excel("list-materials-semiconductors.xlsx") |> | |
21 | +    mutate(cases=str_trim(cases)) |> | |
22 | +    distinct() | |
23 | +## Write the de-duplicated list back to disk. | |
24 | +save_excel(df, "list-materials-semiconductors_no_duplicates.xlsx") | |
25 | +## Scoreboard data: keep the four columns used below, restricted to the cases in df$cases. | |
26 | +df_sc_ini <- readRDS("scoreboard.RDS") | |
27 | + | |
28 | +df_sc <- df_sc_ini |> | |
29 | +    select(case_number, member_state, expenditure_year, aid_element_eur) |> | |
30 | +    filter(case_number %in% df$cases) | |
31 | + | |
32 | + | |
33 | +## Sum the aid per member state, case and expenditure year, then hand the result to make_wide_with_total(). | |
34 | +df_sc_wide <- df_sc |> | |
35 | +    group_by(member_state, case_number, expenditure_year) |> | |
36 | +    summarise(aid_element_eur=sum(aid_element_eur, na.rm=TRUE), | |
37 | +              .groups="drop") |> | |
38 | +    ## select(member_state) |> | |
39 | +    make_wide_with_total("expenditure_year", "aid_element_eur") | |
40 | + | |
41 | +## Both tables go to the same file name with a different third argument -- | |
42 | +## presumably the sheet name; confirm against save_excel() in stat_lib.R. | |
43 | +save_excel(df, "semiconductors_materials.xlsx", "case_list") | |
44 | +save_excel(df_sc_wide, "semiconductors_materials.xlsx", "scoreboard_expenditure") | |
45 | + | |
46 | +## TAM records with beneficiary names lower-cased (presumably to line up with the lower-cased keywords -- confirm). | |
47 | +df_tam_ini <- mutate(readRDS("TAM_cleaned_for_shiny.RDS"), | |
48 | +                     beneficiary_name=tolower(beneficiary_name)) | |
49 | + | |
50 | + | |
51 | +## tam_beneficiaries <- df_tam_ini |> | |
52 | +## filter(case_reference %in% df$cases) | |
53 | + | |
54 | +## beneficiaries_list <- tam_beneficiaries |> | |
55 | +## pull(beneficiary_name) |> | |
56 | +## su() | |
57 | + | |
58 | + | |
59 | +## tam_beneficiaries2 <- df_tam_ini |> | |
60 | +## filter(beneficiary_name %in% beneficiaries_list) |> | |
61 | +## distinct(beneficiary_name, case_reference ) |> | |
62 | +## mutate(scheme_in_original_list=if_else(case_reference %in% df$cases, "Yes", "No")) |> | |
63 | +## select(beneficiary_name, case_reference,scheme_in_original_list ) |> | |
64 | +## arrange(beneficiary_name, case_reference) | |
65 | + | |
66 | + | |
67 | +## save_excel(tam_beneficiaries2, "beneficiaries_and_other_SA.xlsx") | |
68 | + | |
69 | + | |
70 | +## df_tam <- df_tam_ini |> | |
71 | +## filter(case_reference %in% df$cases) |> | |
72 | +## distinct(year, case_reference, beneficiary_name ) | |
73 | + | |
74 | +## save_excel(df_tam, "TAM_semiconductors_raw_materials.xlsx") | |
75 | + | |
76 | +## Keywords read from words2.txt (one per line), passed through clean_data() and all_to_lower() (defined outside this script), then trimmed. | |
77 | +keywords <- read_delim("words2.txt", delim="\n", col_names=FALSE) |> | |
78 | +    clean_data() |> | |
79 | +    all_to_lower() |> | |
80 | +    mutate(x1=str_trim(x1, side="both")) |> | |
81 | +    pull(x1) | |
82 | + | |
83 | +## tam_words <- find_text_multiple_keywords_exact_matches_future(df_tam_ini, keywords) | |
84 | + | |
85 | + | |
86 | + | |
87 | +## find_text_filter_exact_matches_col <- function(df, xx, tt){ | |
88 | +## res <- df |> | |
89 | +## filter( find_exact_matches(tt,{{xx}})) | |
90 | + | |
91 | + | |
92 | + | |
93 | +## return(res) | |
94 | +## } | |
95 | + | |
96 | + | |
97 | +## Filter the TAM rows on beneficiary_name against the keywords; presumably keeps exact | |
98 | +## keyword matches, as in the commented-out draft of the helper above -- confirm in stat_lib.R. | |
99 | +tam_words <- find_text_filter_exact_matches_col(df_tam_ini, | |
100 | +                                                beneficiary_name, | |
101 | +                                                keywords) | |
102 | + | |
103 | + | |
104 | +## Save the filtered rows as they are. | |
105 | +save_excel(tam_words, "tam_results.xlsx") | |
106 | +## One row per case / beneficiary / case title, flagged "Yes" when the case is in df$cases and "No" otherwise. | |
107 | +tam_beneficiaries2 <- tam_words |> | |
108 | +    distinct(case_reference, beneficiary_name, case_title) |> | |
109 | +    mutate(case_number_in_list=if_else(condition=case_reference %in% df$cases, true="Yes", false="No")) | |
110 | + | |
111 | +save_excel(tam_beneficiaries2, "tam_final_keywords.xlsx") | |
112 | + | |
113 | +## Granted amounts in EUR million, summed per case, beneficiary, case title and award year. | |
114 | +tam_beneficiaries3 <- tam_words |> | |
115 | +    mutate(year_of_award=year(aid_award_granted_date)) |> | |
116 | +    group_by(case_reference, beneficiary_name, case_title, year_of_award) |> | |
117 | +    summarise(estimated_amount_eur_mio=sum(granted_value_extended_eur, na.rm=TRUE)/1e6, | |
118 | +              .groups="drop") |> | |
119 | +    ## Flag the cases that are in df$cases before passing the table to make_wide_with_total(). | |
120 | +    mutate(case_number_in_list=if_else(case_reference %in% df$cases, "Yes", "No")) |> | |
121 | +    make_wide_with_total("year_of_award", "estimated_amount_eur_mio") | |
122 | + | |
123 | + | |
124 | + | |
125 | +save_excel(tam_beneficiaries3, "tam_final_keywords2.xlsx") | |
126 | + | |
127 | + | |
128 | +## Progress marker, printed only if every step above ran without error. | |
129 | +print("So far so good") |