• R/O
  • SSH

提交

标签
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

Commit MetaInfo

修订版160d2f95fe605f31e3a4822172381ce887262ee1 (tree)
时间2022-03-16 18:53:51
作者Lorenzo Isella <lorenzo.isella@gmai...>
Committer: Lorenzo Isella

Log Message

A script using several cores to read the csv files in different locations.

更改概述

差异

diff -r b6bd1c6d19bc -r 160d2f95fe60 R-codes/read_all_polish_tam_no_filters.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/R-codes/read_all_polish_tam_no_filters.R Wed Mar 16 10:53:51 2022 +0100
@@ -0,0 +1,99 @@
# Script set-up ----
# No rm(list = ls()) here: wiping the global environment destroys the caller's
# workspace when this file is source()d interactively and is a no-op under
# Rscript.

# Packages attached for this script (each one attached once).
library(tidyverse)
library(janitor)
library(openxlsx)
library(stringr)
library(lubridate)
library(stringi)
library(future)
library(furrr)

# Project helper library, loaded from an absolute path on the author's machine.
# NOTE(review): presumably the origin of return_cores(), clean_data() and
# date_to_date() used by the rest of the script -- confirm.
source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
15+
16+
17+
18+
#' Read one CSV file and keep its "nominalna" / "brutto" columns as numerics.
#'
#' The file is read with `read_csv()`, its column names are normalised with
#' `clean_names()`, and only the columns whose cleaned name contains
#' "nominalna" or "brutto" are kept. Every kept column is then coerced with
#' `as.numeric()`; values that cannot be converted become `NA` (with the usual
#' coercion warning).
#'
#' @param file_pos Path of the CSV file, passed unchanged to `read_csv()`.
#' @return The table returned by the pipeline: the selected columns only,
#'   each coerced with `as.numeric()`.
read_large <- function(file_pos) {
  # Progress line, one per file (status output goes through message()).
  message("Processing ", file_pos)

  read_csv(file_pos) %>%
    clean_names() %>%
    ## all_to_char() %>%                       # disabled step, kept for reference
    select(contains(c("nominalna", "brutto"))) %>%
    # After the select() every remaining column matches one of the two
    # patterns, so everything() targets exactly the same columns.
    mutate(across(everything(), as.numeric))
  ## %>% filter(brutto_eur>=1e5)               # disabled step, kept for reference
}
38+
39+#########################################################################
40+
41+
42+
# Parallel backend ----
n_cores <- 6

# Forked (multicore) workers; the worker count is whatever return_cores()
# yields for n_cores (helper defined outside this section).
# Per the future documentation, multicore is unavailable on Windows and is
# disabled inside RStudio, where evaluation falls back to sequential.
plan(multicore, workers = return_cores(n_cores))


# Case extract ----
# read_excel() lives in readxl, which library(tidyverse) does not attach, so
# the call is namespace-qualified.
# NOTE(review): df_sa / tf_cases are currently referenced only by the disabled
# post-processing steps at the bottom of this script.
df_sa <- readxl::read_excel("SA-Case_Full_Extract_Case_EC-608096.xlsx") %>%
  clean_data() %>%
  date_to_date()

tf_cases <- df_sa %>%
  filter(internal_qualifier_s == "Covid19-TF") %>%
  pull(case_reference)


# Input files ----
mypath <- "./data1/text_conversion/"
mypath2 <- "./data2/text_conversion/"

# list.files() takes a regular expression, not a glob: anchor on the extension
# so that names merely containing "csv" are not picked up.
csv_pattern <- "\\.csv$"

flist <- list.files(path = mypath, pattern = csv_pattern)
flist2 <- list.files(path = mypath2, pattern = csv_pattern)

# file.path() returns character(0) when a directory holds no CSV files, whereas
# paste(dir, character(0), sep = "") would return the bare directory path and
# make read_large() try to read a directory. The trailing slash is stripped
# first so the resulting paths are identical to the previous ones.
flist_ext <- file.path(sub("/+$", "", mypath), flist)

flist_ext2 <- file.path(sub("/+$", "", mypath2), flist2)


# Parallel read ----
# One read_large() call per file, results row-bound per directory.
df_filtered1 <- future_map_dfr(flist_ext, read_large)

df_filtered2 <- future_map_dfr(flist_ext2, read_large)


# Combine and save ----
df.out <- bind_rows(df_filtered1, df_filtered2)
## Disabled post-processing steps, kept for reference:
##   %>% convert_to_utf_all() %>%
##   distinct(akt_data, pkd_kod, ustawa_kod, dzien_udzelenia_pomocy,
##            identyfikator, .keep_all = TRUE) %>%
##   mutate(date = parse_date_time(akt_data, c("dmy", "ymd"))) %>%
##   mutate(year = year(date)) %>%
##   filter(year %in% c(2019, 2020)) %>%
##   mutate(case_reference = substrLeft(srodekpomoc_nr, 8)) %>%
##   mutate(covid19_TF = if_else(case_reference %in% tf_cases,
##                               "yes", "no")) %>%
##   filter(covid19_TF == "yes" & brutto_eur >= 500e3 |
##          covid19_TF == "no" & brutto_eur >= 100e3)

saveRDS(df.out, "all_PL_tam.RDS")

print("So far so good")