修订版 | 58bdda60df9632ec816ae58dd167966cd2869f88 (tree) |
---|---|
时间 | 2024-08-30 17:13:08 |
作者 | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
I added an automatic exploratory data analysis.
@@ -1,23 +1,28 @@ | ||
1 | 1 | rm(list=ls()) |
2 | -## last saved on Time-stamp: "2024-01-12 10:43:01 lorenzo" | |
2 | +## last saved on Time-stamp: "2024-08-30 10:09:30 lorenzo" | |
3 | 3 | |
4 | 4 | |
5 | 5 | library(tidyverse) |
6 | 6 | library(janitor) |
7 | 7 | library(openxlsx) |
8 | - | |
8 | +library(DataExplorer) | |
9 | 9 | |
10 | 10 | |
11 | 11 | source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R") |
12 | 12 | |
13 | -df<- read.xlsx("../input/scb_data_for_figures_2-22_top.xlsx") |> | |
14 | - as_tibble() |> | |
13 | +## df<- read.xlsx("../input/scb_data_for_figures-24-01-2024.xlsx") |> | |
14 | +## as_tibble() |> | |
15 | +## clean_names() | |
16 | +## ## clean_data() | |
17 | + | |
18 | +df<- read_csv("../input/scb_data_for_figures_LI.zip") |> | |
19 | + ## as_tibble() |> | |
15 | 20 | clean_names() |
16 | - ## clean_data() | |
17 | 21 | |
18 | -saveRDS(df, "scoreboard.RDS") | |
19 | 22 | |
20 | -write_tsv(df, "scoreboard.tsv.gz") | |
23 | +saveRDS(df, "../input/scoreboard.RDS") | |
24 | + | |
25 | +## write_tsv(df, "scoreboard.tsv.gz") | |
21 | 26 | |
22 | 27 | ## df_summary <- df %>% |
23 | 28 | ## group_by(year, member_state) %>% |
@@ -29,4 +34,36 @@ | ||
29 | 34 | |
30 | 35 | ## saveRDS(df_summary, "scoreboard_aggregated_expenditure.RDS") |
31 | 36 | |
37 | + | |
38 | + | |
39 | + | |
40 | +config <- list( | |
41 | + "introduce" = list(), | |
42 | + "plot_intro" = list(), | |
43 | + "plot_str" = list( | |
44 | + "type" = "diagonal", | |
45 | + "fontSize" = 35, | |
46 | + "width" = 1000, | |
47 | + "margin" = list("left" = 350, "right" = 250) | |
48 | + ), | |
49 | + "plot_missing" = list(), | |
50 | + "plot_histogram" = list(), | |
51 | + "plot_density" = list(), | |
52 | + "plot_qq" = list(sampled_rows = 1000L), | |
53 | + "plot_bar" = list(), | |
54 | + "plot_correlation" = list("cor_args" = list("use" = "pairwise.complete.obs")), | |
55 | + "plot_prcomp" = list(), | |
56 | + "plot_boxplot" = list(), | |
57 | + "plot_scatterplot" = list(sampled_rows = 1000L) | |
58 | +) | |
59 | + | |
60 | + | |
61 | +create_report(df ,output_file = "scoreboard_report.html", | |
62 | + output_dir="../output" , | |
63 | + config = config | |
64 | + ) | |
65 | + | |
66 | + | |
67 | + | |
68 | + | |
32 | 69 | print("So far so good") |