
Commit MetaInfo

Revision: 060281ed69fad21d2b70bcdc347ec3fb771200ca (tree)
Time: 2017-11-06 07:06:03
Author: Lorenzo Isella <lorenzo.isella@gmai...>
Committer: Lorenzo Isella
Tags: none

Log Message

I have further developed the code. Now I can find the word following a
given initial word.
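For reference, the lookup described in the log message boils down to filtering a bigram table on its first column and counting the successors. A minimal sketch, assuming bigrams already split into the word1/word2 columns that the diff below relies on (the sample data here are invented; in the script the table comes from clean_bigrams()):

library(dplyr)

# Toy bigram table in the word1/word2 layout used by the script
# (invented data standing in for the output of clean_bigrams()).
bigrams <- tibble(word1 = c("parties", "parties", "shall"),
                  word2 = c("shall", "agree", "cooperate"))

# "Find the word following a given initial word": keep rows whose
# first word matches the query, then count the successors.
bigrams %>%
  filter(word1 == "parties") %>%
  count(word1, word2, sort = TRUE)
# Both successors of "parties" appear once here (n = 1).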

Change Summary

Diff

diff -r 938e846c3aae -r 060281ed69fa R-codes/text_mining.R
--- a/R-codes/text_mining.R	Sun Nov 05 23:05:05 2017 +0100
+++ b/R-codes/text_mining.R	Sun Nov 05 23:06:03 2017 +0100
@@ -21,7 +21,7 @@
 
 
 
-clean_monograms <- function(tidytext, stop_words){
+clean_unigrams <- function(tidytext, stop_words){
 
 tidytext %>% unnest_tokens(word, text)%>% anti_join(stop_words)
 
@@ -48,7 +48,7 @@
 
 
 
-count_monograms <- function(tidytext, stop_words){
+count_unigrams <- function(tidytext, stop_words){
 
 tidytext %>% unnest_tokens(word, text)%>% anti_join(stop_words) %>% count(word, sort = TRUE)
 
@@ -87,7 +87,7 @@
 stop_words <- rbind(stop_words, mystopwords)
 
 
-text_kor_monogram <- clean_text("korea_extraction.txt") %>% clean_monograms(stop_words)
+text_kor_unigram <- clean_text("korea_extraction.txt") %>% clean_unigrams(stop_words)
 
 
 text_kor_bigram <- clean_text("korea_extraction.txt") %>% clean_bigrams(stop_words)
@@ -100,7 +100,7 @@
 
 
 
-kor_monogram_count <- clean_text("korea_extraction.txt") %>% count_monograms(stop_words)
+kor_unigram_count <- clean_text("korea_extraction.txt") %>% count_unigrams(stop_words)
 
 
 kor_bigram_count <- clean_text("korea_extraction.txt") %>% count_bigrams(stop_words)
@@ -110,11 +110,14 @@
 kor_trigram_count <- clean_text("korea_extraction.txt") %>% count_trigrams(stop_words)
 
 
+#####################################################################
+#####################################################################
+#####################################################################
+#####################################################################
 
 
 
-
-ceta_monogram_count <- clean_text("ceta_extraction.txt") %>% count_monograms(stop_words)
+ceta_unigram_count <- clean_text("ceta_extraction.txt") %>% count_unigrams(stop_words)
 
 
 ceta_bigram_count <- clean_text("ceta_extraction.txt") %>% count_bigrams(stop_words)
@@ -124,10 +127,28 @@
 ceta_trigram_count <- clean_text("ceta_extraction.txt") %>% count_trigrams(stop_words)
 
 
+text_ceta_bigram <- clean_text("ceta_extraction.txt") %>% clean_bigrams(stop_words)
 
 
 
 
 
 
+#####################################################################
+#####################################################################
+#####################################################################
+#####################################################################
+#####################################################################
+
+
+parties_kor <- text_kor_bigram %>% filter(word1=="parties") %>% count(word1, word2, sort=T)
+
+
+
+parties_ceta <- text_ceta_bigram %>% filter(word1=="parties") %>% count(word1, word2, sort=T)
+
+
+
+
+
 print("So far so good")
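The helper clean_bigrams() that the new parties_kor / parties_ceta lines depend on is defined earlier in text_mining.R and is not part of this diff. A hypothetical reconstruction following the standard tidytext n-gram pattern; only the word1/word2 output layout is confirmed by the diff, and the stop-word filtering step is an assumption:

library(dplyr)
library(tidyr)
library(tidytext)

# Hypothetical sketch of clean_bigrams(): tokenise the text column
# into bigrams, split each bigram into word1/word2, and drop bigrams
# containing stop words. The body is assumed, not taken from the repo.
clean_bigrams <- function(tidytext, stop_words){
  tidytext %>%
    unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
    separate(bigram, into = c("word1", "word2"), sep = " ") %>%
    filter(!word1 %in% stop_words$word,
           !word2 %in% stop_words$word)
}

Under this reading, parties_kor and parties_ceta simply count, for each corpus, which words follow "parties", which matches the capability announced in the log message.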