• R/O
  • HTTP
  • SSH
  • HTTPS

提交

标签
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

news4 - RSS aggregation system


Commit MetaInfo

修订版: 2244893f997089e97f35e075de10ccad4cb33c5f (tree)
时间: 2012-11-07 05:33:20
作者: hylom <hylom@hylo...>
Committer: hylom

Log Message

Merge branch 'master' into live

更改概述

差异

--- a/README
+++ b/README
@@ -14,9 +14,12 @@
1414  gnewsでサイトを生成するために、下記の設定ファイルが必要です。
1515
1616 === config.py ===
17- HTMLの生成先やサイト名、RSSの取得先などを設定するファイルです。PythonのDictionaryおよびArray形式で記述されています。
17+ HTMLの生成先やサイト名などを設定するファイルです。PythonのDictionaryおよびArray形式で記述されています。
1818  config.py.sampleをコピーしてconfig.pyを作成し、編集します。
1919
20+=== sources.ini ===
21+ RSSの取得先を設定するファイルです。ini形式で記述されています。セクション名が表示されるサイト名、urlパラメータがサイトのURL、sourceパラメータがRSSの取得先、filtersパラメータが使用するフィルタ一覧(カンマ区切り)となります。
22+
2023 === install.conf ===
2124  関連ファイルのコピー先を指定するファイルです。
2225  install.conf.sampleをコピーしてinstall.confを作成し、編集します。
--- a/config.py.sample
+++ b/config.py.sample
@@ -7,6 +7,7 @@ config = {
77 'output_directory': 'outputs',
88 'filter_directory': 'filters',
99 'pagination_unit': 20,
10+ 'log_level': 0,
1011 'index': {
1112 'template': 'index.tmpl.html',
1213 'output_directory': 'outputs/',
@@ -30,59 +31,7 @@ config = {
3031 'post_filters': [
3132 'cleanup',
3233 'trimming',
34+ 'remove_tracker'
3335 ],
3436 }
3537
36-target_rss = [
37- {
38- 'name': 'SourceForge.JP Magazine',
39- 'url': 'http://rss.rssad.jp/rss/sourceforge/magazine/rss',
40- 'source_url': 'http://sourceforge.jp/magazine/',
41- },
42- {
43- 'name': 'Slashdot Japan',
44- 'url': 'http://rss.rssad.jp/rss/slashdot/slashdot.rss',
45- 'source_url': 'http://slashdot.jp/',
46- 'filter': ['slashdotjp',],
47- },
48- {
49- 'name': 'ITmedia',
50- 'url': 'http://rss.rssad.jp/rss/itmtop/2.0/itmedia_all.xml',
51- 'source_url': 'http://www.itmedia.co.jp/',
52- 'filter': ['tagging', 'itmedia'],
53- },
54- {
55- 'name': 'So-netセキュリティ通信',
56- 'url': 'http://security-t.blog.so-net.ne.jp/index.xml',
57- 'source_url': 'http://security-t.blog.so-net.ne.jp/',
58- 'filter': ['tagging',],
59- },
60- {
61- 'name': 'Engadget Japanese',
62- 'url': 'http://japanese.engadget.com/rss.xml',
63- 'source_url': 'http://japanese.engadget.com/',
64- 'filter': ['tagging',],
65- },
66- {
67- 'name': 'ギズモード・ジャパン',
68- 'url': 'http://feeds.gizmodo.jp/rss/gizmodo/index.xml',
69- 'source_url': 'http://www.gizmodo.jp/',
70- 'filter': ['tagging',],
71- },
72- {
73- 'name': 'TechCrunch Japan',
74- 'url': 'http://jp.techcrunch.com/feed/',
75- 'source_url': 'http://jp.techcrunch.com/',
76- 'filter': ['tagging',],
77- },
78- ]
79-
80-
81-"""Template:
82- {
83- 'name': '',
84- 'url': '',
85- 'source_url': '',
86- 'filter': ['tagging',],
87- },
88-"""
--- /dev/null
+++ b/config2ini.py
@@ -0,0 +1,21 @@
1+#!/usr/bin/python
2+
3+from config import config as config, target_rss as target_rss
4+import ConfigParser
5+import sys
6+
def main():
    """Print the legacy ``target_rss`` list to stdout in ini format.

    Every feed becomes one section named after the feed's ``name``. The
    feed's ``url`` is written as the ``source`` option, its ``source_url``
    as the ``url`` option, and an optional ``filter`` list as a
    comma-joined ``filters`` option.
    """
    parser = ConfigParser.SafeConfigParser()
    for feed in target_rss:
        section = feed["name"]
        parser.add_section(section)
        parser.set(section, 'source', feed["url"])
        parser.set(section, 'url', feed["source_url"])
        if 'filter' not in feed:
            continue
        parser.set(section, 'filters', ",".join(feed["filter"]))
    parser.write(sys.stdout)
17+
18+if __name__ == '__main__':
19+ main()
20+
21+
--- /dev/null
+++ b/configloader.py
@@ -0,0 +1,27 @@
1+# configloader.py
2+# -*- coding: utf-8 -*-
3+
4+import ConfigParser
5+
6+CONFIG_FILE = 'sources.ini'
7+
def load():
    """Parse the ini file named by CONFIG_FILE into a list of feed sources.

    Returns:
        A list with one dict per ini section. Each dict has ``name`` (the
        section name), ``source`` and ``url`` (the values of those
        options) and, only when the section has a ``filters`` option,
        ``filters``: the comma-separated names, stripped of surrounding
        whitespace, with empty names dropped.

    Raises:
        IOError: CONFIG_FILE cannot be opened.
        ConfigParser.NoOptionError: a section lacks ``source`` or ``url``.
    """
    parser = ConfigParser.SafeConfigParser()
    fp = open(CONFIG_FILE, 'r')
    try:
        parser.readfp(fp)
    finally:
        # close the file even when parsing fails
        fp.close()

    sources = []
    for section in parser.sections():
        source = {
            "name": section,
            "source": parser.get(section, 'source'),
            "url": parser.get(section, 'url'),
        }
        if parser.has_option(section, 'filters'):
            names = [x.strip()
                     for x in parser.get(section, 'filters').split(',')]
            # a trailing or doubled comma must not yield an empty filter name
            source["filters"] = [x for x in names if x]
        sources.append(source)
    return sources
26+
27+
--- a/css/gnews.css
+++ b/css/gnews.css
@@ -10,21 +10,33 @@ a {
1010 margin-bottom: 1em;
1111 }
1212
/* Thumbnail image floated to the right inside an entry header. */
.entry-header .thumbnail {
  width: 100px;
  float: right;
  margin-left: 10px;
  margin-bottom: 10px;
  padding: 1px; /* was misspelt "pagging", which browsers ignore */
}
20+
1321 .entry-continue {
1422 margin-bottom: 1em;
1523 }
1624 .entry-footer{
17- color: gray;
25+ color: #888;
1826 }
1927
2028 #site-header {
21- border-bottom: 1px solid gray;
29+ border-bottom: 1px solid #888;
2230 margin-bottom: 10px;
2331 }
2432
33+#site-header .last-update {
34+ color: #888;
35+}
36+
2537 #site-footer {
2638 margin-top: 10px;
27- color: gray;
39+ color: #888;
2840 text-align: center;
2941 }
3042
--- a/fetcher.py
+++ b/fetcher.py
@@ -5,7 +5,7 @@ import re
55
66 import feedparser
77 import dateutil.parser
8-from config import config as config, target_rss as target_rss
8+from config import config as config
99 from logger import log
1010
1111 class FeedFetcher(object):
@@ -15,13 +15,13 @@ class FeedFetcher(object):
1515
1616 def _fetch(self):
1717 'do fetch'
18- f = feedparser.parse(self._feed["url"])
18+ f = feedparser.parse(self._feed["source"])
1919 entries = []
2020 for e in f['entries']:
2121 entry = {
2222 # 'title': e.title.decode('utf8') if isinstance(e.title, str) else e.title,
2323 'title': e.title,
24- 'link': e.link,
24+ 'url': e.link,
2525 'body': e.description,
2626 'date': dateutil.parser.parse(e.updated),
2727 'feed': self._feed,
@@ -58,8 +58,8 @@ class FeedFetcher(object):
5858 entries = self._fetch()
5959 entries = self._apply_pre_filters(entries)
6060
61- if 'filter' in self._feed:
62- filters = self._feed.get('filter', None)
61+ if 'filters' in self._feed:
62+ filters = self._feed.get('filters', None)
6363 entries = self._apply_filters(filters, entries)
6464
6565 entries = self._apply_post_filters(entries)
--- /dev/null
+++ b/filters/remove_tracker.py
@@ -0,0 +1,15 @@
1+# remove images for tracking
2+# -*- coding: utf-8 -*-
3+
4+import re
5+
# Matches image URLs that start with the rss.rssad.jp host. The dots are
# escaped so that only that literal host name matches.
re_rssad_url = re.compile(r'^http://rss\.rssad\.jp/')


def entry_filter(entry):
    """Remove rss.rssad.jp images from ``entry["images"]``.

    Args:
        entry: a dict describing one feed entry. When it has an
            ``images`` key, the value is a list of image URL strings.

    Returns:
        The same ``entry`` object. Its ``images`` list, if present, is
        modified in place so that it no longer contains URLs matching
        ``re_rssad_url``; the order of the remaining URLs is kept.
    """
    if "images" in entry:
        # Rebuild the list in one pass. Popping by index while walking
        # range(len(...)) skips the element after each removal and runs
        # past the end of the shortened list.
        entry["images"][:] = [
            url for url in entry["images"]
            if not re_rssad_url.search(url)
        ]
    return entry
15+
--- a/gnews.py
+++ b/gnews.py
@@ -1,20 +1,23 @@
11 #!/usr/bin/python
22 'gnews.py - google news clone'
33
4-from config import config as config, target_rss as target_rss
4+from config import config as config
55 import renderer
66 import fetcher
77 import os.path
88 import urllib
99 from logger import log
10+import configloader
11+
12+sources = configloader.load()
1013
1114 def main():
1215 "gnews's main function"
13- # TODO: argv check
16+# TODO: argv check
1417
1518 # fetch RSS feed
1619 entries = []
17- for feed in target_rss:
20+ for feed in sources:
1821 f = fetcher.FeedFetcher(feed)
1922 e = f.get_entries()
2023 entries.extend(e)
@@ -45,8 +48,14 @@ def main():
4548 for e in entries:
4649 log(e["date"])
4750
51+ call_plugin('pre_render', entries)
52+
4853 # do rendering
49- params = {'tags':tags, 'page':{}, 'sorted_tags':sorted_tags}
54+ params = {
55+ 'tags':tags,
56+ 'page':{},
57+ 'sorted_tags':sorted_tags
58+ }
5059
5160 # render index page
5261 do_rendering('index', 'index%s.html', entries, params)
@@ -57,11 +66,43 @@ def main():
5766 do_rendering('tags', tag + '%s.html', subentries, params)
5867
5968
def call_plugin(function_name, entries):
    """Call the hook ``function_name`` of every configured plugin.

    The plugin names come from ``config['plugins']``; a configuration
    without that key is treated as having no plugins.

    Args:
        function_name: name of the hook function to look up in each
            plugin module, e.g. ``'pre_render'``.
        entries: the value passed as the only argument to each hook.

    Raises:
        PluginError: raised by ``_get_plugin`` when a plugin is not found.
        AttributeError: a plugin module does not define the hook.
    """
    for plugin in config.get('plugins', ()):
        if plugin is None:
            # _get_plugin() answers None with a plain function rather than
            # a module, so there is no hook to look up on it
            continue
        mod = _get_plugin(plugin)
        hook = getattr(mod, function_name)
        hook(entries)
75+
class PluginError(Exception):
    """Raised when a named plugin cannot be found.

    Attributes:
        value: the plugin name that was looked up.
    """

    def __init__(self, value):
        # hand the name to Exception as well so that ``args`` is populated
        super(PluginError, self).__init__(value)
        self.value = value

    def __str__(self):
        # %-formatting also copes with a non-string ``value``
        return 'plugin "%s" is not found.' % (self.value,)
81+
def _get_plugin(plugin_name):
    """Load the plugin module ``plugin_name`` from the plugin package.

    The package to import is named by ``config['plugin_directory']``.

    Args:
        plugin_name: name of the plugin module inside that package, or
            ``None``.

    Returns:
        The attribute ``plugin_name`` of the imported package. When
        ``plugin_name`` is ``None``, a function that returns its argument
        unchanged is returned instead.

    Raises:
        PluginError: the imported package has no attribute ``plugin_name``.
    """
    # fallback when no plugin is named
    if plugin_name is None:
        return lambda x: x

    # naming the plugin in the fromlist asks __import__ to load it as a
    # submodule of the package
    package = __import__(config['plugin_directory'],
                         globals(),
                         locals(),
                         [plugin_name])
    try:
        return getattr(package, plugin_name)
    except AttributeError:
        raise PluginError(plugin_name)
100+
60101
61102 def do_rendering(page_type, filename, entries, params):
62103 "rendering page"
63104
64- r = renderer.Renderer()
105+ r = renderer.Renderer(sources)
65106 tmpl = config[page_type]['template']
66107 output_dir = config[page_type]['output_directory']
67108
--- a/logger.py
+++ b/logger.py
@@ -1,6 +1,6 @@
11 'logger.py - log output utility'
22
3-from config import config, target_rss
3+from config import config
44
55 def log(*args):
66 "log helper function"
--- /dev/null
+++ b/plugins/hatebu_counter.py
@@ -0,0 +1,40 @@
1+#!/usr/bin/python
2+# -*- coding: utf-8 -*-
3+'plugin for hatena bookmark counter'
4+
5+#from __future__ import with_statement
6+
7+import xmlrpclib
8+import datetime
9+import time
10+import sys
11+
12+# see http://d.hatena.ne.jp/keyword/%a4%cf%a4%c6%a4%ca%a5%d6%a5%c3%a5%af%a5%de%a1%bc%a5%af%b7%ef%bf%f4%bc%e8%c6%c0API?kid=146686
13+
14+urls = []
15+counts = {}
16+
def pre_fetch(entries=None):
    """Plugin hook named ``pre_fetch``; this plugin does nothing here.

    ``entries`` is accepted, and ignored, so that the hook can be called
    with one argument like the other hooks in this module; calling it with
    no argument still works.
    """
    return None
19+
def pre_tag_aggregate(entries):
    """Plugin hook named ``pre_tag_aggregate``; this plugin does nothing here."""
    return None
22+
def pre_render(entries):
    """Attach a Hatena Bookmark count to every entry.

    The entries are processed in batches of 50 URLs, the limit noted on
    ``_get_count``. Each entry dict gains a ``hatebu_count`` key; its
    ``url`` is left untouched.

    Args:
        entries: list of entry dicts, each with a ``url`` key. The dicts
            are modified in place.
    """
    batch_size = 50
    for start in range(0, len(entries), batch_size):
        batch = entries[start:start + batch_size]
        result = _get_count([entry['url'] for entry in batch])
        # NOTE(review): assumes bookmark.getCount answers with a mapping of
        # URL -> count, as described on the API page referenced at the top
        # of this file -- confirm against a live response.
        for entry in batch:
            # store the count under its own key; writing it to 'url' would
            # destroy the link target of the entry
            entry['hatebu_count'] = result.get(entry['url'], 0)
30+
def pre_quit(entries):
    """Plugin hook named ``pre_quit``; this plugin does nothing here."""
    return None
33+
def _get_count(urls):
    """Call ``bookmark.getCount`` on the Hatena Bookmark XML-RPC endpoint.

    ``urls`` is expanded into positional arguments of the remote call and
    the reply is returned unchanged.
    """
    # urls can have max 50 items
    proxy = xmlrpclib.ServerProxy("http://b.hatena.ne.jp/xmlrpc")
    return proxy.bookmark.getCount(*urls)
40+
--- a/renderer.py
+++ b/renderer.py
@@ -7,17 +7,17 @@ from mako.lookup import TemplateLookup
77 from mako.exceptions import RichTraceback
88 import dateutil.parser
99
10-from config import config, target_rss
10+from config import config
1111 from propertizer import propertize
1212 from logger import log
1313
1414 def date_format(date):
15- #dt = dateutil.parser.parse(date)
1615 return date.strftime('%Y/%m/%d %H:%M')
1716
1817 class Renderer(object):
19- def __init__(self):
18+ def __init__(self, sources):
2019 self.template_dir = config['template_directory']
20+ self._sources = sources
2121
2222 def _get_template(self, template_name):
2323 'read template file'
@@ -38,8 +38,9 @@ class Renderer(object):
3838 'entries': entries,
3939 'params': params,
4040 'site': config['site_parameter'],
41- 'targets': target_rss,
41+ 'sources': self._sources,
4242 }
43+ kwargs['site']['last_update'] = datetime.datetime.utcnow()
4344 for key in kwargs:
4445 d = propertize(kwargs[key])
4546 kwargs[key] = d
--- /dev/null
+++ b/sources.ini.sample
@@ -0,0 +1,54 @@
1+[So-netセキュリティ通信]
2+url = http://security-t.blog.so-net.ne.jp/
3+source = http://security-t.blog.so-net.ne.jp/index.rdf
4+filters = tagging
5+
6+[Impress Watch]
7+url = http://www.watch.impress.co.jp/
8+source = http://rss.rssad.jp/rss/headline/headline.rdf
9+filters = tagging
10+
11+[japan.internet.com]
12+url = http://japan.internet.com/
13+source = http://rss.internetcom.jp/rss/japaninternetcom/index.rdf
14+filters = tagging
15+
16+[SourceForge.JP Magazine]
17+url = http://sourceforge.jp/magazine/
18+source = http://rss.rssad.jp/rss/sourceforge/magazine/rss
19+
20+[WIRED.jp]
21+url = http://wired.jp/
22+source = http://rss.rssad.jp/rss/h/wired/feed.rdf
23+filters = tagging
24+
25+[CNET Japan]
26+url = http://japan.cnet.com/
27+source = http://feeds.japan.cnet.com/rss/cnet/all.rdf
28+filters = tagging
29+
30+[TechCrunch Japan]
31+url = http://jp.techcrunch.com/
32+source = http://jp.techcrunch.com/feed/
33+filters = tagging
34+
35+[ギズモード・ジャパン]
36+url = http://www.gizmodo.jp/
37+source = http://feeds.gizmodo.jp/rss/gizmodo/index.xml
38+filters = tagging
39+
40+[Slashdot Japan]
41+url = http://slashdot.jp/
42+source = http://rss.rssad.jp/rss/slashdot/slashdot.rss
43+filters = slashdotjp
44+
45+[Engadget Japanese]
46+url = http://japanese.engadget.com/
47+source = http://japanese.engadget.com/rss.xml
48+filters = tagging
49+
50+[ITmedia]
51+url = http://www.itmedia.co.jp/
52+source = http://rss.rssad.jp/rss/itmtop/2.0/itmedia_all.xml
53+filters = tagging,itmedia
54+
--- a/templates/index.tmpl.html
+++ b/templates/index.tmpl.html
@@ -25,14 +25,17 @@ s.parentNode.insertBefore(ga, s);
2525 <div class="container">
2626
2727 <!-- タイトル -->
28- <div class="row">
29- <div class="span12">
30- <header id="site-header">
28+ <div class="row" id="site-header">
29+ <div class="span9">
30+ <header>
3131 <a href="${site.root}">
3232 <img id="sitelogo" src="${site.img_directory}/themesjp.png" alt="Themes.JP"> α
3333 </a>
3434 </header>
3535 </div>
36+ <div class="span3 last-update">
37+ last update: ${date_format(site.last_update)}
38+ </div>
3639 </div>
3740
3841 <!-- コンテンツ本体 -->
@@ -59,33 +62,32 @@ s.parentNode.insertBefore(ga, s);
5962 <div class="entry">
6063 <!-- ヘッダ -->
6164 <div class="entry-header">
65+ % if 'images' in entry and len(entry.images) > 0:
66+ <img class="thumbnail" src="${entry.images[0]}">
67+ % endif
6268 <h3>
63- <a href='${entry.link}' target="_blank_">${entry.title}</a>
69+ <a href='${entry.url}' target="_blank_">${entry.title}</a>
6470 </h3>
6571 </div>
6672
6773
6874 <!-- 本文テキスト -->
69- <div class="entry-body">${entry.body}</div>
75+ <div class="entry-body">
76+ ${entry.body}
77+ </div>
7078
7179 <!-- フッタ -->
7280 <div class="entry-footer">
7381 <div class="entry-continue">
74- <a href='${entry.link}'>[続きを読む]</a>
82+ <a href='${entry.url}'>[続きを読む]</a>
7583 </div>
7684 <div class="information">
77- <span>情報元:<a href='${entry.feed.source_url}'>${entry.feed.name}</a></span>
85+ <span>情報元:<a href='${entry.feed.url}'>${entry.feed.name}</a></span>
7886 <span>(${date_format(entry.date)})</span>
7987 <span>タグ:</span>
8088 % for tag in entry.tags:
8189 <span>${tag} </span>
8290 % endfor
83- % if 'images' in entry:
84- <span>画像:</span>
85- % for imgurl in entry.images:
86- <span><a href="${imgurl}">*</a></span>
87- % endfor
88- % endif
8991 </div>
9092 </div>
9193 </div>
@@ -115,8 +117,8 @@ s.parentNode.insertBefore(ga, s);
115117 <div class="feed-provider">
116118 <h3>情報提供サイト:</h3>
117119 <ul class="nav nav-pills nav-stacked">
118- % for item in targets:
119- <li><a href="${item.source_url}">${item.name}</a></li>
## link each provider to its site (the "url" option), not to its RSS feed
% for item in sources:
<li><a href="${item.url}">${item.name}</a></li>
% endfor
121123 </ul>
122124 </div>