• R/O
  • SSH

提交

标签
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

修订版: 2d227eed6e79d0e063b7ffabe8b0dd27cb7bcfdc (tree)
时间: 2009-04-02 00:31:35
作者: iselllo
Committer: iselllo

Log Message

A useful script to download arxiv preprints or to see the abstract or the authors. Try it with -h
to see the help.

更改概述

差异

diff -r 5b4ecfbe0f9d -r 2d227eed6e79 Python-codes/arxiv.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Python-codes/arxiv.py Wed Apr 01 15:31:35 2009 +0000
@@ -0,0 +1,252 @@
1+#! /usr/bin/python
2+
3+## arXiv script version 0.2
4+
5+## Copyright 2008 Tom Brown
6+
7+## This program is free software; you can redistribute it and/or
8+## modify it under the terms of the GNU General Public License as
9+## published by the Free Software Foundation; either version 3 of the
10+## License, or (at your option) any later version.
11+
12+## This program is distributed in the hope that it will be useful,
13+## but WITHOUT ANY WARRANTY; without even the implied warranty of
14+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+## GNU General Public License for more details.
16+
17+## You should have received a copy of the GNU General Public License
18+## along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
20+## See http://www.stringwiki.org/wiki/ArXiv_script for more usage
21+## instructions
22+
23+'''arXiv script
24+Usage:
25+python arxiv.py reference [ -htabcjdps ] [ --help ]
26+"reference" must be a standard arXiv reference, e.g. hep-th/9711200, 0705.0303.
27+Options:
28+-h, --help
29+displays this help message
30+-t
31+displays the title
32+-a
33+displays the author(s)
34+-b
35+displays the aBstract
36+-c
37+displays the comments
38+-j
39+displays the journal reference
40+-d
41+downloads the PDF
42+-p
43+downloads the PS
44+-s
45+downloads the source file
46+'''
47+
48+__version__ = "0.2"
49+__author__ = "Tom Brown"
50+__copyright__ = "Copyright 2008 Tom Brown, GNU GPL 3"
51+
52+
53+import sys, os, getopt, re, urllib,gzip
54+
55+
def findRefType(ref):
    """Classify an arXiv reference and return it in normalised form.

    An optional leading "arXiv:" prefix (any letter case) and surrounding
    whitespace are removed before matching.  Recognised forms:

    * archive/YYMMNNN, optionally with a subject class and/or version
      suffix, e.g. hep-th/9711200, math.GT/0309136, hep-th/9711200v2
    * a bare seven-digit number, which is assumed to belong to hep-th
    * YYMM.NNNN or YYMM.NNNNN, optionally with a version suffix,
      e.g. 0705.0303, 1501.00001v2

    Returns a (type, ref) tuple where type is one of 'old-style eprint',
    'new-style eprint' or 'not arXiv', and ref is the cleaned reference
    (with 'hep-th/' prepended for bare seven-digit numbers).
    """
    ref = ref.strip()
    # The canonical citation form is "arXiv:...", so match the prefix
    # case-insensitively rather than only the all-lowercase spelling.
    if ref[:6].lower() == 'arxiv:':
        ref = ref[6:].strip()

    if re.search(r'^[a-zA-Z\-]+(?:\.[a-zA-Z\-]+)?/\d{7}(?:v\d+)?$', ref):
        refType = 'old-style eprint'
    elif re.search(r'^\d{7}(?:v\d+)?$', ref):
        # A bare old-style number carries no archive name; default to hep-th.
        refType = 'old-style eprint'
        ref = 'hep-th/' + ref
    elif re.search(r'^\d{4}\.\d{4,5}(?:v\d+)?$', ref):
        refType = 'new-style eprint'
    else:
        refType = 'not arXiv'

    return refType, ref
69+
70+
71+
72+
def downloadPDF(ref,type,downloadPath):
    """Download the PDF of an arXiv eprint into downloadPath.

    ref          -- normalised arXiv reference, e.g. 'hep-th/9711200'
    type         -- 'old-style eprint' or 'new-style eprint'; for any other
                    value nothing is downloaded
    downloadPath -- target directory ('~' is expanded; '' means the current
                    directory)

    The file is saved as <ref>.pdf, with any '/' in the reference replaced
    by '-' so that old-style references yield a single file name.
    """
    # NOTE: the parameter name `type` shadows the builtin but is kept so that
    # existing callers are not affected.
    if type not in ('old-style eprint', 'new-style eprint'):
        return

    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # os.path.join copes with directories given without a trailing separator;
    # plain string concatenation would glue the directory and file names.
    target = os.path.join(os.path.expanduser(downloadPath),
                          ref.replace('/', '-') + '.pdf')
    urlretrieve('http://arxiv.org/pdf/' + ref, target)
79+
80+
def _fetchGunzipped(url, tempName, targetName):
    """Download url to tempName, gunzip it into targetName, remove tempName.

    The temporary file is removed even when decompression fails, and the
    target file is only created once the data has been read successfully.
    """
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    urlretrieve(url, tempName)
    try:
        gzipFile = gzip.GzipFile(tempName)
        try:
            data = gzipFile.read()
        finally:
            gzipFile.close()

        # Decompressed data is raw bytes, so the output must be opened in
        # binary mode (text mode corrupts it on some platforms and fails
        # outright on Python 3).
        outFile = open(targetName, "wb")
        try:
            outFile.write(data)
        finally:
            outFile.close()
    finally:
        os.remove(tempName)


def downloadPS(ref,type,downloadPath):
    """Download the PostScript version of an eprint as <ref>.ps.

    ref          -- normalised arXiv reference
    type         -- reference type; accepted for symmetry with downloadPDF
                    but not used
    downloadPath -- target directory ('~' is expanded; '' means the current
                    directory)

    The download is treated as gzip-compressed and is unpacked; any '/' in
    the reference is replaced by '-' in the file name.
    """
    filename = os.path.join(os.path.expanduser(downloadPath),
                            ref.replace('/', '-'))
    _fetchGunzipped('http://arxiv.org/ps/' + ref, filename, filename + ".ps")


def downloadSource(ref,type,downloadPath):
    """Download the source of an eprint and gunzip it to a file named <ref>.

    ref          -- normalised arXiv reference
    type         -- reference type; accepted for symmetry with downloadPDF
                    but not used
    downloadPath -- target directory ('~' is expanded; '' means the current
                    directory)

    The compressed download is stored temporarily as <ref>.dum and removed
    afterwards; any '/' in the reference is replaced by '-' in the file name.
    """
    filename = os.path.join(os.path.expanduser(downloadPath),
                            ref.replace('/', '-'))
    _fetchGunzipped('http://arxiv.org/e-print/' + ref, filename + ".dum", filename)
102+
103+
def getTitle(html):
    """Extract the paper title from the HTML of an arXiv abstract page.

    The title is the text between the '>Title:</span>' label and the next
    '</h1>', with surrounding whitespace removed.  Returns 'no title' when
    either marker is missing.
    """
    marker = ">Title:</span>"
    start = html.find(marker)
    if start == -1:
        return "no title"

    rest = html[start + len(marker):]
    end = rest.find("</h1>")
    if end == -1:
        return "no title"

    # strip() instead of a fixed one-character skip: the label may or may not
    # be followed by whitespace, and a fixed offset would eat a real letter.
    return rest[:end].strip()
108+
109+
def getAuthors(html):
    """Extract the author list from the HTML of an arXiv abstract page.

    Takes the text between the '>Authors:</span>' label and the next
    '</div>', removes all HTML tags and collapses runs of whitespace
    (including newlines) into single spaces.  Returns 'no authors' when
    either marker is missing.
    """
    marker = ">Authors:</span>"
    start = html.find(marker)
    if start == -1:
        return "no authors"

    section = html[start + len(marker):]
    end = section.find("</div>")
    if end == -1:
        return "no authors"

    names = re.sub(r'<[^>]*>', '', section[:end])
    # Normalise whitespace rather than just deleting newlines, so names
    # separated only by a line break are not glued together.
    return ' '.join(names.split())
117+
118+
def getAbstract(html):
    """Extract the abstract from the HTML of an arXiv abstract page.

    The abstract is the text between the 'Abstract:</span>' label and the
    next '</blockquote>', with surrounding whitespace removed.  Returns
    'no abstract' when either marker is missing.
    """
    marker = "Abstract:</span>"
    start = html.find(marker)
    if start == -1:
        return "no abstract"

    rest = html[start + len(marker):]
    end = rest.find("</blockquote>")
    if end == -1:
        return "no abstract"

    # strip() replaces the fixed +1/-1 offsets, which assumed exactly one
    # whitespace character on each side of the text.
    return rest[:end].strip()
123+
def getComments(html):
    """Extract the submitter's comments from an arXiv abstract page.

    Looks for a tag whose quoted attribute value ends in 'comments',
    optionally followed by further space-separated names, e.g.
    class="tablecell comments">, and returns the text up to the next
    '</td>' with surrounding whitespace removed.  Returns 'no comments'
    when no such cell exists.
    """
    # Match the actual markup instead of the bare word "comments", which can
    # occur anywhere in the page (for instance inside the abstract).
    match = re.search(r'comments(?:\s[\w\s-]*)?">(.*?)</td>', html, re.DOTALL)
    if match is None:
        return "no comments"
    return match.group(1).strip()
131+
132+
def getJref(html):
    """Extract the journal reference from an arXiv abstract page.

    Looks for a tag whose quoted attribute value ends in 'jref', optionally
    followed by further space-separated names, e.g. class="tablecell jref">,
    and returns the text up to the next '</td>' with surrounding whitespace
    removed.  Returns 'no journal reference' when no such cell exists.
    """
    # Match the actual markup instead of the bare substring "jref", which may
    # appear elsewhere in the page without the cell being present.
    match = re.search(r'jref(?:\s[\w\s-]*)?">(.*?)</td>', html, re.DOTALL)
    if match is None:
        return "no journal reference"
    return match.group(1).strip()
140+
141+
142+
143+
def main(argv=None):
    """Run the command-line interface and return a process exit status.

    argv -- list of command-line arguments without the program name;
            defaults to sys.argv[1:]

    With no options, the title, authors, abstract, comments and journal
    reference are printed (in that order).  Otherwise only the requested
    fields are printed and/or the requested files are downloaded into the
    current directory.

    Returns 0 on success, 2 for command-line usage errors and 1 when the
    reference is not a recognisable arXiv identifier.  Error messages are
    written to stderr.
    """
    if argv is None:
        argv = sys.argv[1:]

    # NOTE(review): 'v' is accepted by the option string but has no effect
    # and is not documented in the help text -- confirm the intended meaning.
    try:
        options, arguments = getopt.gnu_getopt(argv, 'hatbcjdpsv', ['help'])
    except getopt.error:
        sys.stderr.write('error: you tried to use an unknown option or the argument for an option that requires it was missing; try \'arxiv.py -h\' for more information\n')
        return 2

    flags = set(opt for opt, _ in options)

    if '-h' in flags or '--help' in flags:
        print(__doc__)
        return 0

    if not options:
        # Default behaviour: show every metadata field, download nothing.
        flags = set(['-t', '-a', '-b', '-c', '-j'])

    if len(arguments) != 1:
        sys.stderr.write('you didn\'t specify an arXiv reference; try \'arxiv.py -h\' for more information\n')
        return 2

    refType, ref = findRefType(arguments[0])
    if refType == "not arXiv":
        sys.stderr.write("type not of arXiv form\n")
        return 1

    # Output order is fixed, independent of the order of the options.
    extractors = (
        ('-t', getTitle),
        ('-a', getAuthors),
        ('-b', getAbstract),
        ('-c', getComments),
        ('-j', getJref),
    )
    wanted = [extract for flag, extract in extractors if flag in flags]

    if wanted:
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2

        # The abstract page is fetched once and shared by all extractors.
        page = urlopen('http://arxiv.org/abs/' + ref)
        try:
            html = page.read()
        finally:
            page.close()

        # On Python 3 the response is bytes; the extractors work on text.
        if not isinstance(html, str):
            html = html.decode('utf-8', 'replace')

        for extract in wanted:
            print(extract(html))

    if '-d' in flags:
        downloadPDF(ref, refType, "")

    if '-p' in flags:
        downloadPS(ref, refType, "")

    if '-s' in flags:
        downloadSource(ref, refType, "")

    return 0


if __name__ == "__main__":
    sys.exit(main())
252+