#!/usr/bin/env python
# -*- coding: ISO-8859-1 -*-
#
# TODO: Improve extension recognition by checking for mismatches between the
# found targets and the specified local file.
#

8 | # imports {{{
|
---|
9 | import os,sys,re
|
---|
10 | import urllib
|
---|
11 | from HTMLParser import HTMLParser
|
---|
12 | from urllib import FancyURLopener
|
---|
13 | # }}}
|
---|
class MyHTMLParser(HTMLParser): #{{{
    """Collect the ``href`` values of start tags that match a pattern.

    The pattern is compiled once and applied with ``re.match``, so it is
    anchored at the start of each ``href`` value but not at its end.
    Every matching value is appended, in document order, to ``targets``.

    Attributes:
        matcher: the compiled regular expression built from ``pattern``.
        targets: list of matching ``href`` strings found so far.
    """

    def __init__(self, pattern):
        """Create a parser that records hrefs matching ``pattern``.

        Args:
            pattern: regular expression source passed to ``re.compile``.
        """
        HTMLParser.__init__(self)
        self.matcher = re.compile(pattern)
        self.targets = []

    def handle_starttag(self, tag, attrs):
        """Record every ``href`` attribute of this tag that matches.

        Args:
            tag: name of the start tag (unused; any tag may carry an href).
            attrs: list of ``(name, value)`` pairs for the tag's attributes.
        """
        for name, value in attrs:
            if name != "href":
                continue
            # A value-less attribute such as ``<a href>`` is reported with
            # a value of None; handing that to re.match raises TypeError.
            if value is None:
                continue
            if self.matcher.match(value) is not None:
                self.targets.append(value)
#}}}
def _print_usage(): # {{{
    """Print the boxed usage banner shown when the arguments are invalid."""
    border = "******************************************************************************************************************************"
    blank = "* *"
    lines = (
        border,
        "* Invalid input! *",
        blank,
        "* Try: 'DownloadExternalPackage.py url [localFile]' *",
        blank,
        "* Where 'URL' is the URL with an explicit package name or the URL followed by the truncated package name. And 'localFile' is *",
        "* the file name (including extension) that you would like to save as. *",
        blank,
        "* Examples: *",
        blank,
        "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/petsc-2.3.2-p3.tar.gz' 'petsc-2.3.2-p3.tar.gz' *",
        blank,
        "* This is the old style and the safest way to download a package. *",
        blank,
        "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' 'libtool.tar.gz' *",
        blank,
        "* This is the new style. For packages like 'Libtool', which we never expect to be using multiple versions, this will *",
        "* download the most recent version and save it as the generic 'libtool.tar.gz'. *",
        blank,
        "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/gsl-1.' 'gsl-1.15.tar.gz' *",
        blank,
        "* This is the new style. This is a demonstration of how this script can be used to disambiguate a package name if there *",
        "* are more than once package matching 'gsl-'. *",
        blank,
        "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' *",
        blank,
        "* This is the new style. This will download a package with 'libtool' as a prefix and save it as its canonical name. *",
        blank,
        blank,
        border,
    )
    for line in lines:
        print(line)
# }}}

def main(argv=None): # {{{
    """Find a single package link on a listing page and download it.

    ``argv[1]`` is split at its last '/' into the listing URL and a file
    name or name prefix. The listing page is fetched, every ``href`` that
    starts with that prefix is collected, and, when exactly one link
    matches, it is downloaded. Zero or several matches are only reported.
    An existing destination file is never overwritten.

    Args:
        argv: argument list laid out like ``sys.argv`` (program name, URL,
            optional local file name). Defaults to ``sys.argv``.

    Returns:
        None. Progress and problems are reported on standard output.
    """
    if argv is None:
        argv = sys.argv

    # Without a URL there is nothing to search for: show the usage text and
    # stop, instead of carrying on with 'url' and 'find' undefined.
    if len(argv) < 2:
        _print_usage()
        return

    # Separates the URL into a directory and the file or pattern based on
    # the last appearance of '/'. An argument without any '/' cannot be
    # split into the two parts, so it is treated as invalid input.
    url, separator, find = argv[1].rpartition("/")
    if not separator:
        _print_usage()
        return

    if len(argv) > 2:
        localFile = argv[2]
        print("Downloaded file will be saved as: " + localFile)
    else:
        localFile = None
        print("Downloaded file will saved with the same file name.")

    print("Looking for: " + find)

    # As an extra precaution, if no extension is given for a particular
    # package such as '.../libtool', then ensure that files found are of
    # appropriate file extensions.
    #
    # WARNING: The external packages directory includes executable binaries
    # with '.exe' extensions. As such, '.exe' is an acceptable suffix, but
    # this is inherently dangerous since this script can be used to download
    # from any valid website. Furthermore, if an individual attempts a
    # "man-in-the-middle" attack, then the user would be capable of
    # downloading executables from an untrusted source.
    #
    # NOTE(review): 'find' is inserted into the expression unescaped and the
    # extension group is optional, so the suffix list does not actually
    # restrict what matches -- confirm whether that is intended.
    pattern = find + r"[\w.-]*(\.tar\.gz|tar\.gz2|tgz|zip|exe)?"
    parser = MyHTMLParser(pattern)

    # Creates a 'FancyURL' which allows the script to fail gracefully by
    # catching HTTP error codes 30X and several 40X (where 'X' is a natural
    # number). The handle is closed even if reading or parsing fails.
    urlObject = FancyURLopener()
    obj = urlObject.open(url)
    try:
        parser.feed(obj.read())
    finally:
        obj.close()

    # If a file pattern was used to describe the file that should be
    # downloaded, then there is the potential for multiple file matches.
    # Currently, the script will detect this ambiguity and print out all the
    # matches, while informing the user that he must refine his search.
    #
    # TODO: Prompt the user to select from a list his/her preferred target.
    targets = parser.targets
    if len(targets) > 1:
        print("Could not resolve your download due to the number of hits.")
        print("Refine your search.")
        for candidate in targets:
            print(candidate)
        return

    if not targets:
        print("No matches found!")
        return

    target = targets[0]
    print("Found: " + target)
    url += "/" + target

    # NOTE(review): when no local name is given, the href taken from the
    # fetched page is used verbatim as the local path -- confirm that the
    # listing pages are trusted.
    if localFile is None:
        destination = target
    else:
        destination = localFile

    if os.path.exists(destination):
        print("File " + destination + " already exists and will not be downloaded...")
        return

    if localFile is not None:
        if target == localFile:
            print("File found and destination match.")
        elif parser.matcher.match(localFile) is not None:
            # match() returns None on failure; comparing it against the
            # string "None" was always true and hid the warning below.
            print("File found matches destination pattern.")
        else:
            print("WARNING: the file found '" + target + "' does not match '" + localFile + "'")
            print("Ensure the downloaded version is suitable.")

    urllib.urlretrieve(url, destination)
    print("File saved as: " + destination)
# End 'main' function. }}}
137 |
|
---|
# Run the downloader only when this file is executed as a script; importing
# it as a module must not trigger a download.
if __name__ == "__main__":
    main()