source: issm/trunk/scripts/DownloadExternalPackage.py@ 14310

Last change on this file since 14310 was 14310, checked in by Mathieu Morlighem, 12 years ago

merged trunk-jpl and trunk for revision 14308

  • Property svn:executable set to *
File size: 7.9 KB
Line 
1#!/usr/bin/env python
2# -*- coding: ISO-8859-1 -*-
3#
4# TODO: Improve upon extension recognition by checking for mismatches in found targets
5# and specified local file.
6#
7
8# imports {{{
9import os,sys,re
10import urllib
11from HTMLParser import HTMLParser
12from urllib import FancyURLopener
13# }}}
class MyHTMLParser(HTMLParser): #{{{
    """HTML parser that collects ``href`` values matching a regular expression.

    After ``feed()`` has been called, ``targets`` holds, in document order,
    every ``href`` attribute value whose beginning matches ``pattern``
    (``re.match`` semantics: anchored at the start, not at the end).
    """

    def __init__(self, pattern):
        """Compile ``pattern`` and start with an empty list of targets.

        pattern -- regular expression source, compiled with ``re.compile``.
        """
        # Explicit base-class call: the file imports the Python 2 HTMLParser
        # module, so the original calling convention is kept.
        HTMLParser.__init__(self)
        self.matcher = re.compile(pattern)
        self.targets = []

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` value of any start tag when it matches.

        tag   -- tag name (unused: an ``href`` on any tag is considered).
        attrs -- list of ``(name, value)`` pairs supplied by HTMLParser.
        """
        for name, value in attrs:
            if name != "href":
                continue
            # An attribute written without a value (``<a href>``) is reported
            # with value None; handing that to the regex raises TypeError.
            if value is None:
                continue
            if self.matcher.match(value) is not None:
                self.targets.append(value)
#}}}
26def main(argv=None): # {{{
27 # Separates the URL into a directory and the file or pattern based on the
28 # last appearance of '/'.
29 if len(sys.argv) > 1:
30 pivot = sys.argv[1].rfind("/")
31 url = (sys.argv[1])[:pivot]
32 pivot += 1
33 find = (sys.argv[1])[pivot:]
34 else:
35 print "******************************************************************************************************************************"
36 print "* Invalid input! *"
37 print "* *"
38 print "* Try: 'DownloadExternalPackage.py url [localFile]' *"
39 print "* *"
40 print "* Where 'URL' is the URL with an explicit package name or the URL followed by the truncated package name. And 'localFile' is *"
41 print "* the file name (including extension) that you would like to save as. *"
42 print "* *"
43 print "* Examples: *"
44 print "* *"
45 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/petsc-2.3.2-p3.tar.gz' 'petsc-2.3.2-p3.tar.gz' *"
46 print "* *"
47 print "* This is the old style and the safest way to download a package. *"
48 print "* *"
49 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' 'libtool.tar.gz' *"
50 print "* *"
51 print "* This is the new style. For packages like 'Libtool', which we never expect to be using multiple versions, this will *"
52 print "* download the most recent version and save it as the generic 'libtool.tar.gz'. *"
53 print "* *"
54 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/gsl-1.' 'gsl-1.15.tar.gz' *"
55 print "* *"
56 print "* This is the new style. This is a demonstration of how this script can be used to disambiguate a package name if there *"
57 print "* are more than once package matching 'gsl-'. *"
58 print "* *"
59 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' *"
60 print "* *"
61 print "* This is the new style. This will download a package with 'libtool' as a prefix and save it as its canonical name. *"
62 print "* *"
63 print "* *"
64 print "******************************************************************************************************************************"
65
66 if len(sys.argv) > 2:
67 localFile=sys.argv[2]
68 print "Downloaded file will be saved as: " + localFile
69 else:
70 localFile = None
71 print "Downloaded file will saved with the same file name."
72
73
74 print "Looking for: " + find
75
76 # As an extra precaution, if no extension is given for a particular package
77 # such as '.../libtool', then ensure that files found are of appropriate
78 # file extensions.
79 #
80 # WARNING: The external packages directory includes executable binaries with
81 # '.exe' extensions. As such, '.exe' is an acceptable suffix, but this is
82 # inherently dangerous since this script can be used to download from any
83 # valid website. Furthermore, if an individual attempts a "man-in-the-middle"
84 # attack, then the user would be capable of downloading executables from
85 # an untrusted source.
86 pattern = find + "[\w.-]*(\.tar\.gz|tar\.gz2|tgz|zip|exe)?"
87 parser = MyHTMLParser(pattern)
88
89 # Creates a 'FancyURL' which allows the script to fail gracefully by catching
90 # HTTP error codes 30X and several 40X(where 'X' is a natural number).
91 urlObject = FancyURLopener()
92 obj = urlObject.open(url)
93 parser.feed(obj.read())
94
95 # If a file pattern was used to describe the file that should be downloaded,
96 # then there is the potential for multiple file matches. Currently, the script
97 # will detect this ambiguity and print out all the matches, while informing
98 # the user that he must refine his search.
99 #
100 # TODO: Prompt the user to select from a list his/her preferred target.
101 if len(parser.targets) > 1:
102 print "Could not resolve your download due to the number of hits."
103 print "Refine your search."
104 for i in parser.targets:
105 print i
106
107 elif len(parser.targets) == 1:
108 print "Found: " + parser.targets[0]
109 url += "/" + parser.targets[0]
110
111 if localFile is None:
112 if os.path.exists(parser.targets[0]):
113 print "File " + parser.targets[0] + " already exists and will not be downloaded..."
114 else:
115 urllib.urlretrieve(url, parser.targets[0])
116 print "File saved as: " + parser.targets[0]
117 else:
118 if os.path.exists(localFile):
119 print "File "+ localFile +" already exists and will not be downloaded..."
120 else:
121 if parser.targets[0] == localFile:
122 print "File found and destination match."
123 elif parser.matcher.match(localFile) != "None":
124 print "File found matches destination pattern."
125 else:
126 print "WARNING: the file found \'" + parser.targets[0] + "\' does not match \'" + localFile + "\'"
127 print "Ensure the downloaded version is suitable."
128
129 urllib.urlretrieve(url, localFile)
130 print "File saved as: " + localFile
131
132 else:
133 print "No matches found!"
134
135 obj.close()
136# End 'main' function. }}}
137
if __name__ == "__main__":
    # Hand the value returned by main() to sys.exit() so that any status it
    # reports becomes the process exit code; a None return still exits with 0.
    sys.exit(main())
Note: See TracBrowser for help on using the repository browser.