| 1 | #!/usr/bin/env python | 
|---|
| 2 | # -*- coding: ISO-8859-1 -*- | 
|---|
| 3 | # | 
|---|
| 4 | # TODO: Improve upon extension recognition by checking for mismatches in found targets | 
|---|
| 5 | # and specified local file. | 
|---|
| 6 | # | 
|---|
| 7 |  | 
|---|
| 8 | # imports {{{ | 
|---|
| 9 | import os,sys,re | 
|---|
| 10 | import urllib | 
|---|
| 11 | from HTMLParser import HTMLParser | 
|---|
| 12 | from urllib import FancyURLopener | 
|---|
| 13 | # }}} | 
|---|
class MyHTMLParser(HTMLParser): #{{{
    """HTML parser that collects the 'href' values matching a pattern.

    Feed it a page with 'feed()'; every 'href' attribute whose value matches
    the compiled pattern (anchored at the start of the value, as done by
    're.match') is appended to 'self.targets' in document order.
    """

    def __init__(self, pattern):
        """Compile 'pattern' (a regular expression string) and start with no targets."""
        HTMLParser.__init__(self)
        self.matcher = re.compile(pattern)
        self.targets = []

    def handle_starttag(self, tag, attrs):
        """Record matching 'href' values of a start tag.

        'attrs' is the list of (name, value) pairs handed over by HTMLParser.
        """
        for name, value in attrs:
            if name != "href":
                continue
            # A valueless attribute such as '<a href>' is reported with a
            # value of None, which 're.match' would reject with a TypeError.
            if value is None:
                continue
            if self.matcher.match(value) is not None:
                self.targets.append(value)
#}}}
def _print_usage():
    """Print the boxed usage banner shown when the command line is invalid."""
    rule = "******************************************************************************************************************************"
    blank = "*                                                                                                                            *"
    banner = (
        rule,
        "* Invalid input!                                                                                                             *",
        blank,
        "* Try: 'DownloadExternalPackage.py url [localFile]'                                                                          *",
        blank,
        "* Where 'URL' is the URL with an explicit package name or the URL followed by the truncated package name. And 'localFile' is *",
        "* the file name (including extension) that you would like to save as.                                                        *",
        blank,
        "* Examples:                                                                                                                  *",
        blank,
        "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/petsc-2.3.2-p3.tar.gz' 'petsc-2.3.2-p3.tar.gz' *",
        blank,
        "*     This is the old style and the safest way to download a package.                                                        *",
        blank,
        "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' 'libtool.tar.gz'                      *",
        blank,
        "*     This is the new style. For packages like 'Libtool', which we never expect to be using multiple versions, this will     *",
        "*     download the most recent version and save it as the generic 'libtool.tar.gz'.                                          *",
        blank,
        "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/gsl-1.' 'gsl-1.15.tar.gz'                      *",
        blank,
        "*     This is the new style. This is a demonstration of how this script can be used to disambiguate a package name if there  *",
        "*     are more than once package matching 'gsl-'.                                                                            *",
        blank,
        "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool'                                       *",
        blank,
        "*     This is the new style. This will download a package with 'libtool' as a prefix and save it as its canonical name.      *",
        blank,
        blank,
        rule,
    )
    for line in banner:
        print(line)


def main(argv=None): # {{{
    """Download one external package listed on a web directory page.

    argv[1] is split at its last '/' into the directory URL, which is fetched
    and scanned for 'href' attributes, and the file name (or name prefix) to
    look for. argv[2], when given, is the local file name to save as;
    otherwise the name found on the page is used.

    Args:
        argv: argument list laid out like sys.argv (program name first).
            Defaults to sys.argv when omitted.

    Returns:
        0 when the file was downloaded or already exists locally, 1 when the
        command line is invalid or the search did not resolve to exactly one
        usable file.
    """
    if argv is None:
        argv = sys.argv

    # Separates the URL into a directory and the file or pattern based on the
    # last appearance of '/'. Without an argument, or without any '/' in it,
    # there is nothing sensible to fetch, so show the usage and stop here
    # instead of carrying on with undefined values.
    pivot = argv[1].rfind("/") if len(argv) > 1 else -1
    if pivot < 0:
        _print_usage()
        return 1
    url = argv[1][:pivot]
    find = argv[1][pivot + 1:]

    if len(argv) > 2:
        localFile = argv[2]
        print("Downloaded file will be saved as: " + localFile)
    else:
        localFile = None
        print("Downloaded file will saved with the same file name.")

    print("Looking for: " + find)

    # The requested name is matched literally (escaped), followed by any run
    # of name characters and an optional archive/executable suffix.
    #
    # NOTE(review): the suffix group is optional and the match is not anchored
    # at the end, so it does not actually restrict the accepted extensions;
    # 'tar\.gz2' also looks like a typo for 'tar.bz2'. Confirm the intent
    # before tightening the pattern.
    #
    # WARNING: The external packages directory includes executable binaries with
    # '.exe' extensions. As such, '.exe' is an acceptable suffix, but this is
    # inherently dangerous since this script can be used to download from any
    # valid website. Furthermore, if an individual attempts a "man-in-the-middle"
    # attack, then the user would be capable of downloading executables from
    # an untrusted source.
    pattern = re.escape(find) + r"[\w.-]*(\.tar\.gz|tar\.gz2|tgz|zip|exe)?"
    parser = MyHTMLParser(pattern)

    # Creates a 'FancyURL' which allows the script to fail gracefully by catching
    # HTTP error codes 30X and several 40X(where 'X' is a natural number).
    urlObject = FancyURLopener()
    obj = urlObject.open(url)
    try:
        parser.feed(obj.read())
    finally:
        # Release the connection even when reading or parsing fails.
        obj.close()

    # The same link may appear more than once on a page; repeated hrefs name
    # the same file and must not count as an ambiguity.
    targets = []
    for href in parser.targets:
        if href not in targets:
            targets.append(href)

    # If a file pattern was used to describe the file that should be downloaded,
    # then there is the potential for multiple file matches. Currently, the script
    # will detect this ambiguity and print out all the matches, while informing
    # the user that the search must be refined.
    #
    # TODO: Prompt the user to select from a list his/her preferred target.
    if len(targets) > 1:
        print("Could not resolve your download due to the number of hits.")
        print("Refine your search.")
        for href in targets:
            print(href)
        return 1

    if not targets:
        print("No matches found!")
        return 1

    target = targets[0]
    print("Found: " + target)
    url += "/" + target

    if localFile is None:
        # The href comes from the remote page: keep only its last path
        # component so it cannot point outside the current directory.
        destination = os.path.basename(target)
        if not destination:
            print("Cannot derive a local file name from '" + target + "'")
            return 1
    else:
        destination = localFile

    if os.path.exists(destination):
        print("File " + destination + " already exists and will not be downloaded...")
        return 0

    if localFile is not None:
        if target == localFile:
            print("File found and destination match.")
        elif parser.matcher.match(localFile) is not None:
            print("File found matches destination pattern.")
        else:
            print("WARNING: the file found '" + target + "' does not match '" + localFile + "'")
            print("Ensure the downloaded version is suitable.")

    urllib.urlretrieve(url, destination)
    print("File saved as: " + destination)
    return 0
# End 'main' function. }}}
| 137 |  | 
|---|
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()