| 1 | #!/usr/bin/env python
|
|---|
| 2 | # -*- coding: ISO-8859-1 -*-
|
|---|
| 3 | #
|
|---|
| 4 | # TODO: Improve upon extension recognition by checking for mismatches in found targets
|
|---|
| 5 | # and specified local file.
|
|---|
| 6 | #
|
|---|
| 7 |
|
|---|
| 8 | # imports {{{
|
|---|
| 9 | import os,sys,re
|
|---|
| 10 | import urllib
|
|---|
| 11 | from HTMLParser import HTMLParser
|
|---|
| 12 | from urllib import FancyURLopener
|
|---|
| 13 | # }}}
|
|---|
class MyHTMLParser(HTMLParser): #{{{
    """HTML parser that collects ``href`` values matching a regular expression.

    The pattern is compiled once at construction. Every ``href`` attribute
    seen on a start tag whose value matches the pattern (anchored at the
    start of the value, as ``re.match`` does) is appended to ``targets`` in
    the order it is encountered.
    """

    def __init__(self, pattern):
        """Compile *pattern* and start with an empty ``targets`` list."""
        HTMLParser.__init__(self)
        self.matcher = re.compile(pattern)
        self.targets = []

    def handle_starttag(self, tag, attrs):
        """Record each matching ``href`` attribute value of a start tag.

        *attrs* is the list of ``(name, value)`` pairs supplied by the
        parser; *tag* is not inspected, so an ``href`` on any element counts.
        """
        for name, value in attrs:
            # A valueless attribute such as ``<a href>`` is reported with a
            # value of None; passing that to the regex would raise TypeError.
            if name != "href" or value is None:
                continue
            if self.matcher.match(value) is not None:
                self.targets.append(value)
#}}}
|
|---|
def _print_usage():
    """Print the command-line help banner shown when no URL is supplied."""
    print("******************************************************************************************************************************")
    print("* Invalid input! *")
    print("* *")
    print("* Try: 'DownloadExternalPackage.py url [localFile]' *")
    print("* *")
    print("* Where 'URL' is the URL with an explicit package name or the URL followed by the truncated package name. And 'localFile' is *")
    print("* the file name (including extension) that you would like to save as. *")
    print("* *")
    print("* Examples: *")
    print("* *")
    print("* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/petsc-2.3.2-p3.tar.gz' 'petsc-2.3.2-p3.tar.gz' *")
    print("* *")
    print("* This is the old style and the safest way to download a package. *")
    print("* *")
    print("* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' 'libtool.tar.gz' *")
    print("* *")
    print("* This is the new style. For packages like 'Libtool', which we never expect to be using multiple versions, this will *")
    print("* download the most recent version and save it as the generic 'libtool.tar.gz'. *")
    print("* *")
    print("* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/gsl-1.' 'gsl-1.15.tar.gz' *")
    print("* *")
    print("* This is the new style. This is a demonstration of how this script can be used to disambiguate a package name if there *")
    print("* are more than once package matching 'gsl-'. *")
    print("* *")
    print("* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' *")
    print("* *")
    print("* This is the new style. This will download a package with 'libtool' as a prefix and save it as its canonical name. *")
    print("* *")
    print("* *")
    print("******************************************************************************************************************************")


def main(argv=None): # {{{
    """Download a package identified by an exact URL or a file-name prefix.

    argv -- argument list shaped like ``sys.argv`` (program name first);
            defaults to ``sys.argv`` when omitted. ``argv[1]`` is the URL
            and the optional ``argv[2]`` is the local file name to save as.

    The URL is split at its last '/' into a directory URL and a file name
    (or prefix). The directory page is fetched and its ``href`` values are
    matched against the prefix. Exactly one match is downloaded (unless the
    destination file already exists), several matches are listed so the
    user can refine the search, and zero matches are reported. When no URL
    is supplied the usage banner is printed and nothing else is done.

    Always returns None.
    """
    if argv is None:
        argv = sys.argv

    # Without a URL there is nothing to look for: show the help and stop
    # here instead of continuing with an undefined search pattern.
    if len(argv) < 2:
        _print_usage()
        return

    # Separates the URL into a directory and the file or pattern based on the
    # last appearance of '/'.
    pivot = argv[1].rfind("/")
    url = argv[1][:pivot]
    find = argv[1][pivot + 1:]

    if len(argv) > 2:
        localFile = argv[2]
        print("Downloaded file will be saved as: " + localFile)
    else:
        localFile = None
        print("Downloaded file will saved with the same file name.")

    print("Looking for: " + find)

    # As an extra precaution, if no extension is given for a particular package
    # such as '.../libtool', then ensure that files found are of appropriate
    # file extensions.
    #
    # WARNING: The external packages directory includes executable binaries with
    # '.exe' extensions. As such, '.exe' is an acceptable suffix, but this is
    # inherently dangerous since this script can be used to download from any
    # valid website. Furthermore, if an individual attempts a "man-in-the-middle"
    # attack, then the user would be capable of downloading executables from
    # an untrusted source.
    #
    # NOTE(review): 'find' is inserted unescaped, so regex metacharacters in
    # it (e.g. '.') are interpreted as such, and the extension group is
    # optional, so it does not actually restrict matches. Left unchanged to
    # preserve existing matching behavior.
    pattern = find + r"[\w.-]*(\.tar\.gz|tar\.gz2|tgz|zip|exe)?"
    parser = MyHTMLParser(pattern)

    # Creates a 'FancyURL' which allows the script to fail gracefully by catching
    # HTTP error codes 30X and several 40X(where 'X' is a natural number).
    urlObject = FancyURLopener()
    obj = urlObject.open(url)
    try:
        parser.feed(obj.read())
    finally:
        # The listing is fully read at this point; release the connection
        # even if parsing raised.
        obj.close()

    # If a file pattern was used to describe the file that should be downloaded,
    # then there is the potential for multiple file matches. Currently, the script
    # will detect this ambiguity and print out all the matches, while informing
    # the user that he must refine his search.
    #
    # TODO: Prompt the user to select from a list his/her preferred target.
    targets = parser.targets
    if len(targets) > 1:
        print("Could not resolve your download due to the number of hits.")
        print("Refine your search.")
        for candidate in targets:
            print(candidate)

    elif len(targets) == 1:
        target = targets[0]
        print("Found: " + target)
        url += "/" + target

        if localFile is None:
            # NOTE(review): 'target' comes from the remote page and is used
            # as a local path as-is; consider restricting it to a base name.
            if os.path.exists(target):
                print("File " + target + " already exists and will not be downloaded...")
            else:
                urllib.urlretrieve(url, target)
                print("File saved as: " + target)
        else:
            if os.path.exists(localFile):
                print("File "+ localFile +" already exists and will not be downloaded...")
            else:
                if target == localFile:
                    print("File found and destination match.")
                elif parser.matcher.match(localFile) is not None:
                    print("File found matches destination pattern.")
                else:
                    print("WARNING: the file found '" + target + "' does not match '" + localFile + "'")
                    print("Ensure the downloaded version is suitable.")

                urllib.urlretrieve(url, localFile)
                print("File saved as: " + localFile)

    else:
        print("No matches found!")
# End 'main' function. }}}
|
|---|
| 137 |
|
|---|
# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
|---|