Changeset 14213
- Timestamp: 01/07/13 16:44:27 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
View: Tabular | Unified
File: issm/trunk-jpl/scripts/DownloadExternalPackage.py
r12881 r14213 6 6 # 7 7 8 # imports {{{ 8 9 import os,sys,re 9 10 import urllib 10 11 from HTMLParser import HTMLParser 11 12 from urllib import FancyURLopener 12 13 # Start class myHTMLParser 14 class MyHTMLParser(HTMLParser): 13 # }}} 14 class MyHTMLParser(HTMLParser): #{{{ 15 15 16 16 def __init__(self, pattern): … … 23 23 if "href" == i[0] and str(self.matcher.match(i[1])) != "None": 24 24 self.targets.append(i[1]) 25 # End class myHTMLParser 25 #}}} 26 def main(argv=None): # {{{ 27 # Separates the URL into a directory and the file or pattern based on the 28 # last appearance of '/'. 29 if len(sys.argv) > 1: 30 pivot = sys.argv[1].rfind("/") 31 url = (sys.argv[1])[:pivot] 32 pivot += 1 33 find = (sys.argv[1])[pivot:] 34 else: 35 print "******************************************************************************************************************************" 36 print "* Invalid input! *" 37 print "* *" 38 print "* Try: 'DownloadExternalPackage.py url [localFile]' *" 39 print "* *" 40 print "* Where 'URL' is the URL with an explicit package name or the URL followed by the truncated package name. And 'localFile' is *" 41 print "* the file name (including extension) that you would like to save as. *" 42 print "* *" 43 print "* Examples: *" 44 print "* *" 45 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/petsc-2.3.2-p3.tar.gz' 'petsc-2.3.2-p3.tar.gz' *" 46 print "* *" 47 print "* This is the old style and the safest way to download a package. *" 48 print "* *" 49 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' 'libtool.tar.gz' *" 50 print "* *" 51 print "* This is the new style. For packages like 'Libtool', which we never expect to be using multiple versions, this will *" 52 print "* download the most recent version and save it as the generic 'libtool.tar.gz'. *" 53 print "* *" 54 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/gsl-1.' 
'gsl-1.15.tar.gz' *" 55 print "* *" 56 print "* This is the new style. This is a demonstration of how this script can be used to disambiguate a package name if there *" 57 print "* are more than once package matching 'gsl-'. *" 58 print "* *" 59 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' *" 60 print "* *" 61 print "* This is the new style. This will download a package with 'libtool' as a prefix and save it as its canonical name. *" 62 print "* *" 63 print "* *" 64 print "******************************************************************************************************************************" 65 66 if len(sys.argv) > 2: 67 localFile=sys.argv[2] 68 print "Downloaded file will be saved as: " + localFile 69 else: 70 localFile = None 71 print "Downloaded file will saved with the same file name." 72 73 74 print "Looking for: " + find 75 76 # As an extra precaution, if no extension is given for a particular package 77 # such as '.../libtool', then ensure that files found are of appropriate 78 # file extensions. 79 # 80 # WARNING: The external packages directory includes executable binaries with 81 # '.exe' extensions. As such, '.exe' is an acceptable suffix, but this is 82 # inherently dangerous since this script can be used to download from any 83 # valid website. Furthermore, if an individual attempts a "man-in-the-middle" 84 # attack, then the user would be capable of downloading executables from 85 # an untrusted source. 86 pattern = find + "[\w.-]*(\.tar\.gz|tar\.gz2|tgz|zip|exe)?" 87 parser = MyHTMLParser(pattern) 88 89 # Creates a 'FancyURL' which allows the script to fail gracefully by catching 90 # HTTP error codes 30X and several 40X(where 'X' is a natural number). 91 urlObject = FancyURLopener() 92 obj = urlObject.open(url) 93 parser.feed(obj.read()) 94 95 # If a file pattern was used to describe the file that should be downloaded, 96 # then there is the potential for multiple file matches. 
Currently, the script 97 # will detect this ambiguity and print out all the matches, while informing 98 # the user that he must refine his search. 99 # 100 # TODO: Prompt the user to select from a list his/her preferred target. 101 if len(parser.targets) > 1: 102 print "Could not resolve your download due to the number of hits." 103 print "Refine your search." 104 for i in parser.targets: 105 print i 106 107 elif len(parser.targets) == 1: 108 print "Found: " + parser.targets[0] 109 url += "/" + parser.targets[0] 110 111 if localFile is None: 112 if os.path.exists(parser.targets[0]): 113 print "File " + parser.targets[0] + " already exists and will not be downloaded..." 114 else: 115 urllib.urlretrieve(url, parser.targets[0]) 116 print "File saved as: " + parser.targets[0] 117 else: 118 if os.path.exists(localFile): 119 print "File "+ localFile +" already exists and will not be downloaded..." 120 else: 121 if parser.targets[0] == localFile: 122 print "File found and destination match." 123 elif parser.matcher.match(localFile) != "None": 124 print "File found matches destination pattern." 125 else: 126 print "WARNING: the file found \'" + parser.targets[0] + "\' does not match \'" + localFile + "\'" 127 print "Ensure the downloaded version is suitable." 128 129 urllib.urlretrieve(url, localFile) 130 print "File saved as: " + localFile 131 132 else: 133 print "No matches found!" 134 135 obj.close() 136 # End 'main' function. }}} 26 137 27 # Separates the URL into a directory and the file or pattern based on the 28 # last appearance of '/'. 29 if len(sys.argv) > 1: 30 pivot = sys.argv[1].rfind("/") 31 url = (sys.argv[1])[:pivot] 32 pivot += 1 33 find = (sys.argv[1])[pivot:] 34 else: 35 print "******************************************************************************************************************************" 36 print "* Invalid input! 
*" 37 print "* *" 38 print "* Try: 'DownloadExternalPackage.py url [localFile]' *" 39 print "* *" 40 print "* Where 'URL' is the URL with an explicit package name or the URL followed by the truncated package name. And 'localFile' is *" 41 print "* the file name (including extension) that you would like to save as. *" 42 print "* *" 43 print "* Examples: *" 44 print "* *" 45 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/petsc-2.3.2-p3.tar.gz' 'petsc-2.3.2-p3.tar.gz' *" 46 print "* *" 47 print "* This is the old style and the safest way to download a package. *" 48 print "* *" 49 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' 'libtool.tar.gz' *" 50 print "* *" 51 print "* This is the new style. For packages like 'Libtool', which we never expect to be using multiple versions, this will *" 52 print "* download the most recent version and save it as the generic 'libtool.tar.gz'. *" 53 print "* *" 54 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/gsl-1.' 'gsl-1.15.tar.gz' *" 55 print "* *" 56 print "* This is the new style. This is a demonstration of how this script can be used to disambiguate a package name if there *" 57 print "* are more than once package matching 'gsl-'. *" 58 print "* *" 59 print "* DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' *" 60 print "* *" 61 print "* This is the new style. This will download a package with 'libtool' as a prefix and save it as its canonical name. *" 62 print "* *" 63 print "* *" 64 print "******************************************************************************************************************************" 65 66 if len(sys.argv) > 2: 67 localFile=sys.argv[2] 68 print "Downloaded file will saved as: " + localFile 69 else: 70 localFile = None 71 print "Downloaded file will saved with the same file name." 
72 73 74 print "Looking for " + find 75 76 # As an extra precaution, if no extension is given for a particular package 77 # such as '.../libtool', then ensure that files found are of appropriate 78 # file extensions. 79 # 80 # WARNING: The external packages directory includes executable binaries with 81 # '.exe' extensions. As such, '.exe' is an acceptable suffix, but this is 82 # inherently dangerous since this script can be used to download from any 83 # valid website. Furthermore, if an individual attempts a "man-in-the-middle" 84 # attack, then the user would be capable of downloading executables from 85 # an untrusted source. 86 pattern = find + "[\w.-]*(\.tar\.gz|tar\.gz2|tgz|zip|exe)?" 87 parser = MyHTMLParser(pattern) 88 89 # Creates a 'FancyURL' which allows the script to fail gracefully by catching 90 # HTTP error codes 30X and several 40X(where 'X' is a natural number). 91 urlObject = FancyURLopener() 92 obj = urlObject.open(url) 93 parser.feed(obj.read()) 94 95 # If a file pattern was used to describe the file that should be downloaded, 96 # then there is the potential for multiple file matches. Currently, the script 97 # will detect this ambiguity and print out all the matches, while informing 98 # the user that he must refine his search. 99 # 100 # TODO: Prompt the user to select from a list his/her preferred target. 101 if len(parser.targets) > 1: 102 print "Could not resolve your download due to the number of hits." 103 print "Refine your search." 104 for i in parser.targets: 105 print i 106 107 elif len(parser.targets) == 1: 108 print "Found: " + parser.targets[0] 109 url += "/" + parser.targets[0] 110 111 if localFile is None: 112 if os.path.exists(parser.targets[0]): 113 print "File " + parser.targets[0] + " already exists and will not be downloaded..." 
114 else: 115 urllib.urlretrieve(url, parser.targets[0]) 116 print "File saved as: " + parser.targets[0] 117 else: 118 if os.path.exists(localFile): 119 print "File "+ localFile +" already exists and will not be downloaded..." 120 else: 121 if parser.targets[0] == localFile: 122 print "File found and destination match." 123 elif parser.matcher.match(localFile) != "None": 124 print "File found matches destination pattern." 125 else: 126 print "WARNING: the file found \'" + parser.targets[0] + "\' does not match \'" + localFile + "\'" 127 print "Ensure the downloaded version is suitable." 128 129 urllib.urlretrieve(url, localFile) 130 print "File saved as: " + localFile 131 132 else: 133 print "No matches found!" 134 135 obj.close() 138 if __name__ == "__main__": 139 main()
Note: See TracChangeset for help on using the changeset viewer.