# Copyright (c) 2013 The WebM project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS.  All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
#
# This simple script pulls test files from the webm homepage.
# It is intelligent enough to only pull files if
# 1) File / test_data folder does not exist
# 2) SHA mismatch
#
# Usage: get_files.py -u <url> -i <input_csv> -o <output_dir>
# Exit status: 0 on success, 1 on missing arguments or when a file could not
# be fetched with a matching SHA, 2 on malformed command-line options.

import csv
import getopt
import hashlib
import itertools
import os.path
import re
import sys
import time

try:
    import pycurl
except ImportError:
    # pycurl is only needed when a file actually has to be downloaded.
    # Deferring the failure lets the script run (and be imported) when every
    # file is already present locally with the right SHA.
    pycurl = None

# globals (populated by main() from the command line)
url = ''
file_list_path = ''
local_resource_path = ''

# constants
ftp_retries = 3

SHA_COL = 0
NAME_COL = 1
EXPECTED_COL = 2
HASH_CHUNK = 65536

USAGE = 'get_files.py -u <url> -i <input_csv> -o <output_dir>'


# Helper functions:
def get_file_sha(filename):
    """Return the SHA-1 hex digest of a file.

    The file is read in HASH_CHUNK-sized pieces so large test vectors are
    never loaded into memory at once.

    Args:
        filename: Path of the file to hash.

    Returns:
        The hex digest string, or None (after printing a message) if the
        file could not be opened or read.
    """
    sha_hash = hashlib.sha1()
    try:
        with open(filename, 'rb') as stream:
            # iter() with a sentinel stops at the empty read that marks EOF.
            for buf in iter(lambda: stream.read(HASH_CHUNK), b''):
                sha_hash.update(buf)
    except IOError:
        print("Error reading " + filename)
        return None
    return sha_hash.hexdigest()


def download_and_check_sha(url, filename, sha):
    """Download url/filename into the output directory and verify its SHA.

    The file is written to local_resource_path/filename (module global).

    Args:
        url: Base URL; the file is fetched from url + "/" + filename.
        filename: Name of the file, relative to both url and the output dir.
        sha: Expected SHA-1 hex digest.

    Returns:
        True if the downloaded file's SHA-1 equals sha, False otherwise.

    Raises:
        ImportError: If pycurl is not installed.
        pycurl.error: If the transfer itself fails.
    """
    if pycurl is None:
        raise ImportError("pycurl is required to download " + filename)
    path = os.path.join(local_resource_path, filename)
    # The with/finally pair releases the file and the curl handle even when
    # perform() raises.
    with open(path, "wb") as fp:
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.URL, url + "/" + filename)
            curl.setopt(pycurl.WRITEDATA, fp)
            curl.perform()
        finally:
            curl.close()
    return get_file_sha(path) == sha


def _read_file_list(path):
    """Parse the file list into (filename, sha) pairs.

    Our 'csv' file uses multiple spaces as a delimiter, python's csv class
    only uses single character delimiters, so runs of spaces are collapsed
    first. Rows that do not have exactly EXPECTED_COL fields are skipped,
    and any leading '*' is stripped from the file name.
    """
    entries = []
    with open(path, "rb") as file_list_csv:
        reader = csv.reader(
            (re.sub(' +', ' ', line.decode('utf-8'))
             for line in file_list_csv),
            delimiter=' ')
        for row in reader:
            if len(row) != EXPECTED_COL:
                continue
            entries.append((row[NAME_COL].lstrip('*'), row[SHA_COL]))
    return entries


# Main script
def main(argv=None):
    """Fetch every file in the list that is missing or has the wrong SHA.

    Args:
        argv: Command-line arguments without the program name; defaults to
            sys.argv[1:].

    Returns:
        Process exit status: 0 on success, 1 if an argument is missing or a
        file could not be downloaded with a matching SHA, 2 if the options
        could not be parsed.
    """
    global url, file_list_path, local_resource_path

    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, _ = getopt.getopt(
            argv, "u:i:o:", ["url=", "input_csv=", "output_dir="])
    except getopt.GetoptError:
        print(USAGE)
        return 2

    # Parse into locals first so values from a previous call never leak in.
    parsed_url = ''
    parsed_list = ''
    parsed_out = ''
    for opt, arg in opts:
        if opt in ("-u", "--url"):
            parsed_url = arg
        elif opt in ("-i", "--input_csv"):
            parsed_list = arg
        elif opt in ("-o", "--output_dir"):
            parsed_out = arg

    # Check the values rather than the raw argument count, so "--url=X" and
    # "-uX" are accepted as well as "-u X".
    if not (parsed_url and parsed_list and parsed_out):
        print("Expects two paths and a url!")
        return 1

    url = parsed_url
    file_list_path = parsed_list
    local_resource_path = parsed_out

    if not os.path.isdir(local_resource_path):
        os.makedirs(local_resource_path)

    # Download files, only if they don't already exist and have correct shas
    failed = []
    for filename, sha in _read_file_list(file_list_path):
        path = os.path.join(local_resource_path, filename)
        if os.path.isfile(path) and get_file_sha(path) == sha:
            print(path + ' exists, skipping')
            continue
        for _ in range(ftp_retries):
            print("Downloading " + path)
            if download_and_check_sha(url, filename, sha):
                break
            print("Sha does not match, retrying...")
        else:
            # Every attempt produced the wrong SHA; report it rather than
            # pretending the fetch succeeded.
            print("Failed to download " + path)
            failed.append(filename)

    return 1 if failed else 0


if __name__ == "__main__":
    sys.exit(main())