#!/usr/bin/env python
#
# Scrape SDP UUIDs from Bluetooth SIG page
# Copyright 2017 BlueKitchen GmbH
#

from lxml import html
import datetime
import requests
import sys
import os
import codecs
import re

program_info = '''
BTstack SDP UUID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

header = '''
/**
 * bluetooth_sdp.h generated from Bluetooth SIG website for BTstack
 */

#ifndef __BLUETOOTH_SDP_H
#define __BLUETOOTH_SDP_H
'''

page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

trailer = '''
#endif
'''

# First-column texts that identify a table header row (no define is emitted).
HEADER_NAMES = ('Protocol Name', 'Service Class Name')

# 1-based position of a '//table/tbody' match on the page
#   -> (section title, printf-style pattern taking (name, value, remark)).
# Both prefixes plus their padding widths end at the same column so the
# values line up across the two sections.
TABLE_SECTIONS = {
    2: ('Protocol Identifiers', '#define BLUETOOTH_PROTOCOL_%-55s %s // %s\n'),
    3: ('Service Classes', '#define BLUETOOTH_SERVICE_CLASS_%-50s %s // %s\n'),
}


# Convert CamelCase to snake_case from http://stackoverflow.com/a/1176023
def camel_to_underscore(name):
    """Return *name* converted from CamelCase to UPPER_SNAKE_CASE."""
    s1 = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).upper()


def create_pretty_define(name):
    """Turn a human-readable table name into a C macro suffix.

    Separators and punctuation are replaced or removed, the result is
    converted to UPPER_SNAKE_CASE, and a few tokens that the CamelCase
    conversion would split are repaired (PnP, 3D, L2CAP).
    E.g. 'PnP Information' -> 'PNP_INFORMATION'.
    """
    # Order matters: ' - ' must be handled before the plain ' ' and '-' rules.
    for old, new in ((' - ', '_'), (' ', '_'), ('/', ''), ('(', '_'),
                     (')', ''), ('-', '_'), ('PnP', 'PNP')):
        name = name.replace(old, new)
    define = camel_to_underscore(name)
    return define.replace('__', '_').replace('3_D', '3D').replace('L2_CAP', 'L2CAP')


def clean_remark(remark):
    """Collapse every run of whitespace in *remark* into a single space."""
    return " ".join(remark.split())


def _cell_text(cell):
    """Return the ASCII-only text content of an lxml table cell.

    Non-ASCII characters are dropped. The result is decoded back to text so
    that comparisons and str methods behave the same on Python 2 and 3
    (a bare .encode() would leave bytes on Python 3).
    """
    return cell.text_content().encode('ascii', 'ignore').decode('ascii')


def process_table(fout, table, pattern):
    """Write one '#define' line per data row of *table* to *fout*.

    fout    -- writable text file object
    table   -- lxml element whose children are the table rows
    pattern -- %-format string taking (name, value, remark)

    Header rows, '(Max value ...' footer rows and rows with fewer than three
    cells are skipped. A blank line is written after the table.
    """
    for row in table:
        columns = list(row)
        # name / value / remark are required; anything shorter is not a data row
        if len(columns) < 3:
            continue
        name = _cell_text(columns[0]).strip()
        value = _cell_text(columns[1]).strip()
        remark = _cell_text(columns[2])
        # skip table headers
        if name in HEADER_NAMES:
            continue
        # skip table footers
        if value.startswith('(Max value '):
            continue
        fout.write(pattern % (create_pretty_define(name), value, clean_remark(remark)))
    fout.write('\n')


def scrape_page(fout, url):
    """Parse the assigned-numbers page and write its defines to *fout*.

    fout -- writable text file object
    url  -- page address; only used for the progress message and the
            generated comment, see the note below.
    """
    print("Parsing %s" % url)

    fout.write(page_info.format(page=url))

    # NOTE: the live download is disabled and a local copy named
    # 'service-discovery.html' in the current directory is parsed instead.
    # To fetch from the web use:
    #   content = requests.get(url).text
    with codecs.open("service-discovery.html", "r", "utf-8") as f:
        content = f.read()

    tree = html.fromstring(content)

    # process tables: only the table positions listed in TABLE_SECTIONS
    # are converted, all others are ignored
    tables = tree.xpath('//table/tbody')
    for index, table in enumerate(tables, 1):
        section = TABLE_SECTIONS.get(index)
        if section is None:
            continue
        title, pattern = section
        fout.write('//\n')
        fout.write('// %s\n' % title)
        fout.write('//\n')
        process_table(fout, table, pattern)


# The output location is derived from the script path: <script dir>/../src
btstack_root = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..'))
gen_path = os.path.join(btstack_root, 'src', 'bluetooth_sdp.h')


def main():
    """Generate src/bluetooth_sdp.h below the BTstack root."""
    print(program_info)

    with open(gen_path, 'wt') as fout:
        # header has no {datetime} placeholder at the moment, so this is a
        # pass-through; the argument is kept for templates that add one.
        fout.write(header.format(datetime=str(datetime.datetime.now())))
        scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/service-discovery')
        fout.write(trailer)

    print('Scraping successful!\n')


if __name__ == '__main__':
    main()