#!/usr/bin/env python
#
# Scrape SDP UUIDs from Bluetooth SIG page
# Copyright 2017 BlueKitchen GmbH
#

from lxml import html
import datetime
import requests
import sys
import os
import codecs
import re

program_info = '''
BTstack SDP UUID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

header = '''
/**
 * bluetooth_sdp.h generated from Bluetooth SIG website for BTstack
 */

#ifndef __BLUETOOTH_SDP_H
#define __BLUETOOTH_SDP_H
'''

page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

trailer = '''
#endif
'''

# Patterns used by camel_to_underscore(), compiled once at import time.
_CAMEL_WORD_RE = re.compile(r'(.)([A-Z][a-z]+)')
_CAMEL_BOUNDARY_RE = re.compile(r'([a-z0-9])([A-Z])')

# Substitutions applied by create_pretty_define(), in this exact order
# (' - ' must be handled before the single ' ' and '-' rules).
_DEFINE_REPLACEMENTS = (
    (' - ', '_'),
    (' ', '_'),
    ('/', ''),
    ('(', '_'),
    (')', ''),
    ('-', '_'),
    ('PnP', 'PNP'),
)

# First-column texts that mark a table's header row.
_TABLE_HEADER_NAMES = ('Protocol Name', 'Service Class Name')


# Convert CamelCase to snake_case from http://stackoverflow.com/a/1176023
def camel_to_underscore(name):
    """Return `name` converted from CamelCase to UPPER_SNAKE_CASE."""
    partial = _CAMEL_WORD_RE.sub(r'\1_\2', name)
    return _CAMEL_BOUNDARY_RE.sub(r'\1_\2', partial).upper()


def create_pretty_define(name):
    """Turn a name scraped from the page into a C macro name suffix.

    Separators and brackets are replaced or dropped, the result is converted
    to UPPER_SNAKE_CASE, and a few artefacts of that conversion are repaired
    ('__' -> '_', '3_D' -> '3D', 'L2_CAP' -> 'L2CAP').
    """
    for old, new in _DEFINE_REPLACEMENTS:
        name = name.replace(old, new)
    define = camel_to_underscore(name)
    return define.replace('__', '_').replace('3_D', '3D').replace('L2_CAP', 'L2CAP')


def clean_remark(remark):
    """Collapse every run of whitespace in `remark` into a single space."""
    return " ".join(remark.split())


def _cell_text(cell):
    """Return the text of a table cell with all non-ASCII characters dropped.

    The encode/decode round trip yields a text string on both Python 2 and
    Python 3. Encoding alone yields bytes on Python 3, which never compare
    equal to str literals and reject str arguments to replace()/startswith().
    """
    return cell.text_content().encode('ascii', 'ignore').decode('ascii')


def process_table(fout, tbody, pattern):
    """Write one '#define' line per data row of an assigned-numbers table.

    fout    -- writable text file object receiving the generated lines
    tbody   -- lxml element whose children are the table rows
    pattern -- %-format string taking (name, value, remark)

    Header rows, the '(Max value ...' footer row and rows with fewer than
    three cells are skipped. Each emitted entry is also echoed to stdout.
    """
    for row in tbody:
        # Rows without name/value/remark cells cannot yield a define;
        # indexing them would raise IndexError.
        if len(row) < 3:
            continue
        name = _cell_text(row[0])
        value = _cell_text(row[1])
        remark = _cell_text(row[2])
        # skip tbody headers
        if name in _TABLE_HEADER_NAMES:
            continue
        # skip tbody footers
        if value.startswith('(Max value '):
            continue
        name = create_pretty_define(name)
        remark = clean_remark(remark)
        fout.write(pattern % (name, value, remark))
        print("'%s' = '%s' -- %s" % (name, value, remark))
    # Blank line after the table's defines.
    fout.write('\n')


def _write_section_title(fout, title):
    """Write a three-line '//' comment banner containing `title`."""
    fout.write('//\n')
    fout.write('// %s\n' % title)
    fout.write('//\n')


def scrape_page(fout, url, local_file='service-discovery.html'):
    """Extract protocol and service class UUIDs and write them as defines.

    fout       -- writable text file object receiving the generated code
    url        -- page URL; always written into the generated page comment
    local_file -- path of a saved copy of the page (UTF-8) to parse. This is
                  the default, matching the script's previous behaviour.
                  Pass None to download `url` instead.

    Raises IndexError if the page does not contain the two expected tables.
    """
    print("Parsing %s" % url)

    fout.write(page_info.format(page=url))

    if local_file is None:
        # get from web
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        content = response.text
    else:
        # fetch from local file; 'with' guarantees the handle is closed
        with codecs.open(local_file, "r", "utf-8") as page_file:
            content = page_file.read()

    tree = html.fromstring(content)

    # Every table that has the 'Protocol Identifiers' heading as a preceding
    # sibling. The code relies on the first being the protocol table and the
    # second the service class table -- presumably this matches the page
    # layout; verify if the page changes.
    tables = tree.xpath("//table[preceding-sibling::h3 = 'Protocol Identifiers']")
    if len(tables) < 2:
        raise IndexError(
            "expected at least 2 tables after the 'Protocol Identifiers' "
            "heading, found %d" % len(tables))

    # Protocol Identifiers
    _write_section_title(fout, 'Protocol Identifiers')
    process_table(fout, tables[0][0], '#define BLUETOOTH_PROTOCOL_%-55s %s // %s\n')

    # Service Classes
    _write_section_title(fout, 'Service Classes')
    process_table(fout, tables[1][0], '#define BLUETOOTH_SERVICE_CLASS_%-50s %s // %s\n')


def main():
    """Generate src/bluetooth_sdp.h in the directory above this script."""
    # Make the script path absolute before taking its dirname. Otherwise,
    # when the script is started from its own directory, dirname() is ''
    # and '' + '/..' resolves to the filesystem root.
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    btstack_root = os.path.abspath(os.path.join(script_dir, '..'))
    gen_path = os.path.join(btstack_root, 'src', 'bluetooth_sdp.h')

    print(program_info)

    with open(gen_path, 'wt') as fout:
        # NOTE: header currently has no {datetime} placeholder, so the
        # argument is ignored and the header is written unchanged.
        fout.write(header.format(datetime=str(datetime.datetime.now())))
        scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/service-discovery')
        fout.write(trailer)

    print('Scraping successful!\n')


if __name__ == '__main__':
    main()