#!/usr/bin/env python
#
# Scrape SDP UUIDs from Bluetooth SIG page
# Copyright 2017 BlueKitchen GmbH
#

from lxml import html
import datetime
import requests
import sys
import os
import codecs
import re

program_info = '''
BTstack SDP UUID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

header = '''
/**
 * bluetooth_sdp.h generated from Bluetooth SIG website for BTstack
 */

#ifndef __BLUETOOTH_SDP_H
#define __BLUETOOTH_SDP_H
'''

page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

trailer = '''
#endif
'''


# Convert CamelCase to snake_case from http://stackoverflow.com/a/1176023
def camel_to_underscore(name):
    """Return `name` converted from CamelCase to UPPER_SNAKE_CASE.

    An underscore is inserted before each capitalized word and between a
    lowercase letter or digit and a following capital; the result is
    upper-cased (e.g. 'AudioSource' -> 'AUDIO_SOURCE').
    """
    s1 = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).upper()


def create_pretty_define(name):
    """Turn a human-readable table entry name into a C macro suffix.

    Separators and punctuation are replaced or dropped, the result is
    converted to UPPER_SNAKE_CASE, and a few tokens that the CamelCase
    conversion would split ('3D', 'L2CAP') are re-joined.
    """
    # Order matters: ' - ' must be collapsed before single spaces and
    # single dashes are handled, otherwise it would become '___'.
    for old, new in (
        (' - ', '_'),
        (' ', '_'),
        ('/', ''),
        ('(', '_'),
        (')', ''),
        ('-', '_'),
        ('PnP', 'PNP'),
    ):
        name = name.replace(old, new)
    define = camel_to_underscore(name)
    return define.replace('__', '_').replace('3_D', '3D').replace('L2_CAP', 'L2CAP')


def clean_remark(remark):
    """Collapse every run of whitespace in `remark` to a single space."""
    return " ".join(remark.split())


def _ascii_text(element):
    """Return the text content of `element` with non-ASCII characters dropped.

    The result is decoded back to text so that comparisons against string
    literals and %-formatting behave the same on Python 2 and Python 3
    (a bare .encode() yields bytes on Python 3).
    """
    return element.text_content().encode('ascii', 'ignore').decode('ascii')


def process_table(fout, table, pattern):
    """Write one #define line per data row of `table` to `fout`.

    fout    -- writable text file object
    table   -- element whose children are the table rows; the first three
               children of each row are read as name, value and remark
    pattern -- %-format string with three %s fields (name, value, remark)

    Header rows ('Protocol Name' / 'Service Class Name') and footer rows
    whose value starts with '(Max value ' are skipped.
    """
    for row in table:
        columns = list(row)
        name = _ascii_text(columns[0])
        value = _ascii_text(columns[1])
        remark = _ascii_text(columns[2])
        # skip table headers
        if name in ("Protocol Name", "Service Class Name"):
            continue
        # skip table footers
        if value.startswith('(Max value '):
            continue
        fout.write(pattern % (create_pretty_define(name), value, clean_remark(remark)))
    # blank line to separate this table from whatever is written next
    fout.write('\n')


def scrape_page(fout, url):
    """Extract protocol and service-class UUID tables and write them to `fout`.

    fout -- writable text file object receiving the generated C defines
    url  -- page URL; it is printed and recorded in the generated file

    NOTE: the page content is currently read from a local copy named
    'service-discovery.html' in the current working directory, not fetched
    from `url`. To fetch the live page, use `requests.get(url).text` instead.
    """
    print("Parsing %s" % url)

    fout.write(page_info.format(page=url))

    # test: fetch from local file 'service-discovery.html'
    with codecs.open("service-discovery.html", "r", "utf-8") as f:
        content = f.read()

    tree = html.fromstring(content)

    # process tables; position is 1-based
    tables = tree.xpath('//table/tbody')
    for index, table in enumerate(tables, 1):
        # 2 - Protocol Identifiers
        if index == 2:
            fout.write('//\n')
            fout.write('// Protocol Identifiers\n')
            fout.write('//\n')
            process_table(fout, table, '#define BLUETOOTH_PROTOCOL_%-55s %s // %s\n')

        # 3 - Service Classes
        if index == 3:
            fout.write('//\n')
            fout.write('// Service Classes\n')
            fout.write('//\n')
            process_table(fout, table, '#define BLUETOOTH_SERVICE_CLASS_%-50s %s // %s\n')


# the generated header is placed in <btstack root>/src, where the root is
# the parent of the directory containing this script
btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
gen_path = btstack_root + '/src/bluetooth_sdp.h'

print(program_info)

with open(gen_path, 'wt') as fout:
    # `header` currently has no {datetime} placeholder, so the keyword
    # argument is ignored by str.format
    fout.write(header.format(datetime=str(datetime.datetime.now())))
    scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/service-discovery')
    fout.write(trailer)

print('Scraping successful!\n')