#!/usr/bin/env python
#
# Scrape Bluetooth company identifiers from the Bluetooth SIG page
# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
# and generate a C header with one #define per company.
#
# Copyright 2017 BlueKitchen GmbH
#

# NOTE: codecs is only referenced by the commented-out offline-test snippet
# in scrape_page; it is kept so that snippet keeps working when enabled.
import codecs
import datetime
import os
import re
import sys

from lxml import html
import requests

# HTTP request headers sent with the scrape request.
# NOTE(review): presumably the site rejects the default python-requests
# user agent, hence the curl one - confirm before changing.
headers = {'user-agent': 'curl/7.63.0'}

# Banner printed when the tool starts.
program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

# Start of the generated header; {datetime} is filled in via str.format().
header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_company_id.py
 * {datetime}
 */

#ifndef BLUETOOTH_COMPANY_ID_H
#define BLUETOOTH_COMPANY_ID_H
'''

# Comment emitted before the defines of a scraped page; {page} is its URL.
page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

# End of the generated header, closes the include guard.
trailer = '''
#endif
'''

# Macro names emitted so far, used to detect duplicate company names.
tags = []

def strip_non_ascii(string):
    """Return `string` without characters outside the 7-bit ASCII range.

    Only code points 1..126 are kept, so NUL and DEL (127) are dropped too.
    """
    return ''.join(c for c in string if 0 < ord(c) < 127)


# Everything from the first '(' to the last ')' (greedy match).
_BRACES_RE = re.compile(r'\(.*\)')

# Runs of two or more spaces.
_SPACES_RE = re.compile(r' {2,}')

# Ordered (old, new) replacements applied to the upper-cased company name
# while words are still separated by spaces.
# NOTE(review): the first entries match HTML entities that survived in the
# page text (upper-case because they are applied after .upper()). Their exact
# spelling was lost in the copy of this file that was reviewed, so the common
# spellings are all covered - confirm against the upstream tool.
_PRE_REPLACEMENTS = (
    ('&AMP;', ' AND '),
    ('&#39;', ''),
    ('&#039;', ''),
    ('&APOS;', ''),
    ('&QUOT;', ' '),
    ('+', ' AND '),
    (' - ', ' '),
    ('/', ' '),
    (';', ' '),
    (',', ''),
    ('.', ''),
    ('-', '_'),
)

# Ordered (old, new) replacements applied after spaces became underscores;
# they handle characters that are not valid in a C identifier.
_POST_REPLACEMENTS = (
    ('&', 'AND'),
    ("'", '_'),
    ('"', '_'),
    ('!', '_'),
)


def create_name(company):
    """Turn a company name into a BLUETOOTH_COMPANY_ID_* macro name.

    The name is reduced to ASCII, any part in braces is removed, the rest is
    upper-cased and punctuation/whitespace is mapped onto '_' or dropped.

    company -- company name as found in the scraped table
    Returns the macro name as a string.
    """
    # limit to ascii
    company = strip_non_ascii(company)
    # remove parts in braces
    tag = _BRACES_RE.sub('', company).rstrip().upper()
    for old, new in _PRE_REPLACEMENTS:
        tag = tag.replace(old, new)
    # collapse runs of spaces of any length so a single '_' is emitted
    tag = _SPACES_RE.sub(' ', tag)
    tag = tag.replace(' ', '_')
    for old, new in _POST_REPLACEMENTS:
        tag = tag.replace(old, new)
    return "BLUETOOTH_COMPANY_ID_" + tag


def _unique_tag(tag, used):
    """Return `tag`, or `tag` plus the first free suffix 2, 3, ..., and record it in `used`."""
    candidate = tag
    suffix = 2
    while candidate in used:
        candidate = "%s%d" % (tag, suffix)
        suffix += 1
    # record suffixed names as well, otherwise a third company with the same
    # name would get the same "...2" macro again
    used.append(candidate)
    return candidate


def scrape_page(fout, url):
    """Fetch `url` and write one #define per table row to `fout`.

    For every row matched by //table/tbody/tr the second cell is written as
    the macro value and the third cell is converted with create_name() into
    the macro name. Names already used get a numeric suffix.

    fout -- writable text file object for the generated header
    url  -- page to scrape
    Raises requests.exceptions.RequestException if the page cannot be fetched.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url))

    # get from web; fail loudly on timeouts and HTTP errors instead of
    # generating an empty header from an error page
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html'
    # f = codecs.open("company-identifiers.html", "r", "utf-8")
    # content = f.read();

    tree = html.fromstring(content)
    for row in tree.xpath('//table/tbody/tr'):
        cells = list(row)
        if len(cells) < 3:
            # not a data row (both the id and the name cell are needed)
            continue
        id_hex = cells[1].text_content()
        company = _unique_tag(create_name(cells[2].text_content()), tags)
        fout.write("#define %-80s %s\n" % (company, id_hex))

    # map CSR onto QTIL
    fout.write("#define BLUETOOTH_COMPANY_ID_CAMBRIDGE_SILICON_RADIO BLUETOOTH_COMPANY_ID_QUALCOMM_TECHNOLOGIES_INTERNATIONAL_LTD\n")


# The generated header is placed relative to the location of this script.
btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
gen_path = btstack_root + '/src/bluetooth_company_id.h'


def main():
    """Scrape the company identifier page and write the header to gen_path."""
    print(program_info)

    with open(gen_path, 'wt') as fout:
        fout.write(header.format(datetime=str(datetime.datetime.now())))
        scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers')
        fout.write(trailer)

    print('Scraping successful!\n')


if __name__ == "__main__":
    main()