#!/usr/bin/env python
#
# Scrape Bluetooth company identifiers from Bluetooth SIG page
# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
#
# Copyright 2017 BlueKitchen GmbH
#

import codecs
import datetime
import os
import re
import sys

from lxml import html
import requests

program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack
 */

#ifndef __BLUETOOTH_COMPANY_ID_H
#define __BLUETOOTH_COMPANY_ID_H
'''

page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

trailer = '''
#endif
'''

# all define names emitted so far, used to keep the generated names unique
tags = []


def create_name(company):
    """Turn a company name into a C preprocessor symbol.

    The name is upper-cased, text in parentheses is dropped, a fixed set of
    punctuation characters is removed or mapped to ' AND ' / ' ' / '_', and
    the remaining spaces become underscores.

    Args:
        company: company name as found in the scraped table.

    Returns:
        The symbol name, always prefixed with "BLUETOOTH_COMPANY_ID_".
    """
    # remove parts in braces (greedy: everything from the first '(' to the last ')')
    tag = re.sub(r'\(.*\)', '', company).rstrip().upper()
    tag = tag.replace('&', ' AND ')
    tag = tag.replace("'", '')
    tag = tag.replace('"', ' ')
    tag = tag.replace('+', ' AND ')
    tag = tag.replace(' - ', ' ')
    tag = tag.replace('/', ' ')
    tag = tag.replace(';', ' ')
    tag = tag.replace(',', '')
    tag = tag.replace('.', '')
    tag = tag.replace('-', '_')
    # the replacements above can leave runs of spaces; collapse them so that
    # each run yields a single underscore
    tag = re.sub(r' {2,}', ' ', tag)
    tag = tag.replace(' ', '_')
    return "BLUETOOTH_COMPANY_ID_" + tag


def scrape_page(fout, url):
    """Download the company identifier page and write one #define per company.

    The company table is taken from the inline <script> that contains
    'DataTable': everything between 'data: [' and the 0xFFFF sentinel entry
    is split into '["<decimal>","<hex>","<name>"' records.

    Args:
        fout: writable text file object receiving the generated C code.
        url: address of the page to scrape.

    Raises:
        requests.exceptions.RequestException: if the download fails, times
            out, or the server answers with an HTTP error status.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url))

    # get from web; fail loudly instead of parsing an error page
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html'
    # f = codecs.open("company-identifiers.html", "r", "utf-8")
    # content = f.read();

    tree = html.fromstring(content)
    # get all java script
    rows = tree.xpath('//script')
    start_tag = 'data: ['
    end_tag = '["65535","0xFFFF",'
    for row in rows:
        script = row.text_content()
        if 'DataTable' not in script:
            continue
        start = script.find(start_tag)
        if start < 0:
            # DataTable script without an inline data array
            continue
        end = script.find(end_tag, start)
        if end < 0:
            # sentinel entry missing: parse up to the end of the script,
            # malformed trailing records are skipped below
            end = len(script)
        company_list = script[start + len(start_tag):end]
        for entry in company_list.split('],'):
            if len(entry) < 5:
                break
            # drop leading '['
            entry = entry[1:]
            fields = entry.split('","')
            if len(fields) < 3:
                # not a '"decimal","hex","name"' record
                continue
            id_hex = fields[1]
            # fields[2] still carries the closing quote of the name
            company = create_name(fields[2][:-1])
            if company in tags:
                # append the first free numeric suffix so that a name which
                # occurs more than twice still yields distinct defines
                suffix = 2
                while company + str(suffix) in tags:
                    suffix += 1
                company = company + str(suffix)
            tags.append(company)
            fout.write("#define %-80s %s\n" % (company, id_hex))


btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
gen_path = btstack_root + '/src/bluetooth_company_id.h'

print(program_info)

with open(gen_path, 'wt') as fout:
    # header currently has no {datetime} placeholder, so the argument is ignored
    fout.write(header.format(datetime=str(datetime.datetime.now())))
    scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers')
    fout.write(trailer)

print('Scraping successful!\n')