#!/usr/bin/env python
#
# Scrape company identifiers from Bluetooth SIG page
# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
#
# Copyright 2017 BlueKitchen GmbH
#

from lxml import html
import datetime
import requests
import sys
import codecs
import os
import re

program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_company_id.py
 * {datetime}
 */

#ifndef __BLUETOOTH_COMPANY_ID_H
#define __BLUETOOTH_COMPANY_ID_H
'''

page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

trailer = '''
#endif
'''

# page that lists the assigned company identifiers
COMPANY_ID_URL = 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers'

# prefix shared by all generated defines
NAME_PREFIX = "BLUETOOTH_COMPANY_ID_"

# seconds to wait for the web server before giving up
REQUEST_TIMEOUT = 30

# names of all defines emitted so far, used to keep generated names unique
tags = set()

# parts in braces, e.g. "Foo (formerly Bar)"; greedy on purpose: everything
# from the first '(' to the last ')' is dropped
_BRACES = re.compile(r'\(.*\)')

# one or more consecutive spaces
_SPACES = re.compile(r' +')

# (old, new) substitutions applied in this order by create_name();
# ' - ' has to be handled before the single '-'
_REPLACEMENTS = (
    ('&', ' AND '),
    ("'", ''),
    ('"', ' '),
    ('+', ' AND '),
    (' - ', ' '),
    ('/', ' '),
    (';', ' '),
    (',', ''),
    ('.', ''),
    ('-', '_'),
)


def create_name(company):
    """Return the define name for a company name.

    Parts in braces and trailing whitespace are removed, the name is
    upper-cased, punctuation is replaced or dropped according to
    _REPLACEMENTS, and every run of spaces becomes a single underscore.
    The result is prefixed with NAME_PREFIX.
    """
    tag = _BRACES.sub('', company).rstrip().upper()
    for old, new in _REPLACEMENTS:
        tag = tag.replace(old, new)
    return NAME_PREFIX + _SPACES.sub('_', tag)


def _unique_name(name):
    """Return name, or name with the first free numeric suffix (2, 3, ...), and record it in tags."""
    candidate = name
    suffix = 2
    while candidate in tags:
        candidate = name + str(suffix)
        suffix += 1
    tags.add(candidate)
    return candidate


def scrape_page(fout, url):
    """Fetch url and write one '#define <name> <id>' line per company to fout.

    The company list is taken from the script element that contains
    'DataTable': everything between 'data: [' and the entry for 0xFFFF.

    Returns the number of defines written. Raises the requests exception
    if the page cannot be fetched or the server reports an HTTP error.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url))

    # get from web
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html'
    # f = codecs.open("company-identifiers.html", "r", "utf-8")
    # content = f.read()

    start_tag = 'data: ['
    end_tag = '["65535","0xFFFF",'
    written = 0

    tree = html.fromstring(content)
    # get all java script
    for row in tree.xpath('//script'):
        script = row.text_content()
        if 'DataTable' not in script:
            continue
        start = script.find(start_tag)
        end = script.find(end_tag)
        if start < 0 or end < start:
            # markers not found, this script does not hold the company list
            continue
        company_list = script[start + len(start_tag):end]
        for entry in company_list.split('],'):
            if len(entry) < 5:
                break
            # drop the opening '[' and split '"decimal","hex","name"'
            fields = entry[1:].split('","')
            if len(fields) < 3:
                continue
            id_hex = fields[1]
            # last field still carries its closing quote
            company = create_name(fields[2][:-1])
            if company == NAME_PREFIX:
                # nothing left of the company name
                continue
            fout.write("#define %-80s %s\n" % (_unique_name(company), id_hex))
            written += 1
    return written


btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
gen_path = btstack_root + '/src/bluetooth_company_id.h'


def main():
    """Generate src/bluetooth_company_id.h from the Bluetooth SIG page."""
    print(program_info)

    with open(gen_path, 'wt') as fout:
        fout.write(header.format(datetime=str(datetime.datetime.now())))
        count = scrape_page(fout, COMPANY_ID_URL)
        fout.write(trailer)

    if not count:
        sys.exit('No company identifiers found in %s' % COMPANY_ID_URL)

    print('Scraping successful!\n')


if __name__ == '__main__':
    main()