#!/usr/bin/env python3
#
# Scrape Bluetooth company identifiers from Bluetooth SIG page
# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
#
# Copyright 2017 BlueKitchen GmbH
#

from lxml import html
import datetime
import requests
import sys
import codecs
import io
import os
import re

# HTTP headers sent with every request
headers = {'user-agent': 'curl/7.63.0'}

# seconds to wait for the web server before giving up
REQUEST_TIMEOUT = 30

program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_company_id.py
 * {datetime}
 */

#ifndef BLUETOOTH_COMPANY_ID_H
#define BLUETOOTH_COMPANY_ID_H
'''

page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

trailer = '''
#endif
'''

# macro names emitted so far, used to keep the generated #defines unique
tags = []

# text in braces, e.g. "(QTIL)"; greedy, so it spans from the first '(' to the last ')'
_BRACES = re.compile(r'\(.*\)')

# runs of two or more spaces
_SPACE_RUN = re.compile(r' {2,}')

# anything that is not allowed in the generated macro name
_INVALID_CHAR = re.compile(r'[^A-Z0-9_]')

# Literal replacements applied in order to the upper-cased company name.
# ' - ' has to be handled before the bare '-'.
# NOTE(review): the original rule list handled '&', "'" and '"' a second time
# further down; those later rules could never match because the rules below
# already consume the characters, so they were dropped.
_REPLACEMENTS = (
    ('&', ' AND '),
    ("'", ''),
    ('"', ' '),
    ('+', ' AND '),
    (' - ', ' '),
    ('/', ' '),
    (';', ' '),
    (',', ''),
    ('.', ''),
    ('-', '_'),
)


def strip_non_ascii(string):
    """Return string without NUL, DEL (127) and all non-ASCII characters."""
    return ''.join(c for c in string if 0 < ord(c) < 127)


def create_name(company):
    """Convert a company name into a BLUETOOTH_COMPANY_ID_* macro name.

    The name is limited to ASCII, text in braces is removed, the result is
    upper-cased and punctuation is replaced or dropped. The returned name
    only contains the characters A-Z, 0-9 and '_'.
    """
    # limit to ascii
    company = strip_non_ascii(company)
    # remove parts in braces
    tag = _BRACES.sub('', company).rstrip().upper()
    for old, new in _REPLACEMENTS:
        tag = tag.replace(old, new)
    # the replacements above can leave several spaces in a row
    tag = _SPACE_RUN.sub(' ', tag)
    tag = tag.replace(' ', '_')
    # whatever is left (e.g. '!') would not be valid in a C identifier
    tag = _INVALID_CHAR.sub('_', tag)
    return "BLUETOOTH_COMPANY_ID_" + tag


def _unique_name(name):
    """Return name, or name with the first free numeric suffix (2, 3, ...) if already used.

    The returned name is recorded in the module-level tags list.
    """
    unique = name
    suffix = 2
    while unique in tags:
        unique = name + str(suffix)
        suffix += 1
    tags.append(unique)
    return unique


def scrape_page(fout, url):
    """Download url and write one #define per table row to fout.

    fout is any object with a write() method. The second cell of each row is
    used as value and the third cell as company name; rows with fewer than
    three cells are skipped. A fixed alias mapping CSR onto QTIL is written
    after the table.

    Returns the number of company #defines written (without the alias).
    Raises requests.exceptions.RequestException if the download fails, times
    out or the server answers with an HTTP error status.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url.replace('https://','')))

    # get from web
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # do not try to parse an error page
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html'
    # f = codecs.open("company-identifiers.html", "r", "utf-8")
    # content = f.read();

    tree = html.fromstring(content)
    count = 0
    for row in tree.xpath('//table/tbody/tr'):
        cells = list(row)
        if len(cells) < 3:
            # not a data row
            continue
        id_hex = cells[1].text_content().strip()
        company = _unique_name(create_name(cells[2].text_content()))
        fout.write("#define %-80s %s\n" % (company, id_hex))
        count += 1

    # map CSR onto QTIL
    fout.write("#define BLUETOOTH_COMPANY_ID_CAMBRIDGE_SILICON_RADIO BLUETOOTH_COMPANY_ID_QUALCOMM_TECHNOLOGIES_INTERNATIONAL_LTD\n")
    return count


btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
gen_path = btstack_root + '/src/bluetooth_company_id.h'


def main():
    """Scrape the company identifier page and generate src/bluetooth_company_id.h."""
    url = 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers'

    print(program_info)

    # collect the output in memory first, so an existing header file is only
    # replaced after scraping has succeeded
    output = io.StringIO()
    output.write(header.format(datetime=str(datetime.datetime.now())))
    count = scrape_page(output, url)
    output.write(trailer)

    if count == 0:
        sys.exit('No company identifiers found at %s, %s not written' % (url, gen_path))

    with open(gen_path, 'wt') as fout:
        fout.write(output.getvalue())

    print('Scraping successful!\n')


if __name__ == '__main__':
    main()