#!/usr/bin/env python
#
# Scrape company identifiers from Bluetooth SIG page
# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
#
# Copyright 2017 BlueKitchen GmbH
#

import datetime
import sys
import codecs
import os
import re

program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack
 */

#ifndef __BLUETOOTH_COMPANY_ID_H
#define __BLUETOOTH_COMPANY_ID_H
'''

page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

trailer = '''
#endif
'''

# Page that is scraped when the script is run directly.
COMPANY_ID_URL = 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers'

# Prefix shared by every generated macro name.
NAME_PREFIX = "BLUETOOTH_COMPANY_ID_"

# Seconds to wait for the web server before giving up.
REQUEST_TIMEOUT = 30

# Macro names that have already been written; used to keep names unique.
tags = []

# Greedy on purpose: everything from the first '(' to the last ')' is dropped.
_PARENTHESIZED = re.compile(r'\(.*\)')
_SPACE_RUN = re.compile(r' {2,}')


def create_name(company):
    """Turn a company name into a BLUETOOTH_COMPANY_ID_* macro name.

    Parenthesized parts are removed, the name is upper-cased, '&' and '+'
    become 'AND', punctuation is dropped or replaced by a separator, and
    runs of spaces end up as a single underscore.

    company -- company name as found on the web page
    returns -- macro name, always starting with BLUETOOTH_COMPANY_ID_
    """
    # remove parts in braces
    tag = _PARENTHESIZED.sub('', company).rstrip().upper()
    tag = tag.replace('&', ' AND ')
    tag = tag.replace("'", '')
    tag = tag.replace('"', ' ')
    tag = tag.replace('+', ' AND ')
    tag = tag.replace(' - ', ' ')
    tag = tag.replace('/', ' ')
    tag = tag.replace(';', ' ')
    tag = tag.replace(',', '')
    tag = tag.replace('.', '')
    tag = tag.replace('-', '_')
    # the replacements above can leave several spaces in a row
    tag = _SPACE_RUN.sub(' ', tag)
    tag = tag.replace(' ', '_')
    return NAME_PREFIX + tag


def _unique_name(name):
    """Return name, or name plus the first counter (2, 3, ...) not used yet, and record it in tags."""
    candidate = name
    counter = 2
    while candidate in tags:
        candidate = name + str(counter)
        counter += 1
    tags.append(candidate)
    return candidate


def _extract_company_list(script):
    """Return the text between 'data: [' and the 0xFFFF row of script, or None if either marker is missing."""
    start_tag = 'data: ['
    end_tag = '["65535","0xFFFF",'
    start = script.find(start_tag)
    if start < 0:
        return None
    start += len(start_tag)
    # the 0xFFFF row and everything after it is not part of the output
    end = script.find(end_tag, start)
    if end < 0:
        return None
    return script[start:end]


def _parse_entries(company_list):
    """Yield (id_hex, company_name) for each '["dec","hex","name"]' row in company_list."""
    for entry in company_list.split('],'):
        # a very short chunk marks the end of the table
        if len(entry) < 5:
            break
        # drop the leading '[' and split on the quote-comma-quote separators
        fields = entry[1:].split('","')
        if len(fields) < 3:
            # not a decimal/hex/name row, skip instead of raising IndexError
            continue
        # the last field still carries its closing quote
        yield fields[1], fields[2][:-1]


def scrape_page(fout, url):
    """Download url and write one #define per company identifier to fout.

    The company table is looked up in <script> elements that mention
    'DataTable'. Script blocks without the expected start/end markers are
    skipped with a warning.

    fout    -- writable text file object receiving the generated C code
    url     -- address of the company identifiers page
    returns -- number of #define lines written
    raises  -- requests.exceptions.RequestException if the download fails
               or the server answers with an HTTP error status
    """
    # Third-party modules are imported here so that create_name() can be
    # imported and tested without the scraping dependencies installed.
    from lxml import html
    import requests

    print("Parsing %s" % url)
    fout.write(page_info.format(page=url))

    # get from web
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    # do not try to parse an error page
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html'
    # f = codecs.open("company-identifiers.html", "r", "utf-8")
    # content = f.read()

    tree = html.fromstring(content)
    written = 0
    # get all java script
    for row in tree.xpath('//script'):
        script = row.text_content()
        if 'DataTable' not in script:
            continue
        company_list = _extract_company_list(script)
        if company_list is None:
            print("Warning: DataTable script without company list markers skipped")
            continue
        for id_hex, name in _parse_entries(company_list):
            company = create_name(name)
            if company == NAME_PREFIX:
                # nothing left of the company name, no usable macro name
                continue
            fout.write("#define %-80s %s\n" % (_unique_name(company), id_hex))
            written += 1
    return written


def main():
    """Regenerate src/bluetooth_company_id.h relative to the location of this script."""
    btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
    gen_path = btstack_root + '/src/bluetooth_company_id.h'

    print(program_info)

    with open(gen_path, 'wt') as fout:
        # header has no {datetime} placeholder at the moment, the argument is ignored
        fout.write(header.format(datetime=str(datetime.datetime.now())))
        written = scrape_page(fout, COMPANY_ID_URL)
        fout.write(trailer)

    if written == 0:
        # do not report success for a header without any company id
        sys.exit('Scraping failed: no company identifiers found, page layout may have changed')

    print('Scraping successful!\n')


if __name__ == '__main__':
    main()