#!/usr/bin/env python3
#
# Scrape L2CAP PSM assigned numbers from Bluetooth SIG page
# https://www.bluetooth.com/specifications/assigned-numbers/logical-link-control/
#
# Copyright 2019 BlueKitchen GmbH
#

import codecs
import datetime
import io
import os
import re
import sys

from lxml import html
import requests

# the request is sent with a curl user agent instead of the requests default
headers = {'user-agent': 'curl/7.63.0'}

# seconds to wait for the server; requests has no default timeout and
# would otherwise block forever on a stalled connection
REQUEST_TIMEOUT = 30

PSM_URL = 'https://www.bluetooth.com/specifications/assigned-numbers/logical-link-control/'

program_info = '''
BTstack PSM Scraper
Copyright 2019, BlueKitchen GmbH
'''

header = '''
/**
 * bluetooth_psm.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_psm.py
 * {datetime}
 */

#ifndef BLUETOOTH_PSM_H
#define BLUETOOTH_PSM_H
'''

page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

trailer = '''
#endif
'''

# unused by this script; kept so the module-level name stays available
tags = []

# matches everything from the first '(' to the last ')' (greedy)
_BRACES_RE = re.compile(r'\(.*\)')

# any character that is not allowed in an (upper-case) C macro name
_NON_IDENTIFIER_RE = re.compile(r'[^A-Z0-9_]')


def strip_non_ascii(string):
    """Return `string` without NUL and without characters of code point >= 127."""
    stripped = (c for c in string if 0 < ord(c) < 127)
    return ''.join(stripped)


def create_name(psm):
    """Turn a PSM name from the web page into a C macro name.

    The name is limited to ASCII, the part in braces is removed,
    surrounding whitespace is dropped and the rest is upper-cased.
    Every remaining character that is not valid in a C identifier
    (e.g. '-' or ' ') is replaced by '_', so the result can always be
    used after '#define'.

    Returns the macro name prefixed with 'BLUETOOTH_PSM_'.
    """
    # limit to ascii
    psm = strip_non_ascii(psm)
    # remove parts in braces
    tag = _BRACES_RE.sub('', psm).strip().upper()
    # map '-' and anything else not valid in an identifier to '_'
    tag = _NON_IDENTIFIER_RE.sub('_', tag)
    return "BLUETOOTH_PSM_" + tag


def scrape_page(fout, url):
    """Fetch `url` and write one '#define' per table row to `fout`.

    fout -- writable text stream that receives the generated C code
    url  -- page to download and parse

    Returns the number of '#define' lines written.
    Raises requests.RequestException if the download fails, times out
    or the server answers with an HTTP error status.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url.replace('https://', '')))

    # get from web; fail loudly on HTTP errors instead of parsing an error page
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    content = r.text

    # for offline testing, the page can be read from a local copy instead:
    #   content = codecs.open("index.html", "r", "utf-8").read()

    tree = html.fromstring(content)
    count = 0
    for row in tree.xpath('//table/tbody/tr'):
        cells = list(row)

        # name and value are taken from the first two cells; skip shorter rows
        if len(cells) < 2:
            continue

        psm = cells[0].text_content()

        # abort when second table starts
        if psm.strip() == '0x0000-0xFFFF':
            break

        # drop zero-width spaces and surrounding whitespace from the value
        id_hex = cells[1].text_content().replace(u'\u200b', '').strip()
        fout.write("#define %-80s %s\n" % (create_name(psm), id_hex))
        count += 1

    return count


# resolve the script location first: for a bare 'bluetooth_psm.py' the
# directory part is empty and appending '/..' would point at the file system root
btstack_root = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..'))
gen_path = os.path.join(btstack_root, 'src', 'bluetooth_psm.h')


def main():
    """Generate src/bluetooth_psm.h from the Bluetooth SIG web page."""
    print(program_info)

    # build the header in memory so a failed download or parse does not
    # leave a truncated bluetooth_psm.h behind
    generated = io.StringIO()
    generated.write(header.format(datetime=str(datetime.datetime.now())))
    count = scrape_page(generated, PSM_URL)
    generated.write(trailer)

    if count == 0:
        sys.exit('No PSM entries found, %s not updated' % gen_path)

    with open(gen_path, 'wt', encoding='utf-8') as fout:
        fout.write(generated.getvalue())

    print('Scraping successful!\n')


if __name__ == '__main__':
    main()