#!/usr/bin/env python3
#
# Scrape PSM values from Bluetooth SIG page
# https://www.bluetooth.com/specifications/assigned-numbers/logical-link-control/
# and write them as C #defines to src/bluetooth_psm.h
#
# Copyright 2019 BlueKitchen GmbH
#

import codecs
import datetime
import os
import re
import sys

import requests
from lxml import html

# HTTP request headers sent with the page fetch.
headers = {'user-agent': 'curl/7.63.0'}

program_info = '''
BTstack PSM Scraper
Copyright 2019, BlueKitchen GmbH
'''

# Opening part of the generated C header; {datetime} is filled in at run time.
header = '''
/**
 * bluetooth_psm.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_psm.py
 * {datetime}
 */

#ifndef BLUETOOTH_PSM_H
#define BLUETOOTH_PSM_H
'''

# Comment block written before the defines of each scraped page.
page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

# Closing part of the generated C header.
trailer = '''
#endif
'''

# NOTE(review): not referenced anywhere in this script; kept so the module's
# global names stay unchanged.
tags = []

# Matches a parenthesised remark. Greedy: spans from the first '(' to the
# last ')' in the string. Raw string avoids the invalid '\(' escape warning.
_PARENTHESISED = re.compile(r'\(.*\)')


def strip_non_ascii(string):
    """Return *string* reduced to characters with code points 1..126.

    NUL (0), DEL (127) and everything outside ASCII are dropped.
    """
    return ''.join(c for c in string if 0 < ord(c) < 127)


def create_name(psm):
    """Build the C macro name for the PSM label *psm*.

    The label is limited to ASCII, any parenthesised part is removed,
    trailing whitespace is stripped, the result is upper-cased and '-' is
    replaced by '_'. The return value is prefixed with "BLUETOOTH_PSM_".
    """
    # limit to ascii
    psm = strip_non_ascii(psm)
    # remove parts in braces
    tag = _PARENTHESISED.sub('', psm).rstrip().upper()
    tag = tag.replace('-', '_')
    return "BLUETOOTH_PSM_" + tag


def scrape_page(fout, url):
    """Fetch *url* and write one ``#define`` per table row to *fout*.

    Args:
        fout: writable text file object receiving the generated C code.
        url: address of the assigned-numbers page to scrape.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url.replace('https://', '')))

    # get from web; fail loudly instead of parsing an error page into an
    # empty header file
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'index.html'
    # f = codecs.open("index.html", "r", "utf-8")
    # content = f.read()

    tree = html.fromstring(content)
    rows = tree.xpath('//table/tbody/tr')
    for row in rows:
        children = row.getchildren()
        psm = children[0].text_content()

        # abort when second table starts
        if psm == '0x0000-0xFFFF':
            break

        # drop zero-width spaces embedded in the value cell
        id_hex = children[1].text_content().replace(u'\u200b', '')
        fout.write("#define %-80s %s\n" % (create_name(psm), id_hex))


# The output path is resolved relative to the script location: <script dir>/../src
btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
gen_path = btstack_root + '/src/bluetooth_psm.h'

print(program_info)

with open(gen_path, 'wt') as fout:
    fout.write(header.format(datetime=str(datetime.datetime.now())))
    scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/logical-link-control/')
    fout.write(trailer)

print('Scraping successful!\n')