#!/usr/bin/env python3
#
# Scrape GAP Data Types from Bluetooth SIG page
# Copyright 2016 BlueKitchen GmbH
#

from lxml import html
import datetime
import io
import re
import requests
import sys
import os

# The request identifies itself as curl.
# NOTE(review): presumably the site rejects the default python-requests
# user agent - confirm before changing.
headers = {'user-agent': 'curl/7.63.0'}

# seconds to wait for the server before giving up instead of hanging forever
request_timeout = 30

program_info = '''
BTstack Data Types Scraper for BTstack
Copyright 2016, BlueKitchen GmbH
'''

header = '''/**
 * bluetooth_data_types.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_data_types.py
 * {url}
 * {datetime}
 */

#ifndef BLUETOOTH_DATA_TYPES_H
#define BLUETOOTH_DATA_TYPES_H

'''

trailer = '''
#endif
'''

# characters that clean() drops or replaces, keyed by code point
_clean_table = {
    0xab: None,    # << left-pointing double angle quotation mark
    0xbb: None,    # >> right-pointing double angle quotation mark
    0x0a: None,    # newline
    0x200b: None,  # non-visible whitespace (zero width space)
    0xa0: ' ',     # non-visible whitespace (no-break space) -> regular space
}

# one or more whitespace characters; raw string avoids the invalid '\s' escape
_whitespace_run = re.compile(r'\s+')


def clean(tag):
    """Return *tag* without decoration characters and surrounding whitespace.

    Angle quotation marks, zero width spaces and newlines are removed,
    no-break spaces become regular spaces, and the result is stripped.
    """
    return tag.translate(_clean_table).strip()


def _make_tag(data_type_name):
    """Turn a cleaned data type name into the suffix of a C macro name.

    The name is upper-cased, dashes become spaces, runs of whitespace are
    collapsed and the remaining single spaces are replaced by underscores.
    """
    tag = data_type_name.upper()
    # ' - ' and a bare '-' both end up as a single space once whitespace
    # runs are collapsed, so one replacement covers both cases
    tag = tag.replace('-', ' ')
    tag = _whitespace_run.sub(' ', tag).strip()
    return tag.replace(' ', '_')


def scrape_page(fout, url):
    """Fetch *url* and write one #define per table row to *fout*.

    Every <tr> below a <table>/<tbody> on the page is inspected. The first
    cell is used as the data type value and the second as the data type
    name. Rows with fewer than two cells and the column header row are
    skipped. Each entry is also echoed to stdout.

    Args:
        fout: writable text stream that receives the #define lines.
        url: address of the page to scrape.

    Returns:
        The number of #define lines written.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out or the server answers with an HTTP error status.
    """
    print("Parsing %s" % url)
    page = requests.get(url, headers=headers, timeout=request_timeout)
    # do not try to parse an error page as if it were the data type table
    page.raise_for_status()
    tree = html.fromstring(page.content)

    print('')
    print('%-48s | %s' % ("Data Type Name", "Data Type Value"))
    print('-' * 70)

    count = 0
    # get all <tr> elements of all tables on the page
    for row in tree.xpath('//table/tbody/tr'):
        cells = list(row)
        if len(cells) < 2:
            # spacer / malformed row without a value and a name cell
            continue

        data_type_value = clean(cells[0].text_content())
        data_type_name = clean(cells[1].text_content())
        # NOTE(review): any further cells are ignored; an earlier comment
        # suggests they hold references to where the type is used - confirm.

        # skip the column header row
        if data_type_value == 'Data Type Value':
            continue

        tag = _make_tag(data_type_name)
        fout.write("#define BLUETOOTH_DATA_TYPE_%-50s %s // %s\n" % (tag, data_type_value, data_type_name))
        print("%-48s | %s" % (data_type_name, data_type_value))
        count += 1

    return count


btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
gen_path = btstack_root + '/src/bluetooth_data_types.h'


def main():
    """Scrape the data type table and regenerate src/bluetooth_data_types.h.

    The output is assembled in memory first, so that the existing header is
    only overwritten once scraping succeeded and produced at least one entry.
    """
    print(program_info)

    url = 'https://www.bluetooth.com/specifications/assigned-numbers/generic-access-profile'

    generated = io.StringIO()
    generated.write(header.format(datetime=str(datetime.datetime.now()), url=url.replace('https://', '')))
    count = scrape_page(generated, url)
    generated.write(trailer)

    if count == 0:
        # an empty result most likely means the page layout changed;
        # keep the previously generated header instead of emptying it
        sys.exit('No data types found at %s, %s left untouched' % (url, gen_path))

    with open(gen_path, 'wt', encoding='utf-8') as fout:
        fout.write(generated.getvalue())

    print('')
    print('Scraping successful into %s!\n' % gen_path)


if __name__ == '__main__':
    main()