#!/usr/bin/env python3
#
# Scrape GAP Data Types from Bluetooth SIG page
# Copyright 2016 BlueKitchen GmbH
#

from lxml import html
import datetime
import re
import requests
import sys
import os

# Pretend to be curl: the SIG site serves different/blocked content to
# unknown user agents.
headers = {'user-agent': 'curl/7.63.0'}

program_info = '''
BTstack Data Types Scraper for BTstack
Copyright 2016, BlueKitchen GmbH
'''

header = '''/**
 * bluetooth_data_types.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_data_types.py
 * {url}
 * {datetime}
 */

#ifndef BLUETOOTH_DATA_TYPES_H
#define BLUETOOTH_DATA_TYPES_H

'''

trailer = '''
#endif
'''


def clean(tag):
    """Normalize a scraped table cell: drop guillemets, zero-width spaces
    and newlines, map non-breaking spaces to plain spaces, then strip."""
    # << 0xab
    # >> 0xbb
    # \n
    # non-visible whitespace 0x200b
    # non-visible whitespace 0xa0
    return tag.replace(u'\xab', '').replace(u'\xbb', '').replace(u'\u200b', '').replace('\n', '').replace(u'\xa0', ' ').strip()


def scrape_page(fout, url):
    """Fetch the GAP assigned-numbers page at *url* and write one
    '#define BLUETOOTH_DATA_TYPE_...' line per table row to *fout*.

    Also echoes a name/value table to stdout for visual inspection.
    """
    print("Parsing %s" % url)
    page = requests.get(url, headers=headers)
    tree = html.fromstring(page.content)
    print('')
    print('%-48s | %s' % ("Data Type Name", "Data Type Value"))
    print('-' * 70)

    # get all elements in
    rows = tree.xpath('//table/tbody/tr')
    for row in rows:
        children = row.getchildren()
        # skip malformed rows (e.g. spanning/empty rows) that lack the
        # expected value + name cells
        if len(children) < 2:
            continue
        data_type_value = children[0].text_content()
        data_type_name = children[1].text_content()

        # table with references to where it was used
        if (data_type_value == 'Data Type Value'):
            continue

        # clean up
        data_type_name = clean(data_type_name)
        data_type_value = clean(data_type_value)

        # derive C macro suffix from the human-readable name
        tag = data_type_name
        # upper
        tag = tag.upper()
        # collapse ' - ' into ' '
        tag = tag.replace(' - ', ' ')
        # drop dashes otherwise
        tag = tag.replace('-', ' ')
        # collapse multiple spaces (raw string: avoid invalid-escape warning)
        tag = re.sub(r'\s+', ' ', tag).strip()
        # replace space with under score
        tag = tag.replace(' ', '_')

        fout.write("#define BLUETOOTH_DATA_TYPE_%-50s %s // %s\n" % (tag, data_type_value, data_type_name))
        print("%-48s | %s" % (data_type_name, data_type_value))


def main():
    """Generate src/bluetooth_data_types.h relative to this tool's location."""
    btstack_root = os.path.abspath(os.path.dirname(sys.argv[0]) + '/..')
    gen_path = btstack_root + '/src/bluetooth_data_types.h'

    print(program_info)

    with open(gen_path, 'wt') as fout:
        url = 'https://www.bluetooth.com/specifications/assigned-numbers/generic-access-profile'
        fout.write(header.format(datetime=str(datetime.datetime.now()), url=url.replace('https://', '')))
        scrape_page(fout, url)
        fout.write(trailer)

    print('')
    print('Scraping successful into %s!\n' % gen_path)


# Guarded entry point: importing this module no longer triggers the
# network fetch and file write as a side effect.
if __name__ == '__main__':
    main()