#!/usr/bin/env python3
#
# Scrape GAP Data Types from Bluetooth SIG page
# Copyright 2016 BlueKitchen GmbH
#
from lxml import html
import datetime
import re
import requests
import sys
import os
# HTTP request headers passed to requests.get() when fetching the page.
# NOTE(review): the curl user-agent is presumably needed because the site
# rejects the default python-requests agent -- confirm before changing.
headers = {'user-agent': 'curl/7.63.0'}
# Banner printed to stdout when the script starts.
program_info = '''
BTstack Data Types Scraper for BTstack
Copyright 2016, BlueKitchen GmbH
'''
# Opening part of the generated C header. The {url} and {datetime}
# placeholders are filled in with str.format() before it is written.
header = '''/**
* bluetooth_data_types.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_data_types.py
* {url}
* {datetime}
*/
#ifndef BLUETOOTH_DATA_TYPES_H
#define BLUETOOTH_DATA_TYPES_H
'''
# Closing part of the generated C header: ends the include guard that
# `header` opens.
trailer = '''
#endif
'''
def clean(tag):
    """Return scraped cell text with decoration and invisible characters removed.

    The following characters are handled in a single pass:

    * U+00AB / U+00BB (left/right guillemets) -- removed
    * U+200B (zero-width space)               -- removed
    * newline                                 -- removed
    * U+00A0 (non-breaking space)             -- replaced by a regular space

    Leading and trailing whitespace is stripped afterwards.

    Args:
        tag: text of a table cell as a str.

    Returns:
        The cleaned string.
    """
    # None deletes the character; none of the replacement outputs is itself a
    # key of the table, so one translate() pass equals the chained replaces.
    table = str.maketrans({
        '\xab': None,
        '\xbb': None,
        '\u200b': None,
        '\n': None,
        '\xa0': ' ',
    })
    return tag.translate(table).strip()
def _data_type_tag(name):
    """Turn a cleaned data type name into the suffix of a C macro name."""
    tag = name.upper()
    # collapse ' - ' into ' ', then drop any remaining dashes
    tag = tag.replace(' - ', ' ').replace('-', ' ')
    # collapse runs of whitespace, then join the words with underscores
    tag = re.sub(r'\s+', ' ', tag).strip()
    return tag.replace(' ', '_')


def scrape_page(fout, url):
    """Fetch the page at `url` and write one #define per data type to `fout`.

    Every <tr> found under a <table>/<tbody> is read as a row whose first
    cell is the data type value and whose second cell is the data type name.
    For each row a line of the form

        #define BLUETOOTH_DATA_TYPE_<TAG> <value> // <name>

    is written to `fout`, and the same name/value pair is printed to stdout
    as a table.

    Args:
        fout: object with a write(str) method that receives the defines.
        url: address of the page to scrape.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    print("Parsing %s" % url)
    # A timeout keeps the tool from hanging forever on a stalled connection.
    page = requests.get(url, headers=headers, timeout=30)
    # Fail loudly instead of generating an empty header from an error page.
    page.raise_for_status()
    tree = html.fromstring(page.content)
    print('')
    print('%-48s | %s' % ("Data Type Name", "Data Type Value"))
    print('-' * 70)
    # get all <tr> elements in <table>
    rows = tree.xpath('//table/tbody/tr')
    for row in rows:
        cells = list(row)
        # a data row needs at least a value cell and a name cell
        if len(cells) < 2:
            continue
        data_type_value = clean(cells[0].text_content())
        data_type_name = clean(cells[1].text_content())
        # table with references to where it was used: skip its header row
        if data_type_value == 'Data Type Value':
            continue
        tag = _data_type_tag(data_type_name)
        fout.write("#define BLUETOOTH_DATA_TYPE_%-50s %s // %s\n" % (tag, data_type_value, data_type_name))
        print("%-48s | %s" % (data_type_name, data_type_value))
# The BTstack root is the parent of the directory that holds this script.
# os.path.join is used instead of appending '/..' because dirname() returns ''
# when the script is started from its own directory; '' + '/..' would then
# resolve to the filesystem root instead of the parent directory.
btstack_root = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '..'))
gen_path = os.path.join(btstack_root, 'src', 'bluetooth_data_types.h')

print(program_info)

# Explicit encoding so the generated header does not depend on the locale.
with open(gen_path, 'wt', encoding='utf-8') as fout:
    url = 'https://www.bluetooth.com/specifications/assigned-numbers/generic-access-profile'
    # The scheme is dropped from the URL recorded in the header comment.
    fout.write(header.format(datetime=str(datetime.datetime.now()), url=url.replace('https://','')))
    scrape_page(fout, url)
    fout.write(trailer)

print('')
print('Scraping successful into %s!\n' % gen_path)