xref: /btstack/tool/bluetooth_data_types.py (revision 5c54401929043df982e040eba652f5fd7763ce15)
1#!/usr/bin/env python3
2#
3# Scrape GAP Data Types from Bluetooth SIG page
4# Copyright 2016 BlueKitchen GmbH
5#
6
7from lxml import html
8import datetime
9import re
10import requests
11import sys
12import os
13
# HTTP request headers passed to requests.get() when fetching the page.
headers = {
    'user-agent': 'curl/7.63.0',
}

# Banner printed to stdout when the script starts.
program_info = (
    '\n'
    'BTstack Data Types Scraper for BTstack\n'
    'Copyright 2016, BlueKitchen GmbH\n'
)

# Opening part of the generated C header file. The {url} and {datetime}
# placeholders are filled in with str.format() before the text is written.
header = (
    '/**\n'
    ' * bluetooth_data_types.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_data_types.py\n'
    ' * {url}\n'
    ' * {datetime}\n'
    ' */\n'
    '\n'
    '#ifndef BLUETOOTH_DATA_TYPES_H\n'
    '#define BLUETOOTH_DATA_TYPES_H\n'
    '\n'
)

# Closing part of the generated C header file: terminates the include guard.
trailer = (
    '\n'
    '#endif\n'
)
35
def clean(tag):
    """Return the text of a scraped table cell with markup residue removed.

    Guillemets (U+00AB, U+00BB), zero-width spaces (U+200B) and newlines are
    deleted, every no-break space (U+00A0) becomes a regular space, and
    leading/trailing whitespace is stripped from the result.
    """
    substitutions = {
        0x00ab: None,   # << left-pointing guillemet
        0x00bb: None,   # >> right-pointing guillemet
        0x200b: None,   # zero-width (non-visible) space
        0x000a: None,   # newline
        0x00a0: ' ',    # no-break (non-visible) space -> regular space
    }
    return tag.translate(substitutions).strip()
43
def scrape_page(fout, url):
    """Download the data-type table at `url` and write one #define per row.

    For every table row with at least two cells, the first cell is taken as
    the data type value and the second as the data type name. Both are passed
    through clean(); the name is then upper-cased and turned into an
    identifier suffix, and a line of the form
    ``#define BLUETOOTH_DATA_TYPE_<TAG> <value> // <name>`` is written to
    `fout`. The same name/value pair is echoed to stdout as a table.

    Args:
        fout: object with a ``write(str)`` method receiving the #define lines.
        url: address of the page to fetch.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    print("Parsing %s" % url)
    # the timeout keeps the script from hanging forever on a stalled connection
    page = requests.get(url, headers=headers, timeout=30)
    # fail loudly instead of parsing an error page into an empty header file
    page.raise_for_status()
    tree = html.fromstring(page.content)

    print('')
    print('%-48s | %s' % ("Data Type Name", "Data Type Value"))
    print('-' * 70)

    # visit every <tr> inside the <tbody> of every <table> on the page
    for row in tree.xpath('//table/tbody/tr'):
        cells = list(row)
        # rows without both a value and a name cell carry no data type
        if len(cells) < 2:
            continue

        # clean up before comparing, so stray whitespace cannot hide the header row
        data_type_value = clean(cells[0].text_content())
        data_type_name = clean(cells[1].text_content())

        # skip the column header row
        if data_type_value == 'Data Type Value':
            continue

        # an empty cell would produce a malformed #define
        if not data_type_value or not data_type_name:
            continue

        # upper case
        tag = data_type_name.upper()
        # collapse ' - ' into ' '
        tag = tag.replace(' - ', ' ')
        # drop dashes otherwise
        tag = tag.replace('-', ' ')
        # collapse multiple spaces
        tag = re.sub(r'\s+', ' ', tag).strip()
        # replace space with underscore
        tag = tag.replace(' ', '_')
        fout.write("#define BLUETOOTH_DATA_TYPE_%-50s %s // %s\n" % (tag, data_type_value, data_type_name))
        print("%-48s | %s" % (data_type_name, data_type_value))
81
# The BTstack root is the parent of the directory that contains this script.
# The script path is made absolute first: when the script is started as
# 'python3 bluetooth_data_types.py', dirname(sys.argv[0]) is '' and appending
# '/..' to it would resolve to the filesystem root instead.
btstack_root = os.path.abspath(os.path.dirname(os.path.abspath(sys.argv[0])) + '/..')
gen_path = btstack_root + '/src/bluetooth_data_types.h'

print(program_info)

# Write the generated header: preamble, one #define per scraped row, trailer.
with open(gen_path, 'wt') as fout:
    url = 'https://www.bluetooth.com/specifications/assigned-numbers/generic-access-profile'
    # the URL is recorded in the file header without its scheme
    fout.write(header.format(datetime=str(datetime.datetime.now()), url=url.replace('https://','')))
    scrape_page(fout, url)
    fout.write(trailer)

print('')
print('Scraping successful into %s!\n' % gen_path)
95