xref: /btstack/tool/bluetooth_company_id.py (revision 1882d12dc36ae7b48f7f4ed3fd8d029fba3b08b1)
1#!/usr/bin/env python
2#
# Scrape Company Identifiers from Bluetooth SIG page
4# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
5#
6# Copyright 2017 BlueKitchen GmbH
7#
8
9from lxml import html
10import datetime
11import requests
12import sys
13import codecs
14import os
15import re
16
# Banner printed to stdout when the script starts
program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

# Opening part of the generated C header (file comment and include guard);
# the {datetime} placeholder is filled in with the generation time
header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_company_id.py
 * {datetime}
 */

#ifndef __BLUETOOTH_COMPANY_ID_H
#define __BLUETOOTH_COMPANY_ID_H
'''

# Comment block written before the defines of a scraped page;
# the {page} placeholder is filled in with the page URL
page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

# Closing part of the generated C header (ends the include guard)
trailer = '''
#endif
'''

# Macro names generated so far; scrape_page() consults and extends this list
# to detect companies that map to the same name
tags = []
43
def create_name(company):
    """Turn a company name into a C macro name.

    Everything from the first '(' to the last ')' is dropped, the rest is
    upper-cased, punctuation is removed or replaced, and the remaining words
    are joined with underscores.

    Args:
        company: company name as listed on the Bluetooth SIG page.

    Returns:
        The macro name, always starting with "BLUETOOTH_COMPANY_ID_".
    """
    # remove parts in braces
    tag = re.sub(r'\(.*\)', '', company).rstrip().upper()

    # applied in order: ' - ' has to be handled before the bare '-'
    replacements = (
        ('&', ' AND '),
        ("'", ''),          # ASCII apostrophe
        (u'\u2019', ''),    # typographic apostrophe (right single quotation mark)
        ('"', ' '),
        ('+', ' AND '),
        (' - ', ' '),
        ('/', ' '),
        (';', ' '),
        (',', ''),
        ('.', ''),
        ('-', '_'),
    )
    for old, new in replacements:
        tag = tag.replace(old, new)

    # collapse runs of spaces left behind by the replacements above
    tag = re.sub(' {2,}', ' ', tag)
    return "BLUETOOTH_COMPANY_ID_" + tag.replace(' ', '_')
63
def scrape_page(fout, url):
    """Download the company identifier page and write one #define per company.

    The company rows are taken from the <script> element whose text mentions
    'DataTable'; they are extracted from the script text with plain string
    operations. Names that were already generated get a numeric suffix
    (2, 3, ...) so that every emitted macro name is unique.

    Args:
        fout: writable text file object receiving the generated C code.
        url: address of the page to scrape.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url))

    # get from web
    r = requests.get(url)
    # fail loudly instead of generating an empty header from an error page
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html'
    # f = codecs.open("company-identifiers.html", "r", "utf-8")
    # content = f.read();

    tree = html.fromstring(content)
    # get all java script
    for row in tree.xpath('//script'):
        script = row.text_content()
        if 'DataTable' not in script:
            continue
        # the rows sit between the start of the data array and the entry
        # with id 0xFFFF, which itself is not emitted
        start_tag = 'data:  ['
        end_tag   = '["65535","0xFFFF",'
        start = script.find(start_tag)
        end   = script.find(end_tag)
        if start < 0 or end < 0:
            # a marker is missing: slicing with -1 would yield garbage
            continue
        company_list = script[start + len(start_tag):end]
        for entry in company_list.split('],'):
            if len(entry) < 5:
                break
            # drop the leading '[' of the row
            entry = entry[1:]
            fields = entry.split('","')
            if len(fields) < 3:
                # malformed row without id/name columns
                continue
            id_hex = fields[1]
            # [:-1] strips the closing quote of the last column
            base = create_name(fields[2][:-1])
            # pick the first free name: base, base2, base3, ...
            company = base
            suffix = 2
            while company in tags:
                company = base + str(suffix)
                suffix += 1
            tags.append(company)
            fout.write("#define %-80s %s\n" %  (company, id_hex))
102
# BTstack root is the parent of the folder that contains this script
btstack_root = os.path.abspath('%s/..' % os.path.dirname(sys.argv[0]))
# the generated header is placed in the src folder below the root
gen_path = '%s/src/bluetooth_company_id.h' % btstack_root

print(program_info)

# emit the header, the defines scraped from the SIG page, then the trailer
with open(gen_path, mode='wt') as fout:
    generated_header = header.format(datetime=datetime.datetime.now())
    fout.write(generated_header)
    scrape_page(
        fout,
        'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers',
    )
    fout.write(trailer)

print('Scraping successful!\n')
114