xref: /btstack/tool/bluetooth_company_id.py (revision cd5f23a3250874824c01a2b3326a9522fea3f99f)
1#!/usr/bin/env python3
2#
# Scrape Company Identifiers from Bluetooth SIG page
4# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
5#
6# Copyright 2017 BlueKitchen GmbH
7#
8
9from lxml import html
10import datetime
11import requests
12import sys
13import codecs
14import os
15import re
16
# Request headers handed to requests.get() when the page is fetched.
headers = {
    'user-agent': 'curl/7.63.0',
}

# Banner printed to the console when the tool starts.
program_info = (
    '\n'
    'BTstack Company ID Scraper for BTstack\n'
    'Copyright 2017, BlueKitchen GmbH\n'
)

# Opening part of the generated C header; {datetime} is filled in via str.format().
header = (
    '\n'
    '/**\n'
    ' * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_company_id.py\n'
    ' * {datetime}\n'
    ' */\n'
    '\n'
    '#ifndef BLUETOOTH_COMPANY_ID_H\n'
    '#define BLUETOOTH_COMPANY_ID_H\n'
)

# Comment block written before the entries of a scraped page; {page} is filled in via str.format().
page_info = (
    '\n'
    '/**\n'
    ' * Assigned numbers from {page}\n'
    ' */\n'
)

# Closing part of the generated C header (ends the include guard).
trailer = (
    '\n'
    '#endif\n'
)

# Macro names seen so far; scrape_page() consults and extends this list to spot duplicates.
tags = list()

45
def strip_non_ascii(string):
    """Return *string* without any character whose code point is outside 1..126.

    Besides non-ASCII characters this also drops NUL (0) and DEL (127).
    """
    kept = [ch for ch in string if 1 <= ord(ch) <= 126]
    return ''.join(kept)
49
# Parenthesized remark. The match is greedy: it runs from the first '(' to the
# last ')' on a line. Raw string avoids the invalid '\(' escape warning.
_PARENTHESIZED = re.compile(r'\(.*\)')

# Any character that cannot be part of a C identifier once the name is upper-cased.
_INVALID_IDENTIFIER_CHAR = re.compile(r'[^A-Z0-9_]')

# Substitutions applied to the upper-cased name, strictly in this order.
# The order matters: e.g. ' - ' must be handled before '-', and '&AMP;'
# before the bare '&'. The double-space rule is intentionally applied three
# times so that the generated names stay identical to earlier runs.
_NAME_SUBSTITUTIONS = (
    ('&AMP;', ' AND '),
    ('&#39;', ''),
    ('&QUOT;', ' '),
    ('+', ' AND '),
    (' - ', ' '),
    ('/', ' '),
    (';', ' '),
    (',', ''),
    ('.', ''),
    ('-', '_'),
    ('  ', ' '),
    ('  ', ' '),
    ('  ', ' '),
    (' ', '_'),
    ('&', 'AND'),
    ("'", '_'),
    ('"', '_'),
    ('!', '_'),
)


def create_name(company):
    """Turn a company name into a ``BLUETOOTH_COMPANY_ID_*`` macro name.

    The name is reduced to ASCII, parenthesized remarks and trailing
    whitespace are removed, the text is upper-cased and punctuation is
    rewritten by the ordered substitution table. Any character that is still
    not valid in a C identifier is finally replaced by an underscore, so the
    result can always be used after ``#define``.

    :param company: company name as text
    :returns: macro name starting with ``BLUETOOTH_COMPANY_ID_``
    """
    # limit to ascii
    company = strip_non_ascii(company)
    # remove parts in braces
    tag = _PARENTHESIZED.sub('', company).rstrip().upper()
    for old, new in _NAME_SUBSTITUTIONS:
        tag = tag.replace(old, new)
    # names that were already valid identifiers pass through unchanged
    tag = _INVALID_IDENTIFIER_CHAR.sub('_', tag)
    return "BLUETOOTH_COMPANY_ID_" + tag
75
def scrape_page(fout, url):
    """Fetch the page at *url* and write one ``#define`` per table row to *fout*.

    For every ``//table/tbody/tr`` row the text of the second cell is used as
    the macro value and the text of the third cell is converted into the
    macro name with create_name(). Names are recorded in the module-level
    ``tags`` list; a name that was already used gets the first free numeric
    suffix (2, 3, ...) so that no macro is defined twice.

    :param fout: writable text file object receiving the generated lines
    :param url: address of the page to scrape
    :raises requests.HTTPError: if the server answers with an error status
    :raises requests.RequestException: on connection problems or timeout
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url.replace('https://','')))

    # get from web; the timeout keeps the tool from hanging on a dead
    # connection, and an HTTP error must not be parsed as if it were the table
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html' instead
    # with codecs.open("company-identifiers.html", "r", "utf-8") as f:
    #     content = f.read()

    tree = html.fromstring(content)
    rows = tree.xpath('//table/tbody/tr')
    for row in rows:
        cells = list(row)
        # skip rows that do not carry both an id cell and a company cell
        if len(cells) < 3:
            continue
        id_hex  = cells[1].text_content().strip()
        company = create_name(cells[2].text_content())
        if company in tags:
            # first duplicate becomes NAME2, further ones NAME3, NAME4, ...
            base = company
            suffix = 2
            company = "%s%d" % (base, suffix)
            while company in tags:
                suffix += 1
                company = "%s%d" % (base, suffix)
        tags.append(company)
        fout.write("#define %-80s %s\n" %  (company, id_hex))

    # map CSR onto QTIL
    fout.write("#define BLUETOOTH_COMPANY_ID_CAMBRIDGE_SILICON_RADIO BLUETOOTH_COMPANY_ID_QUALCOMM_TECHNOLOGIES_INTERNATIONAL_LTD\n")
104
# Locate the repository root as the parent of the directory containing this
# script. The script path is made absolute first: with a bare file name
# (e.g. "python3 bluetooth_company_id.py") dirname() is empty and appending
# '/..' would otherwise resolve to the filesystem root.
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
btstack_root = os.path.dirname(script_dir)
gen_path = os.path.join(btstack_root, 'src', 'bluetooth_company_id.h')

print(program_info)

# Generate into a temporary file next to the target and swap it in only after
# scraping finished, so a failed run does not destroy the existing header.
tmp_path = gen_path + '.tmp'
try:
    with open(tmp_path, 'wt') as fout:
        fout.write(header.format(datetime=str(datetime.datetime.now())))
        scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers')
        fout.write(trailer)
    os.replace(tmp_path, gen_path)
finally:
    # only present if something went wrong before the swap
    if os.path.exists(tmp_path):
        os.remove(tmp_path)

print('Scraping successful!\n')
116