xref: /btstack/tool/bluetooth_company_id.py (revision 80e33422a96c028b3a9c308fc4b9b874712dafb4)
1779b256eSMatthias Ringwald#!/usr/bin/env python
2779b256eSMatthias Ringwald#
# Scrape Bluetooth Company Identifiers from Bluetooth SIG page
4779b256eSMatthias Ringwald# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
5779b256eSMatthias Ringwald#
6779b256eSMatthias Ringwald# Copyright 2017 BlueKitchen GmbH
7779b256eSMatthias Ringwald#
8779b256eSMatthias Ringwald
9779b256eSMatthias Ringwaldfrom lxml import html
10779b256eSMatthias Ringwaldimport datetime
11779b256eSMatthias Ringwaldimport requests
12779b256eSMatthias Ringwaldimport sys
13779b256eSMatthias Ringwaldimport codecs
14779b256eSMatthias Ringwaldimport os
15779b256eSMatthias Ringwaldimport re
16779b256eSMatthias Ringwald
# HTTP request headers passed to requests.get() in scrape_page().
# NOTE(review): presumably a curl user agent is sent because the site rejects
# the default one used by requests -- confirm.
headers = {'user-agent': 'curl/7.63.0'}

# Banner printed to stdout when the script starts.
program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

# Opening part of the generated C header, including the include guard.
# The {datetime} placeholder is filled in with str.format() before writing.
header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_company_id.py
 * {datetime}
 */

#ifndef BLUETOOTH_COMPANY_ID_H
#define BLUETOOTH_COMPANY_ID_H
'''

# Comment block written by scrape_page() before the defines of a page.
# The {page} placeholder is filled in with the scraped URL.
page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

# Closing part of the generated C header: ends the include guard.
trailer = '''
#endif
'''

# Macro names generated so far; scrape_page() checks new names against this
# list to detect duplicates.
tags = []
45779b256eSMatthias Ringwald
def strip_non_ascii(string):
    """Return `string` reduced to the characters with code points 1..126.

    NUL (0), DEL (127) and every non-ASCII character are dropped; the
    remaining characters keep their original order.
    """
    kept = []
    for char in string:
        code_point = ord(char)
        if code_point <= 0 or code_point >= 127:
            continue
        kept.append(char)
    return ''.join(kept)
49ff7cea0bSMatthias Ringwald
def create_name(company):
    """Build the C macro name for a company name.

    The name is reduced to ASCII, everything from the first '(' to the last
    ')' is removed, the rest is upper-cased and punctuation/whitespace is
    mapped to words or underscores. Any character that is still not valid in
    a C identifier afterwards is replaced by '_', so the result can always be
    used as the name in a `#define`.

    Args:
        company: company name as text.

    Returns:
        The string "BLUETOOTH_COMPANY_ID_" followed by the generated tag.
    """
    # limit to ascii
    company = strip_non_ascii(company)

    # remove parts in braces (greedy: spans from the first '(' to the last ')')
    tag = re.sub(r'\(.*\)', '', company).rstrip().upper()

    # Order matters: multi-character patterns have to be handled before the
    # single characters they contain.
    for old, new in (
        ('&AMP;', ' AND '),
        ('&#39;', ''),
        ('&QUOT;', ' '),
        ('+', ' AND '),
        (' - ', ' '),
        ('/', ' '),
        (';', ' '),
        (',', ''),
        ('.', ''),
        ('-', '_'),
    ):
        tag = tag.replace(old, new)

    # collapse runs of spaces of any length into a single space
    tag = re.sub(r' {2,}', ' ', tag)

    for old, new in (
        (' ', '_'),
        ('&', 'AND'),
        ("'", '_'),
        ('"', '_'),
        ('!', '_'),
    ):
        tag = tag.replace(old, new)

    # Whatever is left that a C identifier cannot contain (e.g. ':', '@',
    # tabs, newlines) becomes '_'. Tags that were already valid are unchanged.
    tag = re.sub(r'[^A-Z0-9_]', '_', tag)

    return "BLUETOOTH_COMPANY_ID_" + tag
75779b256eSMatthias Ringwald
def scrape_page(fout, url):
    """Fetch the company identifier table from `url` and write it as defines.

    Writes the `page_info` comment, then one `#define <name> <id>` line per
    table row, then the alias that maps Cambridge Silicon Radio onto Qualcomm
    Technologies International.

    Args:
        fout: writable text file object receiving the generated C code.
        url: address of the HTML page containing the table.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url))

    # get from web; fail on an HTTP error instead of parsing the error page,
    # and do not wait forever on a stalled connection
    r = requests.get(url, headers=headers, timeout=60)
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html'
    # f = codecs.open("company-identifiers.html", "r", "utf-8")
    # content = f.read();

    tree = html.fromstring(content)
    rows = tree.xpath('//table/tbody/tr')
    for row in rows:
        cells = list(row)
        # cell 1 holds the id, cell 2 the company name; skip shorter rows
        if len(cells) < 3:
            continue
        id_hex = cells[1].text_content().strip()
        base_name = create_name(cells[2].text_content())

        # Make the macro name unique: the first clash gets suffix "2", the
        # next one "3", and so on. Every emitted name is recorded so that a
        # later clash cannot produce the same macro twice.
        company = base_name
        suffix = 2
        while company in tags:
            company = "%s%d" % (base_name, suffix)
            suffix += 1
        tags.append(company)

        fout.write("#define %-80s %s\n" % (company, id_hex))

    # map CSR onto QTIL
    fout.write("#define BLUETOOTH_COMPANY_ID_CAMBRIDGE_SILICON_RADIO BLUETOOTH_COMPANY_ID_QUALCOMM_TECHNOLOGIES_INTERNATIONAL_LTD\n")
104ff7cea0bSMatthias Ringwald
# Locate the BTstack root as the parent of the directory containing this
# script. The script path is made absolute first: when the script is started
# as `python bluetooth_company_id.py`, os.path.dirname(sys.argv[0]) is '' and
# appending '/..' to it would resolve to the filesystem root.
btstack_root = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..'))
gen_path = btstack_root + '/src/bluetooth_company_id.h'

print(program_info)

# Generate the header: guard and timestamp, scraped defines, closing guard.
with open(gen_path, 'wt') as fout:
    fout.write(header.format(datetime=str(datetime.datetime.now())))
    scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers')
    fout.write(trailer)

print('Scraping successful!\n')
116