xref: /btstack/tool/bluetooth_company_id.py (revision ff7cea0b7e01509996067b2ffc2282a8bd733440)
#!/usr/bin/env python
#
# Scrape company identifiers from Bluetooth SIG page
# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
#
# Copyright 2017 BlueKitchen GmbH
#
8779b256eSMatthias Ringwald
9779b256eSMatthias Ringwaldfrom lxml import html
10779b256eSMatthias Ringwaldimport datetime
11779b256eSMatthias Ringwaldimport requests
12779b256eSMatthias Ringwaldimport sys
13779b256eSMatthias Ringwaldimport codecs
14779b256eSMatthias Ringwaldimport os
15779b256eSMatthias Ringwaldimport re
16779b256eSMatthias Ringwald
# HTTP request headers for downloading the page. The download in scrape_page
# is currently commented out, so this is unused at the moment.
# NOTE(review): presumably a curl user agent is used because the site rejects
# the default python-requests one - confirm before re-enabling the download.
headers = {'user-agent': 'curl/7.63.0'}

# Banner printed to the console when the script starts.
program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

# Opening part of the generated C header; {datetime} is filled in with the
# generation time.
header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack by tool/bluetooth_company_id.py
 * {datetime}
 */

#ifndef __BLUETOOTH_COMPANY_ID_H
#define __BLUETOOTH_COMPANY_ID_H
'''

# Comment block written before the defines of one page; {page} is its URL.
page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

# Closing part of the generated C header (ends the include guard).
trailer = '''
#endif
'''

# Macro names emitted so far; used to detect duplicate company names.
tags = []
45779b256eSMatthias Ringwald
def strip_non_ascii(string):
    """Return `string` without characters outside the code point range 1..126.

    Besides all non-ASCII characters this also drops NUL (0) and DEL (127);
    every other character is kept in its original order.
    """
    return ''.join(c for c in string if 0 < ord(c) < 127)
49*ff7cea0bSMatthias Ringwald
def create_name(company):
    """Build the C macro name for a company name taken from the web page.

    The name is reduced to ASCII, text in parentheses is dropped, the rest is
    upper-cased and punctuation/whitespace is rewritten so that the result is
    a valid C identifier.

    Args:
        company: company name as text.

    Returns:
        "BLUETOOTH_COMPANY_ID_" followed by the sanitized, upper-case name.
    """
    # limit to ascii
    company = strip_non_ascii(company)

    # Remove parts in parentheses. The match is greedy (first '(' up to the
    # last ')') which keeps previously generated names stable.
    tag = re.sub(r'\(.*\)', '', company).rstrip().upper()

    # Applied strictly in this order; later entries rely on earlier ones
    # (e.g. ' - ' must be handled before '-', spaces are collapsed before
    # being turned into underscores).
    replacements = (
        ('&AMP;', ' AND '),
        ('&#39;', ''),
        ('&QUOT;', ' '),
        ('+', ' AND '),
        (' - ', ' '),
        ('/', ' '),
        (';', ' '),
        (',', ''),
        ('.', ''),
        ('-', '_'),
        ('  ', ' '),
        ('  ', ' '),
        ('  ', ' '),
        (' ', '_'),
        ('&', 'AND'),
        ("'", '_'),
        ('"', '_'),
        ('!', '_'),
    )
    for old, new in replacements:
        tag = tag.replace(old, new)

    # Anything still outside [A-Z0-9_] (':', '@', '*', tabs, ...) would make
    # the #define invalid, so map it to an underscore as well.
    tag = re.sub(r'[^A-Z0-9_]', '_', tag)

    return "BLUETOOTH_COMPANY_ID_" + tag
75779b256eSMatthias Ringwald
def scrape_page(fout, url):
    """Write one ``#define`` per company found in the assigned-numbers table.

    The HTML is currently read from the local file "company-identifiers.html"
    in the current working directory; `url` is only used for the progress
    message and for the comment block written to `fout`.

    Args:
        fout: writable text file object that receives the generated lines.
        url: address of the page the company identifiers come from.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url))

    # The live download is disabled. To re-enable it, replace the file read
    # below with: content = requests.get(url, headers=headers).text
    # test: fetch from local file 'company-identifiers.html'
    with codecs.open("company-identifiers.html", "r", "utf-8") as f:
        content = f.read()

    tree = html.fromstring(content)
    for row in tree.xpath('//table/tbody/tr'):
        cells = list(row)
        # The second cell holds the hex id, the third the company name;
        # skip rows that do not have both.
        if len(cells) < 3:
            continue
        id_hex = cells[1].text_content().strip()
        name = create_name(cells[2].text_content())

        # Different companies can map onto the same macro name. Append an
        # increasing number (starting at 2) until the name is unused, and
        # remember the final name so it cannot be emitted a second time.
        company = name
        suffix = 2
        while company in tags:
            company = "%s%d" % (name, suffix)
            suffix += 1
        tags.append(company)

        fout.write("#define %-80s %s\n" % (company, id_hex))

    # map CSR onto QTIL
    fout.write("#define BLUETOOTH_COMPANY_ID_CAMBRIDGE_SILICON_RADIO BLUETOOTH_COMPANY_ID_QUALCOMM_TECHNOLOGIES_INTERNATIONAL_LTD\n")
104*ff7cea0bSMatthias Ringwald
# Locate the BTstack root as the parent of the directory containing this
# script. The script path is made absolute first: with a bare file name in
# sys.argv[0] (e.g. "python bluetooth_company_id.py") dirname() is empty and
# appending '/..' would otherwise resolve to the file system root.
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
btstack_root = os.path.abspath(os.path.join(script_dir, '..'))
gen_path = btstack_root + '/src/bluetooth_company_id.h'

print(program_info)

# Generate the header: preamble with timestamp, the scraped defines, trailer.
with open(gen_path, 'wt') as fout:
    fout.write(header.format(datetime=str(datetime.datetime.now())))
    scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers')
    fout.write(trailer)

print('Scraping successful!\n')
116