xref: /btstack/tool/bluetooth_company_id.py (revision 779b256e497e6474bd4442b42f9661d8d4577675)
1*779b256eSMatthias Ringwald#!/usr/bin/env python
2*779b256eSMatthias Ringwald#
3*779b256eSMatthias Ringwald# Scrape company identifiers from Bluetooth SIG page
4*779b256eSMatthias Ringwald# https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers
5*779b256eSMatthias Ringwald#
6*779b256eSMatthias Ringwald# Copyright 2017 BlueKitchen GmbH
7*779b256eSMatthias Ringwald#
8*779b256eSMatthias Ringwald
9*779b256eSMatthias Ringwaldfrom lxml import html
10*779b256eSMatthias Ringwaldimport datetime
11*779b256eSMatthias Ringwaldimport requests
12*779b256eSMatthias Ringwaldimport sys
13*779b256eSMatthias Ringwaldimport codecs
14*779b256eSMatthias Ringwaldimport os
15*779b256eSMatthias Ringwaldimport re
16*779b256eSMatthias Ringwald
# Banner printed to the console when the script starts.
program_info = '''
BTstack Company ID Scraper for BTstack
Copyright 2017, BlueKitchen GmbH
'''

# Opening part of the generated C header: file comment plus include guard.
# It is passed through str.format(), so it must not contain literal braces.
header = '''
/**
 * bluetooth_company_id.h generated from Bluetooth SIG website for BTstack
 */

#ifndef __BLUETOOTH_COMPANY_ID_H
#define __BLUETOOTH_COMPANY_ID_H
'''

# Comment emitted before the defines of a scraped page; {page} is the URL.
page_info = '''
/**
 * Assigned numbers from {page}
 */
'''

# Closing part of the generated C header (ends the include guard).
trailer = '''
#endif
'''

# Identifiers already emitted, used to detect duplicate company names.
tags = []
42*779b256eSMatthias Ringwald
def create_name(company):
    """Convert a company name into a C preprocessor identifier.

    Everything from the first '(' up to the last ')' is dropped, the rest is
    upper-cased, punctuation is removed or spelled out ('&' and '+' become
    'AND'), and the remaining words are joined with underscores.

    company -- company name string
    returns -- the resulting tag prefixed with 'BLUETOOTH_COMPANY_ID_'
    """
    # remove parts in braces (raw string: '\(' is not a valid str escape)
    tag = re.sub(r'\(.*\)', '', company).rstrip().upper()
    tag = tag.replace('&', ' AND ')
    # drop apostrophes; the literal has to be double-quoted to be valid
    tag = tag.replace("'", '')
    tag = tag.replace('"', ' ')
    tag = tag.replace('+', ' AND ')
    # ' - ' must be handled before the generic '-' -> '_' replacement below
    tag = tag.replace(' - ', ' ')
    tag = tag.replace('/', ' ')
    tag = tag.replace(';', ' ')
    tag = tag.replace(',', '')
    tag = tag.replace('.', '')
    tag = tag.replace('-', '_')
    # collapse runs of spaces of any length so no '__' ends up in the name
    tag = re.sub(' +', ' ', tag)
    tag = tag.replace(' ', '_')
    return "BLUETOOTH_COMPANY_ID_" + tag
62*779b256eSMatthias Ringwald
def scrape_page(fout, url):
    """Download a company identifier page and write one #define per company.

    The company table is read from the script element that mentions
    'DataTable': the text between 'data:  [' and the 0xFFFF sentinel row is
    split into rows of the form ["<dec>","<hex>","<name>"].

    fout -- writable text file object receiving the generated C code
    url  -- address of the page to scrape

    Raises requests.exceptions.RequestException if the download fails or the
    server answers with an HTTP error status.
    """
    print("Parsing %s" % url)
    fout.write(page_info.format(page=url))

    # get from web; a timeout avoids hanging forever, and an HTTP error is
    # reported instead of parsing the error page
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    content = r.text

    # test: fetch from local file 'company-identifiers.html'
    # f = codecs.open("company-identifiers.html", "r", "utf-8")
    # content = f.read();

    # what create_name() returns when nothing of the company name is left
    empty_name = create_name('')

    tree = html.fromstring(content)
    # get all java script
    rows = tree.xpath('//script')
    for row in rows:
        script = row.text_content()
        if 'DataTable' not in script:
            continue
        start_tag = 'data:  ['
        end_tag   = '["65535","0xFFFF",'
        start = script.find(start_tag)
        end   = script.find(end_tag)
        if start < 0 or end < 0:
            # find() returns -1 when a marker is missing; slicing with it
            # would silently yield garbage
            print("Company table markers not found in script, skipping")
            continue
        company_list = script[start + len(start_tag):end]
        for entry in company_list.split('],'):
            if len(entry) < 5:
                break
            # drop the leading '[' of the row
            entry = entry[1:]
            fields = entry.split('","')
            if len(fields) < 3:
                # malformed row without decimal id, hex id and name
                continue
            id_hex = fields[1]
            # the last field still carries its closing quote
            company = create_name(fields[2][:-1])
            if company == empty_name:
                # nothing usable left of the name
                continue
            if company in tags:
                # make repeated names unique: NAME2, NAME3, ...
                suffix = 2
                while company + str(suffix) in tags:
                    suffix += 1
                company = company + str(suffix)
            tags.append(company)
            fout.write("#define %-80s %s\n" %  (company, id_hex))
101*779b256eSMatthias Ringwald
# Locate the BTstack root (parent of the directory containing this script).
# abspath() is applied before dirname(): when the script is started from its
# own directory, sys.argv[0] has no directory part and dirname() alone would
# return '', making '/..' resolve to the filesystem root.
btstack_root = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..'))
gen_path = btstack_root + '/src/bluetooth_company_id.h'

print(program_info)

with open(gen_path, 'wt') as fout:
    # header currently has no {datetime} placeholder, so the argument is ignored
    fout.write(header.format(datetime=str(datetime.datetime.now())))
    scrape_page(fout, 'https://www.bluetooth.com/specifications/assigned-numbers/company-identifiers')
    fout.write(trailer)

print('Scraping successful!\n')