xref: /aosp_15_r20/external/rappor/bin/hash_candidates.py (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
#!/usr/bin/python
#
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16*2abb3134SXin Li
"""
Given a list of candidates on stdin (one per line), write a CSV "map file" of
their hashes to stdout.

Usage: hash_candidates.py <params file>
"""
20*2abb3134SXin Li
21*2abb3134SXin Liimport csv
22*2abb3134SXin Liimport sys
23*2abb3134SXin Li
24*2abb3134SXin Liimport rappor
25*2abb3134SXin Li
26*2abb3134SXin Li
def HashCandidates(params, stdin, stdout):
  """Write one CSV row of Bloom filter bit positions per candidate.

  Each line of `stdin` is stripped of surrounding whitespace and treated as a
  candidate string.  The row written for it starts with the candidate itself,
  followed by the bits returned by rappor.get_bloom_bits() for every cohort in
  [0, params.num_cohorts), converted to 1-based positions and shifted by
  cohort * params.num_bloombits.

  Note that a blank input line is not skipped: it produces a row for the empty
  string.

  Args:
    params: object providing num_bloombits, num_cohorts and num_hashes.
    stdin: iterable of candidate lines.
    stdout: file-like object that receives the CSV rows.
  """
  num_bloombits = params.num_bloombits
  csv_out = csv.writer(stdout)

  for line in stdin:
    word = line.strip()
    row = [word]
    # range() instead of xrange(): xrange does not exist on Python 3, while
    # range() iterates identically on both major versions.
    for cohort in range(params.num_cohorts):
      bloom_bits = rappor.get_bloom_bits(word, cohort, params.num_hashes,
                                         num_bloombits)
      # bits are indexed from 1.  Add a fixed offset for each cohort.
      # NOTE: This detail could be omitted from the map file format, and done
      # in R.
      cohort_offset = cohort * num_bloombits
      row.extend(cohort_offset + (bit_to_set + 1) for bit_to_set in bloom_bits)
    csv_out.writerow(row)
43*2abb3134SXin Li
44*2abb3134SXin Li
def main(argv):
  """Load RAPPOR params from the file named by argv[1] and hash stdin.

  The params file is parsed with rappor.Params.from_csv(); candidates are then
  read from sys.stdin and the resulting map file is written to sys.stdout.

  Args:
    argv: command line arguments, with the params file path at index 1.

  Raises:
    RuntimeError: if the params file argument is missing, or if parsing the
      params file raises rappor.Error.
  """
  if len(argv) < 2:
    raise RuntimeError('Usage: hash_candidates.py <params file>')
  params_path = argv[1]

  with open(params_path) as params_file:
    try:
      params = rappor.Params.from_csv(params_file)
    except rappor.Error as err:
      # Re-raise as RuntimeError, the only type the script entry point reports.
      raise RuntimeError(err)

  # The params file is already closed here; only the parsed params are needed.
  HashCandidates(params, sys.stdin, sys.stdout)
57*2abb3134SXin Li
58*2abb3134SXin Li
if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    # main() signals usage and params-file errors with RuntimeError; report the
    # message on stderr and exit non-zero instead of dumping a traceback.
    # 'except ... as' and an explicit write() parse on both Python 2.6+ and
    # Python 3, unlike 'except X, e' and 'print >>stream'.
    sys.stderr.write('%s\n' % (e.args[0],))
    sys.exit(1)
65