#!/usr/bin/python
#
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Given a list of candidates on stdin, produce a file of hashes ("map file").

Usage: hash_candidates.py <params file>

One candidate is read per input line.  One CSV row is written per candidate:
the candidate itself followed by the 1-based, cohort-offset bit positions
computed for it in every cohort.
"""

import csv
import sys

import rappor


def HashCandidates(params, stdin, stdout):
    """Write one CSV row of Bloom filter bit positions per candidate.

    Args:
      params: object providing num_bloombits, num_cohorts and num_hashes
        (main() passes the result of rappor.Params.from_csv).
      stdin: iterable of lines; each line, stripped of surrounding
        whitespace, is treated as one candidate.
      stdout: file-like object that receives the CSV rows.
    """
    num_bloombits = params.num_bloombits
    csv_out = csv.writer(stdout)

    for line in stdin:
        word = line.strip()
        row = [word]
        for cohort in xrange(params.num_cohorts):
            # NOTE(review): presumably yields 0-based bit indices below
            # num_bloombits -- confirm against rappor.get_bloom_bits.
            bloom_bits = rappor.get_bloom_bits(word, cohort, params.num_hashes,
                                               num_bloombits)
            for bit_to_set in bloom_bits:
                # bits are indexed from 1.  Add a fixed offset for each cohort.
                # NOTE: This detail could be omitted from the map file format,
                # and done in R.
                row.append(cohort * num_bloombits + (bit_to_set + 1))
        csv_out.writerow(row)


def main(argv):
    """Load the params file named by argv[1], then hash stdin to stdout.

    Args:
      argv: command-line arguments; argv[1] is the path of the params file.

    Raises:
      RuntimeError: if the params file argument is missing, or if
        rappor.Params.from_csv raises rappor.Error.
    """
    try:
        filename = argv[1]
    except IndexError:
        raise RuntimeError('Usage: hash_candidates.py <params file>')
    with open(filename) as f:
        try:
            params = rappor.Params.from_csv(f)
        except rappor.Error as e:
            # Re-raise as RuntimeError so the entry point below reports it.
            raise RuntimeError(e)

    HashCandidates(params, sys.stdin, sys.stdout)


if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        # Report the failure on stderr and exit non-zero instead of
        # showing a traceback.
        print >>sys.stderr, e.args[0]
        sys.exit(1)