// Copyright 2018 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package shard provides functions to help sharding your data.
package shard

import (
	"archive/zip"
	"errors"
	"fmt"
	"hash/fnv"
	"io"
	"strings"
)

// Func converts a name and a number of shards into a particular shard index.
type Func func(name string, shardCount int) int

// FNV uses the FNV hash algo on the provided string and mods its result by shardCount.
31*9e965d6fSRomain Jobredeauxfunc FNV(name string, shardCount int) int { 32*9e965d6fSRomain Jobredeaux h := fnv.New32() 33*9e965d6fSRomain Jobredeaux h.Write([]byte(name)) 34*9e965d6fSRomain Jobredeaux return int(h.Sum32()) % shardCount 35*9e965d6fSRomain Jobredeaux} 36*9e965d6fSRomain Jobredeaux 37*9e965d6fSRomain Jobredeaux// MakeSepFunc creates a shard function that takes a substring from 0 to the last occurrence of 38*9e965d6fSRomain Jobredeaux// separator from the name to be sharded, and passes that onto the provided shard function. 39*9e965d6fSRomain Jobredeauxfunc MakeSepFunc(sep string, s Func) Func { 40*9e965d6fSRomain Jobredeaux return func(name string, shardCount int) int { 41*9e965d6fSRomain Jobredeaux idx := strings.LastIndex(name, sep) 42*9e965d6fSRomain Jobredeaux if idx == -1 { 43*9e965d6fSRomain Jobredeaux return s(name, shardCount) 44*9e965d6fSRomain Jobredeaux } 45*9e965d6fSRomain Jobredeaux return s(name[:idx], shardCount) 46*9e965d6fSRomain Jobredeaux } 47*9e965d6fSRomain Jobredeaux} 48*9e965d6fSRomain Jobredeaux 49*9e965d6fSRomain Jobredeaux// ZipShard takes a given zip reader, and shards its content across the provided io.Writers 50*9e965d6fSRomain Jobredeaux// utilizing the provided SharderFunc. 
51*9e965d6fSRomain Jobredeauxfunc ZipShard(r *zip.Reader, zws []*zip.Writer, fn Func) error { 52*9e965d6fSRomain Jobredeaux sc := len(zws) 53*9e965d6fSRomain Jobredeaux if sc == 0 { 54*9e965d6fSRomain Jobredeaux return errors.New("no output writers") 55*9e965d6fSRomain Jobredeaux } 56*9e965d6fSRomain Jobredeaux 57*9e965d6fSRomain Jobredeaux for _, f := range r.File { 58*9e965d6fSRomain Jobredeaux if !f.Mode().IsRegular() { 59*9e965d6fSRomain Jobredeaux continue 60*9e965d6fSRomain Jobredeaux } 61*9e965d6fSRomain Jobredeaux si := fn(f.Name, sc) 62*9e965d6fSRomain Jobredeaux if si < 0 || si > sc { 63*9e965d6fSRomain Jobredeaux return fmt.Errorf("s.Shard(%s, %d) yields invalid shard index: %d", f.Name, sc, si) 64*9e965d6fSRomain Jobredeaux } 65*9e965d6fSRomain Jobredeaux zw := zws[si] 66*9e965d6fSRomain Jobredeaux var rc io.ReadCloser 67*9e965d6fSRomain Jobredeaux rc, err := f.Open() 68*9e965d6fSRomain Jobredeaux if err != nil { 69*9e965d6fSRomain Jobredeaux return fmt.Errorf("%s: could not open: %v", f.Name, err) 70*9e965d6fSRomain Jobredeaux } 71*9e965d6fSRomain Jobredeaux var zo io.Writer 72*9e965d6fSRomain Jobredeaux zo, err = zw.CreateHeader(&zip.FileHeader{ 73*9e965d6fSRomain Jobredeaux Name: f.Name, 74*9e965d6fSRomain Jobredeaux Method: zip.Store, 75*9e965d6fSRomain Jobredeaux }) 76*9e965d6fSRomain Jobredeaux if err != nil { 77*9e965d6fSRomain Jobredeaux return fmt.Errorf("%s: could not create output entry: %v", f.Name, err) 78*9e965d6fSRomain Jobredeaux } 79*9e965d6fSRomain Jobredeaux if err := copyAndClose(zo, rc); err != nil { 80*9e965d6fSRomain Jobredeaux return fmt.Errorf("%s: copy to output failed: %v", f.Name, err) 81*9e965d6fSRomain Jobredeaux } 82*9e965d6fSRomain Jobredeaux } 83*9e965d6fSRomain Jobredeaux return nil 84*9e965d6fSRomain Jobredeaux} 85*9e965d6fSRomain Jobredeaux 86*9e965d6fSRomain Jobredeauxfunc copyAndClose(w io.Writer, rc io.ReadCloser) error { 87*9e965d6fSRomain Jobredeaux defer rc.Close() 88*9e965d6fSRomain Jobredeaux _, err := 
io.Copy(w, rc) 89*9e965d6fSRomain Jobredeaux return err 90*9e965d6fSRomain Jobredeaux} 91