1// Copyright 2017 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package serializer normalizes the license text and calculates the hash 16// values for all substrings in the license. It then outputs the normalized 17// text and hashes to disk in a compressed archive. 18package serializer 19 20import ( 21 "archive/tar" 22 "bytes" 23 "compress/gzip" 24 "io" 25 "log" 26 "path/filepath" 27 "strings" 28 29 "github.com/google/licenseclassifier" 30 "github.com/google/licenseclassifier/stringclassifier/searchset" 31) 32 33// ArchiveLicenses takes all of the known license texts, normalizes them, then 34// calculates the hash values of all substrings. The resulting normalized text 35// and hashed substring values are then serialized into an archive file. 36func ArchiveLicenses(licenses []string, w io.Writer) error { 37 gw := gzip.NewWriter(w) 38 defer gw.Close() 39 40 tw := tar.NewWriter(gw) 41 for _, license := range licenses { 42 // All license files have a ".txt" extension. 43 ext := filepath.Ext(license) 44 if ext != ".txt" { 45 continue 46 } 47 48 contents, err := licenseclassifier.ReadLicenseFile(license) 49 if err != nil { 50 return err 51 } 52 53 str := licenseclassifier.TrimExtraneousTrailingText(string(contents)) 54 for _, n := range licenseclassifier.Normalizers { 55 str = n(str) 56 } 57 58 baseName := strings.TrimSuffix(filepath.Base(license), ext) 59 60 // Serialize the normalized license text. 61 log.Printf("Serializing %q", baseName) 62 hdr := &tar.Header{ 63 Name: filepath.Base(license), 64 Mode: 0644, 65 Size: int64(len(str)), 66 } 67 68 if err := tw.WriteHeader(hdr); err != nil { 69 return err 70 } 71 if _, err := tw.Write([]byte(str)); err != nil { 72 return err 73 } 74 75 // Calculate the substrings' checksums 76 set := searchset.New(str, searchset.DefaultGranularity) 77 78 var s bytes.Buffer 79 if err := set.Serialize(&s); err != nil { 80 return err 81 } 82 83 // Serialize the checksums. 84 hdr = &tar.Header{ 85 Name: baseName + ".hash", 86 Mode: 0644, 87 Size: int64(s.Len()), 88 } 89 90 if err := tw.WriteHeader(hdr); err != nil { 91 return err 92 } 93 if _, err := tw.Write(s.Bytes()); err != nil { 94 return err 95 } 96 } 97 98 return tw.Close() 99} 100