// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";
import "google/protobuf/struct.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadGroupProto";
option java_package = "com.google.genomics.v1";

// A read group is all the data that's processed the same way by the sequencer.
message ReadGroup {
  // Details of the sequencing experiment used to generate a read group.
  message Experiment {
    // A client-supplied library identifier; a library is a collection of DNA
    // fragments which have been prepared for sequencing from a sample. This
    // field is important for quality control as error or bias can be
    // introduced during sample preparation.
    string library_id = 1;

    // The platform unit used as part of this experiment, for example
    // flowcell-barcode.lane for Illumina or slide for SOLiD. Corresponds to
    // the @RG PU field in the SAM spec.
    string platform_unit = 2;

    // The sequencing center used as part of this experiment.
    string sequencing_center = 3;

    // The instrument model used as part of this experiment. This maps to
    // sequencing technology in the SAM spec.
    string instrument_model = 4;
  }

  // A program used to generate a read group. Programs are ordered relative to
  // one another through `id` and `prev_program_id`.
  message Program {
    // The command line used to run this program.
    string command_line = 1;

    // The user-specified, locally unique ID of the program. Used along with
    // `prev_program_id` to define an ordering between programs.
    string id = 2;

    // The display name of the program. This is typically the colloquial name
    // of the tool used, for example 'bwa' or 'picard'.
    string name = 3;

    // The ID of the program run before this one.
    string prev_program_id = 4;

    // The version of the program run.
    string version = 5;
  }

  // The server-generated read group ID, unique for all read groups.
  // Note: This is different from the @RG ID field in the SAM spec. For that
  // value, see [name][google.genomics.v1.ReadGroup.name].
  string id = 1;

  // The dataset to which this read group belongs.
  string dataset_id = 2;

  // The read group name. This corresponds to the @RG ID field in the SAM spec.
  string name = 3;

  // A free-form text description of this read group.
  string description = 4;

  // A client-supplied sample identifier for the reads in this read group.
  string sample_id = 5;

  // The experiment used to generate this read group.
  Experiment experiment = 6;

  // The predicted insert size of this read group. The insert size is the
  // length of the sequenced DNA fragment from end-to-end, not including the
  // adapters.
  int32 predicted_insert_size = 7;

  // NOTE(review): field numbers 8 and 9 are not used in this message. If they
  // previously belonged to removed fields, add `reserved 8, 9;` so they are
  // never reused -- confirm against the schema history.

  // The programs used to generate this read group. Programs are always
  // identical for all read groups within a read group set. For this reason,
  // only the first read group in a returned set will have this field
  // populated.
  repeated Program programs = 10;

  // The reference set the reads in this read group are aligned to.
  string reference_set_id = 11;

  // A map of additional read group information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 12;
}