// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";
import "google/genomics/v1/cigar.proto";
import "google/genomics/v1/position.proto";
import "google/protobuf/struct.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadAlignmentProto";
option java_package = "com.google.genomics.v1";

// A linear alignment can be represented by one CIGAR string. Describes the
// mapped position and local alignment of the read to the reference.
message LinearAlignment {
  // The position of this alignment.
  Position position = 1;

  // The mapping quality of this alignment. Represents how likely
  // the read maps to this position as opposed to other locations.
  //
  // Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to
  // the nearest integer.
  int32 mapping_quality = 2;

  // Represents the local alignment of this sequence (alignment matches, indels,
  // etc) against the reference.
  repeated CigarUnit cigar = 3;
}

// A read alignment describes a linear alignment of a string of DNA to a
// [reference sequence][google.genomics.v1.Reference], in addition to metadata
// about the fragment (the molecule of DNA sequenced) and the read (the bases
// which were read by the sequencer). A read is equivalent to a line in a SAM
// file. A read belongs to exactly one read group and exactly one
// [read group set][google.genomics.v1.ReadGroupSet].
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// ### Reverse-stranded reads
//
// Mapped reads (reads having a non-null `alignment`) can be aligned to either
// the forward or the reverse strand of their associated reference. Strandedness
// of a mapped read is encoded by `alignment.position.reverseStrand`.
//
// If we consider the reference to be a forward-stranded coordinate space of
// `[0, reference.length)` with `0` as the left-most position and
// `reference.length` as the right-most position, reads are always aligned left
// to right. That is, `alignment.position.position` always refers to the
// left-most reference coordinate and `alignment.cigar` describes the alignment
// of this read to the reference from left to right. All per-base fields such as
// `alignedSequence` and `alignedQuality` share this same left-to-right
// orientation; this is true of reads which are aligned to either strand. For
// reverse-stranded reads, this means that `alignedSequence` is the reverse
// complement of the bases that were originally reported by the sequencing
// machine.
//
// ### Generating a reference-aligned sequence string
//
// When interacting with mapped reads, it's often useful to produce a string
// representing the local alignment of the read to reference. The following
// pseudocode demonstrates one way of doing this:
//
//     out = ""
//     offset = 0
//     for c in read.alignment.cigar {
//       switch c.operation {
//       case "ALIGNMENT_MATCH", "SEQUENCE_MATCH", "SEQUENCE_MISMATCH":
//         out += read.alignedSequence[offset:offset+c.operationLength]
//         offset += c.operationLength
//         break
//       case "CLIP_SOFT", "INSERT":
//         offset += c.operationLength
//         break
//       case "PAD":
//         out += repeat("*", c.operationLength)
//         break
//       case "DELETE":
//         out += repeat("-", c.operationLength)
//         break
//       case "SKIP":
//         out += repeat(" ", c.operationLength)
//         break
//       case "CLIP_HARD":
//         break
//       }
//     }
//     return out
//
// ### Converting to SAM's CIGAR string
//
// The following pseudocode generates a SAM CIGAR string from the
// `cigar` field. Note that this is a lossy conversion
// (`cigar.referenceSequence` is lost).
//
//     cigarMap = {
//       "ALIGNMENT_MATCH": "M",
//       "INSERT": "I",
//       "DELETE": "D",
//       "SKIP": "N",
//       "CLIP_SOFT": "S",
//       "CLIP_HARD": "H",
//       "PAD": "P",
//       "SEQUENCE_MATCH": "=",
//       "SEQUENCE_MISMATCH": "X",
//     }
//     cigarStr = ""
//     for c in read.alignment.cigar {
//       cigarStr += c.operationLength + cigarMap[c.operation]
//     }
//     return cigarStr
message Read {
  // The server-generated read ID, unique across all reads. This is different
  // from the `fragmentName`.
  string id = 1;

  // The ID of the read group this read belongs to. A read belongs to exactly
  // one read group. This is a server-generated ID which is distinct from SAM's
  // RG tag (for that value, see
  // [ReadGroup.name][google.genomics.v1.ReadGroup.name]).
  string read_group_id = 2;

  // The ID of the read group set this read belongs to. A read belongs to
  // exactly one read group set.
  string read_group_set_id = 3;

  // The fragment name. Equivalent to QNAME (query template name) in SAM.
  string fragment_name = 4;

  // The orientation and the distance between reads from the fragment are
  // consistent with the sequencing protocol (SAM flag 0x2).
  bool proper_placement = 5;

  // The fragment is a PCR or optical duplicate (SAM flag 0x400).
  bool duplicate_fragment = 6;

  // The observed length of the fragment, equivalent to TLEN in SAM.
  int32 fragment_length = 7;

  // The read number in sequencing. 0-based and less than numberReads. This
  // field replaces SAM flag 0x40 and 0x80.
  int32 read_number = 8;

  // The number of reads in the fragment (extension to SAM flag 0x1).
  int32 number_reads = 9;

  // Whether this read did not pass filters, such as platform or vendor quality
  // controls (SAM flag 0x200).
  bool failed_vendor_quality_checks = 10;

  // The linear alignment for this alignment record. This field is null for
  // unmapped reads.
  LinearAlignment alignment = 11;

  // Whether this alignment is secondary. Equivalent to SAM flag 0x100.
  // A secondary alignment represents an alternative to the primary alignment
  // for this read. Aligners may return secondary alignments if a read can map
  // ambiguously to multiple coordinates in the genome. By convention, each read
  // has one and only one alignment where both `secondaryAlignment`
  // and `supplementaryAlignment` are false.
  bool secondary_alignment = 12;

  // Whether this alignment is supplementary. Equivalent to SAM flag 0x800.
  // Supplementary alignments are used in the representation of a chimeric
  // alignment. In a chimeric alignment, a read is split into multiple
  // linear alignments that map to different reference contigs. The first
  // linear alignment in the read will be designated as the representative
  // alignment; the remaining linear alignments will be designated as
  // supplementary alignments. These alignments may have different mapping
  // quality scores. In each linear alignment in a chimeric alignment, the read
  // will be hard clipped. The `alignedSequence` and
  // `alignedQuality` fields in the alignment record will only
  // represent the bases for its respective linear alignment.
  bool supplementary_alignment = 13;

  // The bases of the read sequence contained in this alignment record,
  // **without CIGAR operations applied** (equivalent to SEQ in SAM).
  // `alignedSequence` and `alignedQuality` may be
  // shorter than the full read sequence and quality. This will occur if the
  // alignment is part of a chimeric alignment, or if the read was trimmed. When
  // this occurs, the CIGAR for this read will begin/end with a hard clip
  // operator that will indicate the length of the excised sequence.
  string aligned_sequence = 14;

  // The quality of the read sequence contained in this alignment record
  // (equivalent to QUAL in SAM).
  // `alignedSequence` and `alignedQuality` may be shorter than the full read
  // sequence and quality. This will occur if the alignment is part of a
  // chimeric alignment, or if the read was trimmed. When this occurs, the CIGAR
  // for this read will begin/end with a hard clip operator that will indicate
  // the length of the excised sequence.
  repeated int32 aligned_quality = 15;

  // The mapping of the primary alignment of the
  // `(readNumber+1)%numberReads` read in the fragment. It replaces
  // mate position and mate strand in SAM.
  Position next_mate_position = 16;

  // A map of additional read alignment information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 17;
}