// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "CigarProto";
option java_package = "com.google.genomics.v1";

// A single CIGAR operation.
message CigarUnit {
  // Describes the different types of CIGAR alignment operations that exist.
  // Used wherever CIGAR alignments are used.
  enum Operation {
    // Default (zero) value; no operation type has been specified.
    OPERATION_UNSPECIFIED = 0;

    // An alignment match indicates that a sequence can be aligned to the
    // reference without evidence of an INDEL. Unlike the
    // `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators,
    // the `ALIGNMENT_MATCH` operator does not indicate whether the
    // reference and read sequences are an exact match. This operator is
    // equivalent to SAM's `M`.
    ALIGNMENT_MATCH = 1;

    // The insert operator indicates that the read contains evidence of bases
    // being inserted into the reference. This operator is equivalent to SAM's
    // `I`.
    INSERT = 2;

    // The delete operator indicates that the read contains evidence of bases
    // being deleted from the reference. This operator is equivalent to SAM's
    // `D`.
    DELETE = 3;

    // The skip operator indicates that this read skips a long segment of the
    // reference, but the bases have not been deleted. This operator is commonly
    // used when working with RNA-seq data, where reads may skip long segments
    // of the reference between exons. This operator is equivalent to SAM's
    // `N`.
    SKIP = 4;

    // The soft clip operator indicates that bases at the start/end of a read
    // have not been considered during alignment. This may occur if the majority
    // of a read maps, except for low quality bases at the start/end of a read.
    // This operator is equivalent to SAM's `S`. Bases that are soft
    // clipped will still be stored in the read.
    CLIP_SOFT = 5;

    // The hard clip operator indicates that bases at the start/end of a read
    // have been omitted from this alignment. This may occur if this linear
    // alignment is part of a chimeric alignment, or if the read has been
    // trimmed (for example, during error correction or to trim poly-A tails for
    // RNA-seq). This operator is equivalent to SAM's `H`.
    CLIP_HARD = 6;

    // The pad operator indicates that there is padding in an alignment. This
    // operator is equivalent to SAM's `P`.
    PAD = 7;

    // This operator indicates that this portion of the aligned sequence exactly
    // matches the reference. This operator is equivalent to SAM's `=`.
    SEQUENCE_MATCH = 8;

    // This operator indicates that this portion of the aligned sequence is an
    // alignment match to the reference, but a sequence mismatch. This can
    // indicate a SNP or a read error. This operator is equivalent to SAM's
    // `X`.
    SEQUENCE_MISMATCH = 9;
  }

  // The kind of CIGAR operation this unit describes.
  Operation operation = 1;

  // The number of genomic bases that the operation runs for. Required.
  int64 operation_length = 2;

  // `referenceSequence` is only used at mismatches
  // (`SEQUENCE_MISMATCH`) and deletions (`DELETE`).
  // Filling this field replaces SAM's MD tag. If the relevant information is
  // not available, this field is unset.
  string reference_sequence = 3;
}