// xref: /aosp_15_r20/external/googleapis/google/genomics/v1/cigar.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";

// Code-generation options for the C++, Go and Java targets.
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "CigarProto";
option java_package = "com.google.genomics.v1";

// A single CIGAR operation.
message CigarUnit {
  // Describes the different types of CIGAR alignment operations that exist.
  // Used wherever CIGAR alignments are used.
  enum Operation {
    // Default (zero) value; the operation type has not been specified.
    OPERATION_UNSPECIFIED = 0;

    // An alignment match indicates that a sequence can be aligned to the
    // reference without evidence of an INDEL. Unlike the
    // `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators,
    // the `ALIGNMENT_MATCH` operator does not indicate whether the
    // reference and read sequences are an exact match. This operator is
    // equivalent to SAM's `M`.
    ALIGNMENT_MATCH = 1;

    // The insert operator indicates that the read contains evidence of bases
    // being inserted into the reference. This operator is equivalent to SAM's
    // `I`.
    INSERT = 2;

    // The delete operator indicates that the read contains evidence of bases
    // being deleted from the reference. This operator is equivalent to SAM's
    // `D`.
    DELETE = 3;

    // The skip operator indicates that this read skips a long segment of the
    // reference, but the bases have not been deleted. This operator is commonly
    // used when working with RNA-seq data, where reads may skip long segments
    // of the reference between exons. This operator is equivalent to SAM's
    // `N`.
    SKIP = 4;

    // The soft clip operator indicates that bases at the start/end of a read
    // have not been considered during alignment. This may occur if the majority
    // of a read maps, except for low quality bases at the start/end of a read.
    // This operator is equivalent to SAM's `S`. Bases that are soft
    // clipped will still be stored in the read.
    CLIP_SOFT = 5;

    // The hard clip operator indicates that bases at the start/end of a read
    // have been omitted from this alignment. This may occur if this linear
    // alignment is part of a chimeric alignment, or if the read has been
    // trimmed (for example, during error correction or to trim poly-A tails for
    // RNA-seq). This operator is equivalent to SAM's `H`.
    CLIP_HARD = 6;

    // The pad operator indicates that there is padding in an alignment. This
    // operator is equivalent to SAM's `P`.
    PAD = 7;

    // This operator indicates that this portion of the aligned sequence exactly
    // matches the reference. This operator is equivalent to SAM's `=`.
    SEQUENCE_MATCH = 8;

    // This operator indicates that this portion of the aligned sequence is an
    // alignment match to the reference, but a sequence mismatch. This can
    // indicate a SNP or a read error. This operator is equivalent to SAM's
    // `X`.
    SEQUENCE_MISMATCH = 9;
  }

  // The type of CIGAR operation that this unit describes.
  Operation operation = 1;

  // The number of genomic bases that the operation runs for. Required.
  int64 operation_length = 2;

  // `referenceSequence` is only used at mismatches
  // (`SEQUENCE_MISMATCH`) and deletions (`DELETE`).
  // Filling this field replaces SAM's MD tag. If the relevant information is
  // not available, this field is unset.
  string reference_sequence = 3;
}