// migration/v2alpha/translation_task.proto

// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Schema is written in proto3 syntax.
syntax = "proto3";

// All definitions in this file live in the v2alpha BigQuery Migration package.
package google.cloud.bigquery.migration.v2alpha;

// Per-language code generation options: the namespace/package that generated
// C#, Go, Java, and PHP code is placed in.
option csharp_namespace = "Google.Cloud.BigQuery.Migration.V2Alpha";
option go_package = "cloud.google.com/go/bigquery/migration/apiv2alpha/migrationpb;migrationpb";
// Java: emit one source file per top-level definition; file-level descriptors
// go into the `TranslationTaskProto` outer class.
option java_multiple_files = true;
option java_outer_classname = "TranslationTaskProto";
option java_package = "com.google.cloud.bigquery.migration.v2alpha";
option php_namespace = "Google\\Cloud\\BigQuery\\Migration\\V2alpha";

// Mapping between an input file and an output file to be translated in a
// subtask.
message TranslationFileMapping {
  // The Cloud Storage path of a file to be translated in a subtask.
  string input_path = 1;

  // The Cloud Storage path of the output file that corresponds to
  // `input_path`.
  string output_path = 2;
}

// The translation task config that captures the settings needed for a
// translation task and its subtasks.
//
// NOTE(review): field numbers 8 and 9 are not used by any field in this
// message. If they belonged to fields that were removed, they should be
// declared `reserved` so they cannot be reused — confirm against history.
message TranslationTaskDetails {
  // The file encoding types.
  enum FileEncoding {
    // File encoding setting is not specified.
    FILE_ENCODING_UNSPECIFIED = 0;

    // File encoding is UTF-8.
    UTF_8 = 1;

    // File encoding is ISO-8859-1.
    ISO_8859_1 = 2;

    // File encoding is US-ASCII.
    US_ASCII = 3;

    // File encoding is UTF-16.
    UTF_16 = 4;

    // File encoding is UTF-16LE (little-endian).
    UTF_16LE = 5;

    // File encoding is UTF-16BE (big-endian).
    UTF_16BE = 6;
  }

  // The data type of a special token (see `special_token_map`).
  enum TokenType {
    // Token type is not specified.
    TOKEN_TYPE_UNSPECIFIED = 0;

    // Token is treated as a string.
    STRING = 1;

    // Token is treated as an integer.
    INT64 = 2;

    // Token is treated as a numeric.
    NUMERIC = 3;

    // Token is treated as a boolean.
    BOOL = 4;

    // Token is treated as a float.
    FLOAT64 = 5;

    // Token is treated as a date.
    DATE = 6;

    // Token is treated as a timestamp.
    TIMESTAMP = 7;
  }

  // The language-specific settings for the translation task. At most one of
  // the members can be set.
  oneof language_options {
    // The Teradata SQL specific settings for the translation task.
    TeradataOptions teradata_options = 10;

    // The BTEQ specific settings for the translation task.
    BteqOptions bteq_options = 11;
  }

  // The Cloud Storage path for translation input files.
  string input_path = 1;

  // The Cloud Storage path for translation output files.
  string output_path = 2;

  // Cloud Storage files to be processed for translation, given as explicit
  // input/output path pairs.
  repeated TranslationFileMapping file_paths = 12;

  // The Cloud Storage path to DDL files that serve as the table schema to
  // assist semantic translation.
  string schema_path = 3;

  // The file encoding type.
  FileEncoding file_encoding = 4;

  // The settings for SQL identifiers.
  IdentifierSettings identifier_settings = 5;

  // The map capturing special tokens to be replaced during translation. The
  // key is the special token as a string. The value is the token's data type.
  // This is used to translate SQL query templates that contain special tokens
  // as placeholders. A special token makes a query invalid to parse, so this
  // map is applied to annotate those tokens with types, letting the parser
  // understand how to parse them into the proper structure with type
  // information.
  map<string, TokenType> special_token_map = 6;

  // The filter applied to translation details.
  Filter filter = 7;

  // Specifies the exact name of the BigQuery table ("dataset.table") to be
  // used for surfacing raw translation errors. If the table does not exist, it
  // is created. If it already exists and the schema is the same, it is reused.
  // If the table exists and the schema is different, an error is raised.
  string translation_exception_table = 13;
}

// The filter applied to fields of translation details.
message Filter {
  // The list of prefixes used to exclude input files from processing.
  repeated string input_file_exclusion_prefixes = 1;
}

// Settings related to SQL identifiers: the case used for identifiers in
// output queries and whether identifiers are rewritten.
message IdentifierSettings {
  // The identifier case type.
  enum IdentifierCase {
    // The identifier case is not specified.
    IDENTIFIER_CASE_UNSPECIFIED = 0;

    // Identifiers keep their original case.
    ORIGINAL = 1;

    // Identifiers will be in upper case.
    UPPER = 2;

    // Identifiers will be in lower case.
    LOWER = 3;
  }

  // The SQL identifier rewrite mode.
  enum IdentifierRewriteMode {
    // SQL identifier rewrite mode is unspecified.
    IDENTIFIER_REWRITE_MODE_UNSPECIFIED = 0;

    // SQL identifiers won't be rewritten.
    NONE = 1;

    // All SQL identifiers will be rewritten.
    REWRITE_ALL = 2;
  }

  // The setting to control the identifier case of output queries.
  IdentifierCase output_identifier_case = 1;

  // Specifies the rewrite mode for SQL identifiers.
  IdentifierRewriteMode identifier_rewrite_mode = 2;
}

// Teradata SQL specific translation task related settings.
message TeradataOptions {
  // This message currently declares no fields.
}

// BTEQ specific translation task related settings.
message BteqOptions {
  // Specifies the project and dataset in BigQuery that will be used for
  // external table creation during the translation.
  DatasetReference project_dataset = 1;

  // The Cloud Storage location to be used as the default path for files that
  // are not otherwise specified in `file_replacement_map`.
  string default_path_uri = 2;

  // Maps the local paths that are used in BTEQ scripts (the keys) to the paths
  // in Cloud Storage that should be used in their stead in the translation
  // (the values).
  map<string, string> file_replacement_map = 3;
}

// Reference to a BigQuery dataset, identified by the ID of its containing
// project together with the dataset's own ID.
message DatasetReference {
  // A unique ID for this dataset, without the project name. The ID
  // must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_).
  // The maximum length is 1,024 characters.
  string dataset_id = 1;

  // The ID of the project containing this dataset.
  string project_id = 2;
}