xref: /aosp_15_r20/external/executorch/extension/llm/tokenizer/utils.py (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1# Copyright (c) Meta Platforms, Inc. and affiliates.
2# All rights reserved.
3#
4# This source code is licensed under the BSD-style license found in the
5# LICENSE file in the root directory of this source tree.
6
7from executorch.examples.models.llama.tokenizer.tiktoken import Tokenizer as Tiktoken
8from executorch.extension.llm.tokenizer.tokenizer import (
9    Tokenizer as SentencePieceTokenizer,
10)
11
12
def get_tokenizer(tokenizer_path):
    """Build a tokenizer object from the model file at ``tokenizer_path``.

    The path is converted with ``str()`` and handed to
    ``SentencePieceTokenizer`` first. If that attempt raises any
    ``Exception``, the message "Using Tiktokenizer" is printed and a
    ``Tiktoken`` tokenizer is constructed from the same path instead.

    Args:
        tokenizer_path: Location of the tokenizer model; anything accepted
            by ``str()`` (presumably a ``str`` or path-like object --
            confirm against callers).

    Returns:
        The ``SentencePieceTokenizer`` instance when it loads, otherwise the
        ``Tiktoken`` instance.

    Raises:
        Exception: Whatever ``Tiktoken`` raises if the fallback fails too.
    """
    try:
        return SentencePieceTokenizer(model_path=str(tokenizer_path))
    except Exception:
        # Deliberately broad: any failure of the first backend triggers the
        # fallback. The second attempt stays inside this handler so that a
        # failure there remains chained to the original error.
        print("Using Tiktokenizer")
        return Tiktoken(model_path=str(tokenizer_path))
20