xref: /aosp_15_r20/external/executorch/backends/mediatek/runtime/include/NeuronBackend.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) 2024 MediaTek Inc.
3  *
4  * Licensed under the BSD License (the "License"); you may not use this file
5  * except in compliance with the License. See the license file in the root
6  * directory of this source tree for more details.
7  */
8 
9 #pragma once
10 
11 #include "NeuronBufferAllocator.h"
12 #include "NeuronExecutor.h"
13 #include "NeuronLog.h"
14 #include "NeuronPayloadHeader.h"
15 #include "api/APUWareUtilsLib.h"
16 #include "api/NeuronAdapter.h"
17 
18 #include <executorch/runtime/backend/interface.h>
19 #include <executorch/runtime/core/error.h>
20 #include <executorch/runtime/core/evalue.h>
21 
22 #include <memory>
23 #include <unordered_map>
24 #include <unordered_set>
25 
26 namespace executorch {
27 namespace backends {
28 namespace neuron {
29 
// ExecuTorch backend entry point for the MediaTek Neuron delegate.
// Stateless (no data members); all per-model state lives behind the
// DelegateHandle returned by init().
class NeuronBackend final : public ::executorch::runtime::BackendInterface {
 public:
  // Loads the delegate payload in `processed` into a runnable network and
  // returns an opaque handle for execute()/destroy().
  // NOTE(review): exact payload format and use of `compile_specs` are
  // implemented out-of-line — see the corresponding .cpp.
  ::executorch::runtime::Result<::executorch::runtime::DelegateHandle*> init(
      ::executorch::runtime::BackendInitContext& context,
      ::executorch::runtime::FreeableBuffer* processed,
      ::executorch::runtime::ArrayRef<::executorch::runtime::CompileSpec>
          compile_specs) const override;

  // Runs one inference on the network behind `handle`, with inputs/outputs
  // passed as EValues in `args`.
  ::executorch::runtime::Error execute(
      ET_UNUSED ::executorch::runtime::BackendExecutionContext& context,
      ::executorch::runtime::DelegateHandle* handle,
      ::executorch::runtime::EValue** args) const override;

  // Releases all resources owned by `handle`.
  void destroy(::executorch::runtime::DelegateHandle* handle) const override;

  // Whether the Neuron runtime is usable in the current environment.
  bool is_available() const override;
};
47 
48 extern const char kHighAddrKey[];
49 extern const char kImportForeverKey[];
50 
// Per-delegate options parsed from compile specs (see kHighAddrKey /
// kImportForeverKey above) and forwarded to the Neuron runtime.
struct NeuronDelegateSetting {
  // Emit the APUSys "high_addr" option in the runtime config.
  bool mHighAddr = false;

  // Emit the APUSys "import_forever" option in the runtime config.
  bool mImportForever = false;

  // Serializes the enabled flags into an "--apusys-config" runtime option
  // string. Returns an empty string when no flag is set so callers may pass
  // the result through unconditionally.
  // Fixed: marked const — this is a pure query and must be callable on a
  // const settings object.
  std::string ToRuntimeOption() const {
    if (mHighAddr && mImportForever) {
      return "--apusys-config \"{ \\\"high_addr\\\": true, \\\"import_forever\\\": true }\"";
    } else if (mHighAddr) {
      return "--apusys-config \"{ \\\"high_addr\\\": true }\"";
    } else if (mImportForever) {
      return "--apusys-config \"{ \\\"import_forever\\\": true }\"";
    } else {
      return "";
    }
  }
};
68 
69 class NeuronExecuTorchDelegate {
70  public:
71   class MemoryCache {
72    public:
73     template <bool isInput>
IsCached(int i,void * ptr)74     bool IsCached(int i, void* ptr) {
75       const auto& cache = isInput ? mInputCache : mOutputCache;
76       auto it = cache.find(i);
77       return (it != cache.end()) && (ptr == it->second);
78     }
79 
80     template <bool isInput>
UpdateCache(int i,void * ptr)81     void UpdateCache(int i, void* ptr) {
82       (isInput ? mInputCache[i] : mOutputCache[i]) = ptr;
83       return;
84     }
85 
86    private:
87     std::unordered_map<int, void*> mInputCache;
88 
89     std::unordered_map<int, void*> mOutputCache;
90   };
91 
NeuronExecuTorchDelegate()92   NeuronExecuTorchDelegate() {}
93 
~NeuronExecuTorchDelegate()94   ~NeuronExecuTorchDelegate() {
95     mPLock->Stop();
96   }
97 
LoadCompiledNetwork(NeuronPayload payload,NeuronDelegateSetting options)98   int LoadCompiledNetwork(
99       NeuronPayload payload,
100       NeuronDelegateSetting options) {
101     mSettings = options;
102     auto runtimeOption = mSettings.ToRuntimeOption();
103     auto res = mExecutor.LoadFromCompiledNetwork(
104         payload.CompiledNetwork,
105         payload.Header.DataLen,
106         payload.Header.InputCount,
107         payload.Header.OutputCount,
108         runtimeOption);
109     CHECK_NO_ERROR(res);
110     CHECK_TRUE(mExecutor.IsValid());
111     SummaryIoCounts();
112     mPLock = std::unique_ptr<ScopePerformancer>(new ScopePerformancer);
113     return NEURON_NO_ERROR;
114   }
115 
116   ::executorch::runtime::Error execute(
117       ET_UNUSED ::executorch::runtime::BackendExecutionContext& context,
118       ::executorch::runtime::EValue** args) const;
119 
120  private:
121   template <bool isInput>
IsCached(int index,void * ptr)122   bool IsCached(int index, void* ptr) const {
123     return mCache.IsCached</*isInput=*/isInput>(index, ptr);
124   }
125 
126   template <bool isInput>
UpdateCache(int index,void * ptr)127   void UpdateCache(int index, void* ptr) const {
128     mCache.UpdateCache<isInput>(index, ptr);
129   }
130 
SummaryIoCounts()131   int SummaryIoCounts() {
132     for (int i = 0;; i++) {
133       size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ true>(i);
134       if (size == 0) {
135         break;
136       }
137       LogInfo("NeuronBackend", "Model input:%d size: %lu", i, size);
138       mInputSizes.push_back(size);
139     }
140     for (int o = 0;; o++) {
141       size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ false>(o);
142       if (size == 0) {
143         break;
144       }
145       LogInfo("NeuronBackend", "Model output:%d size: %lu", o, size);
146       mOutputSizes.push_back(size);
147     }
148     return NEURON_NO_ERROR;
149   }
150 
151   int HintNeuronBackend(::executorch::runtime::EValue** args) const;
152 
153  private:
154   std::vector<size_t> mInputSizes;
155 
156   std::vector<size_t> mOutputSizes;
157 
158   mutable MemoryCache mCache;
159 
160   std::unique_ptr<ScopePerformancer> mPLock;
161 
162   neuron::NeuronExecutor mExecutor;
163 
164   NeuronDelegateSetting mSettings;
165 
166   mutable std::unordered_set<const void*> mHasImported;
167 
168  private:
169   NeuronExecuTorchDelegate(const NeuronExecuTorchDelegate&);
170 
171   NeuronExecuTorchDelegate operator=(const NeuronExecuTorchDelegate&);
172 };
173 
174 } // namespace neuron
175 } // namespace backends
176 } // namespace executorch
177