xref: /aosp_15_r20/external/executorch/backends/apple/coreml/runtime/inmemoryfs/inmemory_filesystem_py.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 //
2 // inmemory_filesystem_py.cpp
3 //
4 // Copyright © 2024 Apple Inc. All rights reserved.
5 //
6 // Please refer to the license found in the LICENSE file in the root directory of the source tree.
7 
8 
9 #include <iostream>
10 #include <memory>
11 #include <mutex>
12 #include <sstream>
13 #include <stdexcept>
14 #include <string>
15 #include <sys/mman.h>
16 #include <system_error>
17 #include <thread>
18 #include <unistd.h>
19 
20 #include <pybind11/pybind11.h>
21 #include <pybind11/pytypes.h>
22 
23 #include "inmemory_filesystem_utils.hpp"
24 #include "memory_buffer.hpp"
25 #include "memory_stream.hpp"
26 
27 #if __has_include(<filesystem>)
28 #include <filesystem>
29 #elif __has_include(<experimental/filesystem>)
30 #include <experimental/filesystem>
31 namespace std {
32 namespace filesystem = std::experimental::filesystem;
33 }
34 #endif
35 
36 namespace executorchcoreml {
37 
/// Maps `size` bytes of anonymous, private, readable and writable memory.
///
/// @param size  The number of bytes to map.
/// @retval The value returned by `mmap`: the address of the new mapping, or `MAP_FAILED` when the call fails.
void* alloc_using_mmap(size_t size) {
    const int protection = PROT_READ | PROT_WRITE;
    const int visibility = MAP_PRIVATE | MAP_ANONYMOUS;
    // No address hint and no backing file: descriptor -1, offset 0.
    void* mapping = mmap(nullptr, size, protection, visibility, -1, 0);
    return mapping;
}
41 
/// Passed to `std::call_once` by `intialize_external_bytes_type` so that `PyExternalBytes_Type` is set up only once.
std::once_flag external_bytes_initialization_flag;
/// Type object used for bytes objects that live in `mmap`-ed memory. It is filled in by
/// `intialize_external_bytes_type` as a copy of `PyBytes_Type` with `tp_free` set to `external_bytes_free`.
static PyTypeObject PyExternalBytes_Type;
44 
external_bytes_free(void * ptr)45 static void external_bytes_free(void* ptr) {
46     printf("external_bytes_free called \n");
47     PyBytesObject* obj = (PyBytesObject*)ptr;
48     Py_ssize_t size = Py_SIZE(obj);
49     munmap(obj, size);
50 }
51 
intialize_external_bytes_type()52 void intialize_external_bytes_type() {
53     std::call_once(external_bytes_initialization_flag, []() {
54         PyExternalBytes_Type = PyBytes_Type;
55         PyExternalBytes_Type.tp_free = external_bytes_free;
56     });
57 }
58 
initialize_buffer_as_bytes_object(void * buffer,Py_ssize_t size)59 PyBytesObject* initialize_buffer_as_bytes_object(void* buffer, Py_ssize_t size) {
60     intialize_external_bytes_type();
61     PyBytesObject* obj = (PyBytesObject*)buffer;
62     PyObject_INIT_VAR(obj, &PyExternalBytes_Type, size);
63     obj->ob_sval[size] = '\0';
64 
65     return obj;
66 }
67 
68 /// The method allocates memory using `mmap` and then reads the contents of the all files in the directory. The file
69 /// content is again memory mapped at fixed addresses in the allocated memory. The approach avoids dirtying the memory.
70 /// The down side of this method is that it could result in a larger file when the bytes are dumped to disk.
get_bytes_from_external_memory(const std::filesystem::path & dir_path)71 PyBytesObject* get_bytes_from_external_memory(const std::filesystem::path& dir_path) {
72     using namespace inmemoryfs;
73 
74     std::error_code error;
75     std::stringstream ss;
76     auto fs = InMemoryFileSystem::make_from_directory(dir_path, InMemoryFileSystem::FileLoadOption::LazyMMap, error);
77     if (fs == nullptr) {
78         ss << "Failed to create InMemoryFileSystem because of error=" << error.message().c_str() << "\n";
79         PyErr_SetString(PyExc_RuntimeError, ss.str().c_str());
80         return nullptr;
81     }
82 
83     size_t alignment = getpagesize();
84     size_t serialized_buffer_length = get_buffer_size_for_serialization(*fs, {}, alignment);
85     size_t py_bytes_obj_length = offsetof(PyBytesObject, ob_sval);
86     size_t py_bytes_obj_total_length = py_bytes_obj_length + serialized_buffer_length + 1;
87     void* backing_buffer = alloc_using_mmap(py_bytes_obj_total_length);
88     if (backing_buffer == NULL || (reinterpret_cast<int*>(backing_buffer) == MAP_FAILED)) {
89         ss << "Failed to allocate memory of size=" << py_bytes_obj_total_length / (1024 * 10224) << " mb.";
90         PyErr_SetString(PyExc_RuntimeError, ss.str().c_str());
91         return nullptr;
92     }
93 
94     if (!serialize(*fs, {}, alignment, static_cast<uint8_t*>(backing_buffer) + py_bytes_obj_length, error)) {
95         ss << "Failed to serialize directory contents because of error=" << error.message().c_str() << ".";
96         PyErr_SetString(PyExc_RuntimeError, ss.str().c_str());
97         return nullptr;
98     }
99 
100     PyBytesObject* bytes = initialize_buffer_as_bytes_object(backing_buffer, py_bytes_obj_total_length);
101     if (bytes == NULL) {
102         PyErr_SetString(PyExc_RuntimeError, "Failed to create bytes object.");
103         return nullptr;
104     }
105 
106     return bytes;
107 }
108 
109 /// The method writes to the memory managed by the python bytes object. The method dirties the memory and can be slow
110 /// but results in a relatively smaller file when the bytes are dumped to disk.
get_bytes(inmemoryfs::InMemoryFileSystem & fs,size_t length)111 PyBytesObject* get_bytes(inmemoryfs::InMemoryFileSystem& fs, size_t length) {
112     using namespace inmemoryfs;
113 
114     std::error_code error;
115     PyObject* bytes = PyBytes_FromStringAndSize(NULL, length);
116     void* data = static_cast<void*>(PyBytes_AsString(bytes));
117     if (!serialize(fs, {}, 1, data, error)) {
118         throw std::system_error(error.value(), error.category(), error.message());
119     }
120 
121     return (PyBytesObject*)bytes;
122 }
123 
/// Tells whether a model counts as large, i.e. whether its size is strictly greater than 1 GB (1024^3 bytes).
///
/// @param model_size_in_bytes  The size of the model in bytes.
/// @retval `true` if the size exceeds the threshold, otherwise `false`.
bool is_large_model(size_t model_size_in_bytes) {
    // 1 GB, written as 2^30 bytes.
    constexpr size_t one_gigabyte = size_t{1} << 30;
    if (model_size_in_bytes <= one_gigabyte) {
        return false;
    }
    return true;
}
128 
129 /// Flattens the directory contents at the specified path.
130 ///
131 /// @param path  The directory path
132 /// @retval The flattened directory contents.
flatten_directory_contents(const std::string & path)133 pybind11::bytes flatten_directory_contents(const std::string& path) {
134     using namespace inmemoryfs;
135 
136     std::filesystem::path fs_path(path);
137     std::error_code error;
138     auto canonical_path = std::filesystem::canonical(fs_path);
139     std::stringstream ss;
140     auto fs = InMemoryFileSystem::make_from_directory(canonical_path, InMemoryFileSystem::FileLoadOption::MMap, error);
141     if (fs == nullptr) {
142         ss << "Failed to create InMemoryFileSystem because of error=" << error.message().c_str() << ".";
143         PyErr_SetString(PyExc_RuntimeError, ss.str().c_str());
144         return nullptr;
145     }
146 
147     size_t model_size_in_bytes = get_buffer_size_for_serialization(*fs, {}, 1);
148     PyBytesObject* bytes = nullptr;
149     if (is_large_model(model_size_in_bytes)) {
150         bytes = get_bytes_from_external_memory(canonical_path);
151     } else {
152         bytes = get_bytes(*fs, model_size_in_bytes);
153     }
154 
155     return bytes == nullptr ? pybind11::none() : pybind11::reinterpret_steal<pybind11::object>((PyObject*)bytes);
156 }
157 
158 /// Unflattens and writes the contents of the memory buffer at the specified path.
159 ///
160 /// @param bytes  The bytes returned from `flatten_directory_contents`.
161 /// @param path  The directory path
unflatten_directory_contents(pybind11::bytes bytes,const std::string & path)162 bool unflatten_directory_contents(pybind11::bytes bytes, const std::string& path) {
163     using namespace inmemoryfs;
164 
165     char* buffer = nullptr;
166     ssize_t length = 0;
167     if (PYBIND11_BYTES_AS_STRING_AND_SIZE(bytes.ptr(), &buffer, &length)) {
168         pybind11::pybind11_fail("Failed to extract contents of bytes object!");
169     }
170     std::shared_ptr<MemoryBuffer> memory_buffer =
171         MemoryBuffer::make_unowned((void*)buffer, static_cast<size_t>(length));
172     auto fs = inmemoryfs::make_from_buffer(memory_buffer);
173     if (!fs) {
174         pybind11::pybind11_fail("Failed to de-serialize bytes object!");
175         return false;
176     }
177     std::error_code ec;
178     std::filesystem::path fs_path(path);
179     auto canonical_path = std::filesystem::canonical(fs_path);
180     if (!fs->write_item_to_disk({}, canonical_path, true, ec)) {
181         pybind11::pybind11_fail("Failed to write the item to disk!");
182         return false;
183     }
184 
185     return true;
186 }
187 } // namespace executorchcoreml
188 
/// Defines the `executorchcoreml` Python module and registers the flatten / unflatten functions on it under their
/// C++ names.
PYBIND11_MODULE(executorchcoreml, mod) {
    using executorchcoreml::flatten_directory_contents;
    using executorchcoreml::unflatten_directory_contents;

    mod.def("flatten_directory_contents", &flatten_directory_contents);
    mod.def("unflatten_directory_contents", &unflatten_directory_contents);
}
192 }
193