1 //
2 // inmemory_filesystem_py.cpp
3 //
4 // Copyright © 2024 Apple Inc. All rights reserved.
5 //
6 // Please refer to the license found in the LICENSE file in the root directory of the source tree.
7
8
9 #include <iostream>
10 #include <memory>
11 #include <mutex>
12 #include <sstream>
13 #include <stdexcept>
14 #include <string>
15 #include <sys/mman.h>
16 #include <system_error>
17 #include <thread>
18 #include <unistd.h>
19
20 #include <pybind11/pybind11.h>
21 #include <pybind11/pytypes.h>
22
23 #include "inmemory_filesystem_utils.hpp"
24 #include "memory_buffer.hpp"
25 #include "memory_stream.hpp"
26
27 #if __has_include(<filesystem>)
28 #include <filesystem>
29 #elif __has_include(<experimental/filesystem>)
30 #include <experimental/filesystem>
31 namespace std {
32 namespace filesystem = std::experimental::filesystem;
33 }
34 #endif
35
36 namespace executorchcoreml {
37
/// Requests `size` bytes of private, anonymous memory that is readable and writable.
///
/// @param size The number of bytes to map.
/// @retval The address returned by `mmap`; callers must compare it against `MAP_FAILED`.
void* alloc_using_mmap(size_t size) {
    const int protection = PROT_READ | PROT_WRITE;
    const int visibility = MAP_PRIVATE | MAP_ANONYMOUS;
    // No file backs the mapping, hence the -1 descriptor and the zero offset.
    void* mapping = mmap(nullptr, size, protection, visibility, -1, 0);
    return mapping;
}
41
42 std::once_flag external_bytes_initialization_flag;
43 static PyTypeObject PyExternalBytes_Type;
44
external_bytes_free(void * ptr)45 static void external_bytes_free(void* ptr) {
46 printf("external_bytes_free called \n");
47 PyBytesObject* obj = (PyBytesObject*)ptr;
48 Py_ssize_t size = Py_SIZE(obj);
49 munmap(obj, size);
50 }
51
intialize_external_bytes_type()52 void intialize_external_bytes_type() {
53 std::call_once(external_bytes_initialization_flag, []() {
54 PyExternalBytes_Type = PyBytes_Type;
55 PyExternalBytes_Type.tp_free = external_bytes_free;
56 });
57 }
58
initialize_buffer_as_bytes_object(void * buffer,Py_ssize_t size)59 PyBytesObject* initialize_buffer_as_bytes_object(void* buffer, Py_ssize_t size) {
60 intialize_external_bytes_type();
61 PyBytesObject* obj = (PyBytesObject*)buffer;
62 PyObject_INIT_VAR(obj, &PyExternalBytes_Type, size);
63 obj->ob_sval[size] = '\0';
64
65 return obj;
66 }
67
68 /// The method allocates memory using `mmap` and then reads the contents of the all files in the directory. The file
69 /// content is again memory mapped at fixed addresses in the allocated memory. The approach avoids dirtying the memory.
70 /// The down side of this method is that it could result in a larger file when the bytes are dumped to disk.
get_bytes_from_external_memory(const std::filesystem::path & dir_path)71 PyBytesObject* get_bytes_from_external_memory(const std::filesystem::path& dir_path) {
72 using namespace inmemoryfs;
73
74 std::error_code error;
75 std::stringstream ss;
76 auto fs = InMemoryFileSystem::make_from_directory(dir_path, InMemoryFileSystem::FileLoadOption::LazyMMap, error);
77 if (fs == nullptr) {
78 ss << "Failed to create InMemoryFileSystem because of error=" << error.message().c_str() << "\n";
79 PyErr_SetString(PyExc_RuntimeError, ss.str().c_str());
80 return nullptr;
81 }
82
83 size_t alignment = getpagesize();
84 size_t serialized_buffer_length = get_buffer_size_for_serialization(*fs, {}, alignment);
85 size_t py_bytes_obj_length = offsetof(PyBytesObject, ob_sval);
86 size_t py_bytes_obj_total_length = py_bytes_obj_length + serialized_buffer_length + 1;
87 void* backing_buffer = alloc_using_mmap(py_bytes_obj_total_length);
88 if (backing_buffer == NULL || (reinterpret_cast<int*>(backing_buffer) == MAP_FAILED)) {
89 ss << "Failed to allocate memory of size=" << py_bytes_obj_total_length / (1024 * 10224) << " mb.";
90 PyErr_SetString(PyExc_RuntimeError, ss.str().c_str());
91 return nullptr;
92 }
93
94 if (!serialize(*fs, {}, alignment, static_cast<uint8_t*>(backing_buffer) + py_bytes_obj_length, error)) {
95 ss << "Failed to serialize directory contents because of error=" << error.message().c_str() << ".";
96 PyErr_SetString(PyExc_RuntimeError, ss.str().c_str());
97 return nullptr;
98 }
99
100 PyBytesObject* bytes = initialize_buffer_as_bytes_object(backing_buffer, py_bytes_obj_total_length);
101 if (bytes == NULL) {
102 PyErr_SetString(PyExc_RuntimeError, "Failed to create bytes object.");
103 return nullptr;
104 }
105
106 return bytes;
107 }
108
109 /// The method writes to the memory managed by the python bytes object. The method dirties the memory and can be slow
110 /// but results in a relatively smaller file when the bytes are dumped to disk.
get_bytes(inmemoryfs::InMemoryFileSystem & fs,size_t length)111 PyBytesObject* get_bytes(inmemoryfs::InMemoryFileSystem& fs, size_t length) {
112 using namespace inmemoryfs;
113
114 std::error_code error;
115 PyObject* bytes = PyBytes_FromStringAndSize(NULL, length);
116 void* data = static_cast<void*>(PyBytes_AsString(bytes));
117 if (!serialize(fs, {}, 1, data, error)) {
118 throw std::system_error(error.value(), error.category(), error.message());
119 }
120
121 return (PyBytesObject*)bytes;
122 }
123
/// Tells whether a model exceeds the large-model threshold of 1 GB.
///
/// @param model_size_in_bytes The size of the model in bytes.
/// @retval `true` if the size is strictly greater than the threshold otherwise `false`.
bool is_large_model(size_t model_size_in_bytes) {
    constexpr size_t one_gigabyte = static_cast<size_t>(1) << 30; // 1 GB
    return one_gigabyte < model_size_in_bytes;
}
128
129 /// Flattens the directory contents at the specified path.
130 ///
131 /// @param path The directory path
132 /// @retval The flattened directory contents.
flatten_directory_contents(const std::string & path)133 pybind11::bytes flatten_directory_contents(const std::string& path) {
134 using namespace inmemoryfs;
135
136 std::filesystem::path fs_path(path);
137 std::error_code error;
138 auto canonical_path = std::filesystem::canonical(fs_path);
139 std::stringstream ss;
140 auto fs = InMemoryFileSystem::make_from_directory(canonical_path, InMemoryFileSystem::FileLoadOption::MMap, error);
141 if (fs == nullptr) {
142 ss << "Failed to create InMemoryFileSystem because of error=" << error.message().c_str() << ".";
143 PyErr_SetString(PyExc_RuntimeError, ss.str().c_str());
144 return nullptr;
145 }
146
147 size_t model_size_in_bytes = get_buffer_size_for_serialization(*fs, {}, 1);
148 PyBytesObject* bytes = nullptr;
149 if (is_large_model(model_size_in_bytes)) {
150 bytes = get_bytes_from_external_memory(canonical_path);
151 } else {
152 bytes = get_bytes(*fs, model_size_in_bytes);
153 }
154
155 return bytes == nullptr ? pybind11::none() : pybind11::reinterpret_steal<pybind11::object>((PyObject*)bytes);
156 }
157
158 /// Unflattens and writes the contents of the memory buffer at the specified path.
159 ///
160 /// @param bytes The bytes returned from `flatten_directory_contents`.
161 /// @param path The directory path
unflatten_directory_contents(pybind11::bytes bytes,const std::string & path)162 bool unflatten_directory_contents(pybind11::bytes bytes, const std::string& path) {
163 using namespace inmemoryfs;
164
165 char* buffer = nullptr;
166 ssize_t length = 0;
167 if (PYBIND11_BYTES_AS_STRING_AND_SIZE(bytes.ptr(), &buffer, &length)) {
168 pybind11::pybind11_fail("Failed to extract contents of bytes object!");
169 }
170 std::shared_ptr<MemoryBuffer> memory_buffer =
171 MemoryBuffer::make_unowned((void*)buffer, static_cast<size_t>(length));
172 auto fs = inmemoryfs::make_from_buffer(memory_buffer);
173 if (!fs) {
174 pybind11::pybind11_fail("Failed to de-serialize bytes object!");
175 return false;
176 }
177 std::error_code ec;
178 std::filesystem::path fs_path(path);
179 auto canonical_path = std::filesystem::canonical(fs_path);
180 if (!fs->write_item_to_disk({}, canonical_path, true, ec)) {
181 pybind11::pybind11_fail("Failed to write the item to disk!");
182 return false;
183 }
184
185 return true;
186 }
187 } // namespace executorchcoreml
188
/// Defines the `executorchcoreml` Python module and registers the two directory (un)flattening functions on it.
PYBIND11_MODULE(executorchcoreml, mod) {
    namespace impl = executorchcoreml;

    mod.def("flatten_directory_contents", &impl::flatten_directory_contents)
        .def("unflatten_directory_contents", &impl::unflatten_directory_contents);
}
193