1 /* 2 * Copyright (c) Facebook, Inc. and its affiliates. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <cstdint> 20 #include <string> 21 22 #include <jni.h> 23 24 namespace facebook { 25 namespace jni { 26 27 namespace detail { 28 29 void utf8ToModifiedUTF8( 30 const uint8_t* bytes, 31 size_t len, 32 uint8_t* modified, 33 size_t modifiedLength); 34 size_t modifiedLength(const std::string& str); 35 size_t modifiedLength(const uint8_t* str, size_t* length); 36 std::string modifiedUTF8ToUTF8(const uint8_t* modified, size_t len) noexcept; 37 std::string utf16toUTF8(const uint16_t* utf16Bytes, size_t len) noexcept; 38 39 } // namespace detail 40 41 // JNI represents strings encoded with modified version of UTF-8. The 42 // difference between UTF-8 and Modified UTF-8 is that the latter support only 43 // 1-byte, 2-byte, and 3-byte formats. Supplementary character (4 bytes in 44 // unicode) needs to be represented in the form of surrogate pairs. To create a 45 // Modified UTF-8 surrogate pair that Dalvik would understand we take 4-byte 46 // unicode character, encode it with UTF-16 which gives us two 2 byte chars 47 // (surrogate pair) and then we encode each pair as UTF-8. This result in 2 x 3 48 // byte characters. To convert modified UTF-8 to standard UTF-8, this mus tbe 49 // reversed. 50 // 51 // The second difference is that Modified UTF-8 is encoding NUL byte in 2-byte 52 // format. 53 // 54 // In order to avoid complex error handling, only a minimum of validity checking 55 // is done to avoid crashing. If the input is invalid, the output may be 56 // invalid as well. 57 // 58 // Relevant links: 59 // - 60 // http://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/functions.html 61 // - 62 // https://docs.oracle.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8 63 64 // JString to UTF16 extractor using RAII idiom. Note that the 65 // ctor/dtor use GetStringCritical/ReleaseStringCritical, so this 66 // class is subject to the restrictions imposed by those functions. 67 class JStringUtf16Extractor { 68 public: JStringUtf16Extractor(JNIEnv * env,jstring javaString)69 JStringUtf16Extractor(JNIEnv* env, jstring javaString) 70 : env_(env), javaString_(javaString), length_(0), utf16String_(nullptr) { 71 if (env_ && javaString_) { 72 length_ = env_->GetStringLength(javaString_); 73 utf16String_ = env_->GetStringCritical(javaString_, nullptr); 74 } 75 } 76 ~JStringUtf16Extractor()77 ~JStringUtf16Extractor() { 78 if (utf16String_) { 79 env_->ReleaseStringCritical(javaString_, utf16String_); 80 } 81 } 82 length()83 jsize length() const { 84 return length_; 85 } 86 chars()87 const jchar* chars() const { 88 return utf16String_; 89 } 90 91 private: 92 JNIEnv* env_; 93 jstring javaString_; 94 jsize length_; 95 const jchar* utf16String_; 96 }; 97 98 } // namespace jni 99 } // namespace facebook 100