diff --git a/common.gypi b/common.gypi index a4825c5429d761..c58aa7fd89305d 100644 --- a/common.gypi +++ b/common.gypi @@ -38,7 +38,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.12', + 'v8_embedder_string': '-node.13', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/strings/unicode-inl.h b/deps/v8/src/strings/unicode-inl.h index 782ff2ab500ee1..25f3d0375e7f1a 100644 --- a/deps/v8/src/strings/unicode-inl.h +++ b/deps/v8/src/strings/unicode-inl.h @@ -206,16 +206,6 @@ bool Utf8::IsValidCharacter(uchar c) { c != kBadChar); } -template <> -bool Utf8::IsAsciiOneByteString(const uint8_t* buffer, size_t size) { - return simdutf::validate_ascii(reinterpret_cast<const char*>(buffer), size); -} - -template <> -bool Utf8::IsAsciiOneByteString(const uint16_t* buffer, size_t size) { - return false; -} - template <typename Char> Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string, char* buffer, size_t capacity, @@ -234,12 +224,10 @@ Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string, size_t read_index = 0; if (kSourceIsOneByte) { size_t writeable = std::min(string.size(), content_capacity); - // Just memcpy when possible. 
- if (writeable > 0 && Utf8::IsAsciiOneByteString(characters, writeable)) { - memcpy(buffer, characters, writeable); - read_index = writeable; - write_index = writeable; - } + size_t ascii_length = + Utf8::WriteLeadingAscii(characters, buffer, writeable); + read_index = ascii_length; + write_index = ascii_length; } uint16_t last = Utf16::kNoPreviousCharacter; for (; read_index < string.size(); read_index++) { diff --git a/deps/v8/src/strings/unicode.cc b/deps/v8/src/strings/unicode.cc index 3f318556787f2a..d213ea68e8ad1d 100644 --- a/deps/v8/src/strings/unicode.cc +++ b/deps/v8/src/strings/unicode.cc @@ -21,10 +21,49 @@ #include "unicode/uchar.h" #endif +#include "hwy/highway.h" #include "third_party/simdutf/simdutf.h" namespace unibrow { +template <> +size_t Utf8::WriteLeadingAscii(const uint8_t* src, char* dest, + size_t length) { + namespace hw = hwy::HWY_NAMESPACE; + const hw::ScalableTag<int8_t> d; + const size_t N = hw::Lanes(d); + // Don't bother with simd if the string isn't long enough. We're using 2 + // registers, so don't enter the loop unless we can iterate 2 times through. + if (length < 4 * N) { + return 0; + } + // We're checking ascii by checking the sign bit so make the strings signed. 
+ const int8_t* src_s = reinterpret_cast<const int8_t*>(src); + int8_t* dst_s = reinterpret_cast<int8_t*>(dest); + size_t i = 0; + DCHECK_GE(length, 2 * N); + for (; i <= length - 2 * N; i += 2 * N) { + const auto v0 = hw::LoadU(d, src_s + i); + const auto v1 = hw::LoadU(d, src_s + i + N); + const auto combined = hw::Or(v0, v1); + bool is_ascii = hw::AllTrue(d, hw::Ge(combined, hw::Zero(d))); + if (is_ascii) { + hw::StoreU(v0, d, dst_s + i); + hw::StoreU(v1, d, dst_s + i + N); + } else { + break; + } + } + return i; +} + +template <> +size_t Utf8::WriteLeadingAscii(const uint16_t* src, char* dest, + size_t size) { + // TODO(dcarney): this could be implemented similarly to the one byte variant + return 0; +} + #ifndef V8_INTL_SUPPORT static const int kStartBit = (1 << 30); static const int kChunkBits = (1 << 13); diff --git a/deps/v8/src/strings/unicode.h b/deps/v8/src/strings/unicode.h index e8e9cedceeadc9..1d6be916f773ab 100644 --- a/deps/v8/src/strings/unicode.h +++ b/deps/v8/src/strings/unicode.h @@ -213,7 +213,7 @@ class V8_EXPORT_PRIVATE Utf8 { static bool ValidateEncoding(const uint8_t* str, size_t length); template <typename Char> - static bool IsAsciiOneByteString(const Char* buffer, size_t size); + static size_t WriteLeadingAscii(const Char* src, char* dest, size_t size); // Encode the given characters as Utf8 into the provided output buffer. struct EncodingResult { @@ -227,12 +227,12 @@ class V8_EXPORT_PRIVATE Utf8 { }; template <> -inline bool Utf8::IsAsciiOneByteString(const uint8_t* buffer, - size_t size); +size_t Utf8::WriteLeadingAscii(const uint8_t* src, char* dest, + size_t size); template <> -inline bool Utf8::IsAsciiOneByteString(const uint16_t* buffer, - size_t size); +size_t Utf8::WriteLeadingAscii(const uint16_t* src, char* dest, + size_t size); #if V8_ENABLE_WEBASSEMBLY class V8_EXPORT_PRIVATE Wtf8 {