Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion common.gypi
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

# Reset this number to 0 on major V8 upgrades.
# Increment by one for each non-official patch applied to deps/v8.
'v8_embedder_string': '-node.12',
'v8_embedder_string': '-node.14',

##### V8 defaults for Node.js #####

Expand Down
54 changes: 3 additions & 51 deletions deps/v8/src/api/api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@
#include "src/objects/intl-objects.h"
#endif // V8_INTL_SUPPORT

#include "src/strings/string-hasher-inl.h"

#if V8_OS_LINUX || V8_OS_DARWIN || V8_OS_FREEBSD
#include <signal.h>
#include <unistd.h>
Expand Down Expand Up @@ -5575,28 +5577,6 @@ bool String::IsOneByte() const {
return Utils::OpenDirectHandle(this)->IsOneByteRepresentation();
}

// Word-sized constants and pointer helpers for ContainsOnlyOneByteHelper.

// OneByteMask<N>::value is an N-byte word whose set bits cover the upper byte
// of every 16-bit code unit packed into that word.
template <size_t size>
struct OneByteMask;

template <>
struct OneByteMask<4> {
  static const uint32_t value = 0xFF00FF00;
};

template <>
struct OneByteMask<8> {
  static const uint64_t value = 0xFF00'FF00'FF00'FF00;
};

// The mask variant whose width matches uintptr_t.
static const uintptr_t kOneByteMask = OneByteMask<sizeof(uintptr_t)>::value;
// Low address bits that are all zero for a uintptr_t-aligned address.
static const uintptr_t kAlignmentMask = sizeof(uintptr_t) - 1;

// Returns true when |chars| does not sit on a uintptr_t boundary.
static inline bool Unaligned(const uint16_t* chars) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(chars);
  return (address & kAlignmentMask) != 0;
}

// Rounds |chars| down to the closest uintptr_t boundary at or below it.
static inline const uint16_t* Align(const uint16_t* chars) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(chars);
  return reinterpret_cast<uint16_t*>(address & ~kAlignmentMask);
}

class ContainsOnlyOneByteHelper {
public:
// Constructs the helper with is_one_byte_ set to true; VisitTwoByteString may
// later overwrite that flag.
ContainsOnlyOneByteHelper() : is_one_byte_(true) {}
Expand All @@ -5613,35 +5593,7 @@ class ContainsOnlyOneByteHelper {
// Nothing to do.
}
// Visits a two-byte string segment |chars[0..length)| and stores in
// is_one_byte_ whatever internal::detail::IsOnly8Bit reports for it.
//
// The scan is delegated entirely to IsOnly8Bit. Nothing here may advance
// |chars| before the call: the callee receives the full |length|, so handing
// it an already-advanced pointer would make it read past the segment and
// clobber any result computed beforehand.
void VisitTwoByteString(const uint16_t* chars, int length) {
  is_one_byte_ = internal::detail::IsOnly8Bit(chars, length);
}

private:
Expand Down
22 changes: 10 additions & 12 deletions deps/v8/src/strings/string-hasher-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,6 @@
#include "src/common/globals.h"
#include "src/utils/utils.h"

#ifdef __SSE2__
#include <emmintrin.h>
#elif defined(__ARM_NEON__)
#include <arm_neon.h>
#endif

// Comment inserted to prevent header reordering.
#include <type_traits>

Expand All @@ -43,14 +37,18 @@ uint32_t ConvertRawHashToUsableHash(T raw_hash) {
}

// Returns true iff every code unit in |chars[0..len)| is <= 0xFF.
//
// Inputs of at most 16 code units are checked inline with a scalar loop;
// anything longer is forwarded to the out-of-line IsOnly8BitSIMD.
V8_INLINE bool IsOnly8Bit(const uint16_t* chars, unsigned len) {
  // For small strings, use a simple scalar loop to avoid SIMD overhead.
  // Threshold of 16 is chosen to balance setup cost vs benefit.
  if (len <= 16) {
    for (unsigned i = 0; i < len; i++) {
      if (chars[i] > 0xFF) {
        return false;
      }
    }
    return true;
  }
  // For larger strings, use the non-inlined SIMD implementation.
  return IsOnly8BitSIMD(chars, len);
}

V8_INLINE uint64_t GetRapidHash(const uint8_t* chars, uint32_t length,
Expand Down
28 changes: 28 additions & 0 deletions deps/v8/src/strings/string-hasher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "src/strings/string-hasher.h"

#include "hwy/highway.h"
#include "src/strings/string-hasher-inl.h"

namespace v8::internal {
Expand Down Expand Up @@ -73,6 +74,33 @@ uint64_t HashConvertingTo8Bit(const uint16_t* chars, uint32_t length,
return rapidhash<ConvertTo8BitHashReader>(
reinterpret_cast<const uint8_t*>(chars), length, seed, secret);
}

// Out-of-line check that |chars[0..len)| contains only code units <= 0xFF.
// Full blocks are tested with Highway vector ops on a fixed 8-lane uint16_t
// tag; whatever is left over is tested one code unit at a time.
bool IsOnly8BitSIMD(const uint16_t* chars, unsigned len) {
  namespace hw = hwy::HWY_NAMESPACE;
  hw::FixedTag<uint16_t, 8> tag;
  const size_t lanes = hw::Lanes(tag);
  const auto upper_byte_bits = hw::Set(tag, static_cast<uint16_t>(0xFF00));
  const auto all_zero = hw::Zero(tag);

  const size_t total = len;
  size_t pos = 0;
  // Vector part: a block passes only if no lane has an upper-byte bit set.
  // |pos| never exceeds |total|, so the subtraction cannot wrap.
  for (; total - pos >= lanes; pos += lanes) {
    const auto block = hw::LoadU(tag, chars + pos);
    const auto upper_is_clear =
        hw::Eq(hw::And(block, upper_byte_bits), all_zero);
    if (!hw::AllTrue(tag, upper_is_clear)) return false;
  }
  // Scalar tail for the code units that did not fill a whole block.
  for (; pos < total; ++pos) {
    if (chars[pos] > 0xFF) return false;
  }
  return true;
}
} // namespace detail

} // namespace v8::internal
6 changes: 6 additions & 0 deletions deps/v8/src/strings/string-hasher.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ class Vector;

namespace internal {

namespace detail {
// Non-inlined SIMD implementation for checking if a uint16_t string contains
// only Latin1 characters. Used by the inline IsOnly8Bit wrapper.
V8_EXPORT_PRIVATE bool IsOnly8BitSIMD(const uint16_t* chars, unsigned len);
} // namespace detail

// A simple incremental string hasher. Slow but allows for special casing each
// individual character.
class RunningStringHasher final {
Expand Down
20 changes: 4 additions & 16 deletions deps/v8/src/strings/unicode-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,16 +206,6 @@ bool Utf8::IsValidCharacter(uchar c) {
c != kBadChar);
}

// One-byte specialization: reports the result of simdutf::validate_ascii over
// the first |size| bytes of |buffer|.
template <>
bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size) {
  const char* bytes = reinterpret_cast<const char*>(buffer);
  return simdutf::validate_ascii(bytes, size);
}

// Two-byte specialization: unconditionally reports false; |buffer| and |size|
// are not inspected.
template <>
bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer, size_t size) {
return false;
}

template <typename Char>
Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
char* buffer, size_t capacity,
Expand All @@ -234,12 +224,10 @@ Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
size_t read_index = 0;
if (kSourceIsOneByte) {
size_t writeable = std::min(string.size(), content_capacity);
// Just memcpy when possible.
if (writeable > 0 && Utf8::IsAsciiOneByteString(characters, writeable)) {
memcpy(buffer, characters, writeable);
read_index = writeable;
write_index = writeable;
}
size_t ascii_length =
Utf8::WriteLeadingAscii(characters, buffer, writeable);
read_index = ascii_length;
write_index = ascii_length;
}
uint16_t last = Utf16::kNoPreviousCharacter;
for (; read_index < string.size(); read_index++) {
Expand Down
39 changes: 39 additions & 0 deletions deps/v8/src/strings/unicode.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,49 @@
#include "unicode/uchar.h"
#endif

#include "hwy/highway.h"
#include "third_party/simdutf/simdutf.h"

namespace unibrow {

// One-byte specialization. Copies a leading run of ASCII bytes from |src| to
// |dest| two vectors at a time and returns the number of bytes copied, which
// is always a multiple of 2 * Lanes. Stops at the first pair of vectors that
// contains a byte with its top bit set; that pair is not written. Returns 0
// without touching |dest| when |length| is below four vectors' worth.
template <>
size_t Utf8::WriteLeadingAscii<uint8_t>(const uint8_t* src, char* dest,
                                        size_t length) {
  namespace hw = hwy::HWY_NAMESPACE;
  const hw::ScalableTag<int8_t> d;
  const size_t N = hw::Lanes(d);
  const size_t pair = 2 * N;
  // Each step consumes two registers; skip the vector path entirely unless at
  // least two such steps fit into the input.
  if (length < 2 * pair) return 0;

  // ASCII-ness is decided by the sign bit, so view both buffers as signed.
  const int8_t* in = reinterpret_cast<const int8_t*>(src);
  int8_t* out = reinterpret_cast<int8_t*>(dest);
  const auto zero = hw::Zero(d);
  DCHECK_GE(length, pair);

  size_t copied = 0;
  while (copied <= length - pair) {
    const auto first = hw::LoadU(d, in + copied);
    const auto second = hw::LoadU(d, in + copied + N);
    // OR-ing the halves leaves a negative lane iff either half has a byte
    // with the top bit set.
    if (!hw::AllTrue(d, hw::Ge(hw::Or(first, second), zero))) break;
    hw::StoreU(first, d, out + copied);
    hw::StoreU(second, d, out + copied + N);
    copied += pair;
  }
  return copied;
}

// Two-byte specialization: a stub that writes nothing to |dest| and always
// returns 0.
template <>
size_t Utf8::WriteLeadingAscii<uint16_t>(const uint16_t* src, char* dest,
size_t size) {
// TODO(dcarney): this could be implemented similarly to the one byte variant
return 0;
}

#ifndef V8_INTL_SUPPORT
static const int kStartBit = (1 << 30);
static const int kChunkBits = (1 << 13);
Expand Down
18 changes: 9 additions & 9 deletions deps/v8/src/strings/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,15 @@ class V8_EXPORT_PRIVATE Utf8 {
static bool ValidateEncoding(const uint8_t* str, size_t length);

// Reports whether |buffer[0..size)| is an ASCII-only one-byte string.
// Explicitly specialized for uint8_t and uint16_t.
template <typename Char>
static bool IsAsciiOneByteString(const Char* buffer, size_t size);

// Writes leading ASCII characters from |src| to |dest| and returns how many
// were written. Explicitly specialized for uint8_t and uint16_t.
// Each member template needs its own template header; sharing the one above
// would leave |Char| undeclared here.
template <typename Char>
static size_t WriteLeadingAscii(const Char* src, char* dest, size_t size);

// NOTE(review): these explicit specializations are declared at class scope,
// which relies on CWG 727; some GCC releases reject that form — confirm the
// supported toolchains or move them to namespace scope after the class.
template <>
size_t WriteLeadingAscii<uint8_t>(const uint8_t* src, char* dest,
                                  size_t size);

template <>
size_t WriteLeadingAscii<uint16_t>(const uint16_t* src, char* dest,
                                   size_t size);

// Encode the given characters as Utf8 into the provided output buffer.
struct EncodingResult {
Expand All @@ -226,14 +234,6 @@ class V8_EXPORT_PRIVATE Utf8 {
bool replace_invalid_utf8);
};

template <>
inline bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer,
size_t size);

template <>
inline bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer,
size_t size);

#if V8_ENABLE_WEBASSEMBLY
class V8_EXPORT_PRIVATE Wtf8 {
public:
Expand Down
Loading