Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.cpp (280743 => 280744)
--- trunk/Source/_javascript_Core/yarr/YarrJIT.cpp 2021-08-06 23:21:07 UTC (rev 280743)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.cpp 2021-08-07 00:02:51 UTC (rev 280744)
@@ -36,6 +36,7 @@
#include "YarrMatchingContextHolder.h"
#include <wtf/ASCIICType.h>
#include <wtf/HexNumber.h>
+#include <wtf/ListDump.h>
#include <wtf/Threading.h>
@@ -50,6 +51,15 @@
JSC_ANNOTATE_JIT_OPERATION(_JITTarget_vmEntryToYarrJITAfter, vmEntryToYarrJITAfter);
#endif
+void BoyerMooreFastCandidates::dump(PrintStream& out) const
+{
+ if (!isValid()) {
+ out.print("isValid:(false)");
+ return;
+ }
+ out.print("isValid:(true),characters:(", listDump(m_characters), ")");
+}
+
class BoyerMooreInfo {
WTF_MAKE_NONCOPYABLE(BoyerMooreInfo);
WTF_MAKE_FAST_ALLOCATED(BoyerMooreInfo);
@@ -56,8 +66,9 @@
public:
static constexpr unsigned maxLength = 32;
- explicit BoyerMooreInfo(unsigned length)
+ explicit BoyerMooreInfo(CharSize charSize, unsigned length)
: m_characters(length)
+ , m_charSize(charSize)
{
ASSERT(this->length() <= maxLength);
}
@@ -71,7 +82,7 @@
void set(unsigned index, UChar32 character)
{
- m_characters[index].add(character);
+ m_characters[index].add(m_charSize, character);
}
void setAll(unsigned index)
@@ -81,26 +92,27 @@
void addCharacters(unsigned index, const Vector<UChar32>& characters)
{
- m_characters[index].addCharacters(characters);
+ m_characters[index].addCharacters(m_charSize, characters);
}
void addRanges(unsigned index, const Vector<CharacterRange>& range)
{
- m_characters[index].addRanges(range);
+ m_characters[index].addRanges(m_charSize, range);
}
- static UniqueRef<BoyerMooreInfo> create(unsigned length)
+ static UniqueRef<BoyerMooreInfo> create(CharSize charSize, unsigned length)
{
- return makeUniqueRef<BoyerMooreInfo>(length);
+ return makeUniqueRef<BoyerMooreInfo>(charSize, length);
}
std::optional<std::tuple<unsigned, unsigned>> findWorthwhileCharacterSequenceForLookahead() const;
- std::tuple<BoyerMooreBitmap::Map, bool> createCandidateBitmap(unsigned begin, unsigned end) const;
+ std::tuple<BoyerMooreBitmap::Map, BoyerMooreFastCandidates> createCandidateBitmap(unsigned begin, unsigned end) const;
private:
std::tuple<int, unsigned, unsigned> findBestCharacterSequence(unsigned numberOfCandidatesLimit) const;
Vector<BoyerMooreBitmap> m_characters;
+ CharSize m_charSize;
};
std::tuple<int, unsigned, unsigned> BoyerMooreInfo::findBestCharacterSequence(unsigned numberOfCandidatesLimit) const
@@ -158,16 +170,16 @@
return std::tuple { begin, end };
}
-std::tuple<BoyerMooreBitmap::Map, bool> BoyerMooreInfo::createCandidateBitmap(unsigned begin, unsigned end) const
+std::tuple<BoyerMooreBitmap::Map, BoyerMooreFastCandidates> BoyerMooreInfo::createCandidateBitmap(unsigned begin, unsigned end) const
{
BoyerMooreBitmap::Map map { };
- bool isMaskEffective = false;
+ BoyerMooreFastCandidates charactersFastPath;
for (unsigned index = begin; index < end; ++index) {
auto& bmBitmap = m_characters[index];
map.merge(bmBitmap.map());
- isMaskEffective |= bmBitmap.isMaskEffective();
+ charactersFastPath.merge(bmBitmap.charactersFastPath());
}
- return std::tuple { map, isMaskEffective };
+ return std::tuple { WTFMove(map), WTFMove(charactersFastPath) };
}
class YarrGenerator final : public YarrJITInfo, private MacroAssembler {
@@ -2409,7 +2421,7 @@
auto [beginIndex, endIndex] = *range;
ASSERT(endIndex <= alternative->m_minimumSize);
- auto [map, isMaskEffective] = op.m_bmInfo->createCandidateBitmap(beginIndex, endIndex);
+ auto [map, charactersFastPath] = op.m_bmInfo->createCandidateBitmap(beginIndex, endIndex);
unsigned mapCount = map.count();
// If candiate characters are <= 2, checking each is better than using vector.
JumpList outOfLengthFailure;
@@ -2418,23 +2430,15 @@
// Patterns like /[]/ have zero candidates. Since it is rare, we do not do nothing for now.
if (!mapCount)
break;
- if (mapCount <= 2) {
- UChar32 character1 = map.findBit(0, true);
- ASSERT(character1 != BoyerMooreBitmap::Map::size());
- UChar32 character2 = 0xff;
- if (mapCount == 2) {
- character2 = map.findBit(character1 + 1, true);
- ASSERT(character2 != BoyerMooreBitmap::Map::size());
- }
- dataLogLnIf(Options::verboseRegExpCompilation(), "Found 1-or-2 characters lookahead character:(0x", hex(character1), "),character2:(", hex(character2), "),isMaskEffective:(", isMaskEffective,"),range:[", beginIndex, ", ", endIndex, ")");
+ if (charactersFastPath.isValid() && !charactersFastPath.isEmpty()) {
+ static_assert(BoyerMooreFastCandidates::maxSize == 2);
+ dataLogLnIf(Options::verboseRegExpCompilation(), "Found characters fastpath lookahead ", charactersFastPath, " range:[", beginIndex, ", ", endIndex, ")");
auto loopHead = label();
readCharacter(m_checkedOffset - endIndex + 1, regT0);
- if (isMaskEffective)
- and32(TrustedImm32(BoyerMooreBitmap::mapMask), regT0, regT0);
- matched.append(branch32(Equal, regT0, TrustedImm32(character1)));
- if (mapCount == 2)
- matched.append(branch32(Equal, regT0, TrustedImm32(character2)));
+ matched.append(branch32(Equal, regT0, TrustedImm32(charactersFastPath.at(0))));
+ if (charactersFastPath.size() > 1)
+ matched.append(branch32(Equal, regT0, TrustedImm32(charactersFastPath.at(1))));
outOfLengthFailure.append(jumpIfNoAvailableInput(endIndex - beginIndex));
jump().linkTo(loopHead, this);
} else {
@@ -3895,7 +3899,7 @@
// FIXME: Support unicode flag.
// https://bugs.webkit.org/show_bug.cgi?id=228611
if (disjunction->m_minimumSize && !m_pattern.sticky() && !m_pattern.unicode()) {
- auto bmInfo = BoyerMooreInfo::create(std::min<unsigned>(disjunction->m_minimumSize, BoyerMooreInfo::maxLength));
+ auto bmInfo = BoyerMooreInfo::create(m_charSize, std::min<unsigned>(disjunction->m_minimumSize, BoyerMooreInfo::maxLength));
if (collectBoyerMooreInfo(disjunction, currentAlternativeIndex, bmInfo.get())) {
m_ops.last().m_bmInfo = bmInfo.ptr();
m_bmInfos.append(WTFMove(bmInfo));
@@ -3975,11 +3979,6 @@
++cursor;
continue;
}
- if (characterClass.m_table) {
- bmInfo.setAll(cursor);
- ++cursor;
- continue;
- }
if (!characterClass.m_rangesUnicode.isEmpty())
bmInfo.addRanges(cursor, characterClass.m_rangesUnicode);
if (!characterClass.m_matchesUnicode.isEmpty())
Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.h (280743 => 280744)
--- trunk/Source/_javascript_Core/yarr/YarrJIT.h 2021-08-06 23:21:07 UTC (rev 280743)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.h 2021-08-07 00:02:51 UTC (rev 280744)
@@ -60,6 +60,56 @@
ExecutableMemoryAllocationFailure,
};
+class BoyerMooreFastCandidates {
+ WTF_MAKE_FAST_ALLOCATED(BoyerMooreFastCandidates);
+public:
+ static constexpr unsigned maxSize = 2;
+ using CharacterVector = Vector<UChar32, maxSize>;
+
+ BoyerMooreFastCandidates() = default;
+
+ bool isValid() const { return m_isValid; }
+ void invalidate()
+ {
+ m_characters.clear();
+ m_isValid = false;
+ }
+
+ bool isEmpty() const { return m_characters.isEmpty(); }
+ unsigned size() const { return m_characters.size(); }
+ UChar32 at(unsigned index) const { return m_characters.at(index); }
+
+ void add(UChar32 character)
+ {
+ if (!isValid())
+ return;
+ if (!m_characters.contains(character)) {
+ if (m_characters.size() < maxSize)
+ m_characters.append(character);
+ else
+ invalidate();
+ }
+ }
+
+ void merge(const BoyerMooreFastCandidates& other)
+ {
+ if (!isValid())
+ return;
+ if (!other.isValid()) {
+ invalidate();
+ return;
+ }
+ for (unsigned index = 0; index < other.size(); ++index)
+ add(other.at(index));
+ }
+
+ void dump(PrintStream&) const;
+
+private:
+ CharacterVector m_characters;
+ bool m_isValid { true };
+};
+
class BoyerMooreBitmap {
WTF_MAKE_NONCOPYABLE(BoyerMooreBitmap);
WTF_MAKE_FAST_ALLOCATED(BoyerMooreBitmap);
@@ -72,48 +122,62 @@
unsigned count() const { return m_count; }
const Map& map() const { return m_map; }
- bool isMaskEffective() const { return m_isMaskEffective; }
+ const BoyerMooreFastCandidates& charactersFastPath() const { return m_charactersFastPath; }
- void add(UChar32 character)
+ bool add(CharSize charSize, UChar32 character)
{
if (isAllSet())
- return;
+ return false;
+ if (charSize == CharSize::Char8 && character > 0xff)
+ return true;
+ m_charactersFastPath.add(character);
unsigned position = character & mapMask;
- if (position != static_cast<unsigned>(character))
- m_isMaskEffective = true;
if (!m_map.get(position)) {
m_map.set(position);
++m_count;
}
+ return !isAllSet();
}
- void addCharacters(const Vector<UChar32>& characters)
+ void addCharacters(CharSize charSize, const Vector<UChar32>& characters)
{
if (isAllSet())
return;
- if (characters.size() >= mapSize) {
- setAll();
- return;
+ ASSERT(std::is_sorted(characters));
+ for (UChar32 character : characters) {
+ // Early return since characters are sorted.
+ if (charSize == CharSize::Char8 && character > 0xff)
+ return;
+ if (!add(charSize, character))
+ return;
}
- for (UChar32 character : characters)
- add(character);
}
- void addRanges(const Vector<CharacterRange>& ranges)
+ void addRanges(CharSize charSize, const Vector<CharacterRange>& ranges)
{
- if (ranges.size() >= mapSize) {
- setAll();
+ if (isAllSet())
return;
- }
+ ASSERT(std::is_sorted(characters, [](CharacterRange lhs, CharacterRange rhs) {
+ return lhs.begin < rhs.begin;
+ }));
for (CharacterRange range : ranges) {
- if (isAllSet())
- return;
- if (static_cast<unsigned>(range.end - range.begin + 1) >= mapSize) {
+ auto begin = range.begin;
+ auto end = range.end;
+ if (charSize == CharSize::Char8) {
+ // Early return since ranges are sorted.
+ if (begin > 0xff)
+ return;
+ if (end > 0xff)
+ end = 0xff;
+ }
+ if (static_cast<unsigned>(end - begin + 1) >= mapSize) {
setAll();
return;
}
- for (UChar32 character = range.begin; character <= range.end; ++character)
- add(character);
+ for (UChar32 character = begin; character <= end; ++character) {
+ if (!add(charSize, character))
+ return;
+ }
}
}
@@ -126,8 +190,8 @@
private:
Map m_map { };
+ BoyerMooreFastCandidates m_charactersFastPath;
unsigned m_count { 0 };
- bool m_isMaskEffective { false };
};
#if CPU(ARM64E)