17 #ifndef __BYTESTRIE_H__ 18 #define __BYTESTRIE_H__ 27 #if U_SHOW_CPLUSPLUS_API 38 class BytesTrieBuilder;
72 : ownedArray_(nullptr), bytes_(static_cast<const uint8_t *>(trieBytes)),
73 pos_(bytes_), remainingMatchLength_(-1) {}
88 : ownedArray_(nullptr), bytes_(other.bytes_),
89 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
98 remainingMatchLength_=-1;
111 return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
112 static_cast<uint64_t
>(pos_ - bytes_);
130 remainingMatchLength_ =
static_cast<int32_t
>(state >> kState64RemainingShift) - 2;
131 pos_ = bytes_ + (state & kState64PosMask);
150 const uint8_t *bytes;
152 int32_t remainingMatchLength;
165 state.remainingMatchLength=remainingMatchLength_;
180 if(bytes_==state.bytes && bytes_!=
nullptr) {
182 remainingMatchLength_=state.remainingMatchLength;
204 remainingMatchLength_=-1;
208 return nextImpl(bytes_, inByte);
247 const uint8_t *pos=pos_;
248 int32_t leadByte=*pos++;
250 return readValue(pos, leadByte>>1);
263 const uint8_t *pos=pos_;
265 return pos!=
nullptr && findUniqueValue(pos+remainingMatchLength_+1,
false, uniqueValue);
276 int32_t getNextBytes(
ByteSink &out)
const;
327 UBool hasNext()
const;
357 UBool truncateAndStop();
359 const uint8_t *branchNext(
const uint8_t *pos, int32_t length,
UErrorCode &errorCode);
361 const uint8_t *bytes_;
363 const uint8_t *initialPos_;
364 int32_t remainingMatchLength_;
365 int32_t initialRemainingMatchLength_;
383 friend class ::BytesTrieTest;
// Private two-pointer constructor: stores adoptBytes in ownedArray_ and trieBytes in bytes_,
// then sets pos_ = bytes_ and remainingMatchLength_ = -1, the same starting values the
// other constructors in this listing use when they are not copying an existing reader.
// NOTE(review): presumably called by BytesTrieBuilder to hand over an array it allocated;
// where ownedArray_ is released is not visible in this listing — confirm.
391 BytesTrie(
void *adoptBytes,
const void *trieBytes)
392 : ownedArray_(static_cast<uint8_t *>(adoptBytes)),
393 bytes_(static_cast<const uint8_t *>(trieBytes)),
394 pos_(bytes_), remainingMatchLength_(-1) {}
397 BytesTrie &operator=(
const BytesTrie &other) =
delete;
405 static int32_t readValue(
const uint8_t *pos, int32_t leadByte);
406 static inline const uint8_t *skipValue(
const uint8_t *pos, int32_t leadByte) {
408 if(leadByte>=(kMinTwoByteValueLead<<1)) {
409 if(leadByte<(kMinThreeByteValueLead<<1)) {
411 }
else if(leadByte<(kFourByteValueLead<<1)) {
414 pos+=3+((leadByte>>1)&1);
419 static inline const uint8_t *skipValue(
const uint8_t *pos) {
420 int32_t leadByte=*pos++;
421 return skipValue(pos, leadByte);
425 static const uint8_t *jumpByDelta(
const uint8_t *pos);
427 static inline const uint8_t *skipDelta(
const uint8_t *pos) {
428 int32_t delta=*pos++;
429 if(delta>=kMinTwoByteDeltaLead) {
430 if(delta<kMinThreeByteDeltaLead) {
432 }
else if(delta<kFourByteDeltaLead) {
446 UStringTrieResult branchNext(
const uint8_t *pos, int32_t length, int32_t inByte);
454 static const uint8_t *findUniqueValueFromBranch(
const uint8_t *pos, int32_t length,
455 UBool haveUniqueValue, int32_t &uniqueValue);
458 static UBool findUniqueValue(
const uint8_t *pos,
UBool haveUniqueValue, int32_t &uniqueValue);
462 static void getNextBranchBytes(
const uint8_t *pos, int32_t length, ByteSink &out);
463 static void append(ByteSink &out,
int c);
// Constants for the serialized trie format, followed by the bit layout of the packed 64-bit state.
// NOTE(review): the node/value/delta roles are read from the constant names and from the
// skipValue()/skipDelta() fragments in this listing — confirm against the builder.
// Trailing hex comments give the value each derived constant evaluates to.
504 static const int32_t kMaxBranchLinearSubNodeLength=5;
507 static const int32_t kMinLinearMatch=0x10;
508 static const int32_t kMaxLinearMatchLength=0x10;
515 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
517 static const int32_t kValueIsFinal=1;
// skipValue() tests leadByte against kMinTwoByteValueLead, kMinThreeByteValueLead and
// kFourByteValueLead, each shifted left by one bit.
520 static const int32_t kMinOneByteValueLead=kMinValueLead/2;  // 0x10
521 static const int32_t kMaxOneByteValue=0x40;
523 static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
524 static const int32_t kMaxTwoByteValue=0x1aff;
526 static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
527 static const int32_t kFourByteValueLead=0x7e;
530 static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;  // 0x11ffff
532 static const int32_t kFiveByteValueLead=0x7f;
// skipDelta() tests the first delta byte against kMinTwoByteDeltaLead, kMinThreeByteDeltaLead
// and kFourByteDeltaLead.
535 static const int32_t kMaxOneByteDelta=0xbf;
536 static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
537 static const int32_t kMinThreeByteDeltaLead=0xf0;
538 static const int32_t kFourByteDeltaLead=0xfe;
539 static const int32_t kFiveByteDeltaLead=0xff;
541 static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
542 static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
// getState64() places remainingMatchLength_ + 2 at and above bit kState64RemainingShift and
// the pos_ - bytes_ offset in the bits below it; resetToState64() undoes both.
548 static constexpr int32_t kState64RemainingShift = 59;
549 static constexpr uint64_t kState64PosMask = (
UINT64_C(1) << kState64RemainingShift) - 1;  // low 59 bits set; masks out the offset part of a state
551 uint8_t *ownedArray_;
554 const uint8_t *bytes_;
561 int32_t remainingMatchLength_;
568 #endif // __BYTESTRIE_H__
BytesTrie & resetToState64(uint64_t state)
Resets this trie to the saved state.
Builder class for BytesTrie.
BytesTrie state object, for saving a trie's current state and resetting the trie back to this state later.
UStringTrieResult
Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
BytesTrie(const void *trieBytes)
Constructs a BytesTrie reader instance.
A ByteSink can be filled with bytes.
UBool hasUniqueValue(int32_t &uniqueValue) const
Determines whether all byte sequences reachable from the current state map to the same value.
Light-weight, non-const reader class for a BytesTrie.
BytesTrie & reset()
Resets this trie to its initial state.
C++ API: StringPiece: Read-only byte string wrapper class.
UStringTrieResult first(int32_t inByte)
Traverses the trie from the initial state for this input byte.
int32_t getValue() const
Returns a matching byte sequence's value if called immediately after current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
BytesTrie(const BytesTrie &other)
Copy constructor, copies the other trie reader object and its state, but not the byte array which will be shared.
State()
Constructs an empty State.
Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
C++ API: Common ICU base class UObject.
uint64_t getState64() const
Returns the state of this trie as a 64-bit integer.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
C API: Helper definitions for dictionary trie APIs.
const BytesTrie & saveState(State &state) const
Saves the state of this trie.
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
The input unit(s) continued a matching string and there is a value for the string so far.
#define UINT64_C(c)
Provides a platform independent way to specify an unsigned 64-bit integer constant.
A string-like object that points to a sized piece of memory.
BytesTrie & resetToState(const State &state)
Resets this trie to the saved state.
UMemory is the common ICU base class.
int8_t UBool
The ICU boolean type, a signed-byte integer.