yiguolei commented on code in PR #64667: URL: https://github.com/apache/doris/pull/64667#discussion_r3489001273
########## be/src/storage/index/inverted/analyzer/kuromoji/dict/darts.h: ########## @@ -0,0 +1,2001 @@ +// Vendored from https://github.com/s-yata/darts-clone (include/darts.h, v0.32). +// Copyright (c) 2008-2014, Susumu Yata. Licensed under the 2-clause BSD license. +// See dist/licenses/LICENSE-darts-clone.txt. Unmodified except this header note +// and the `#pragma GCC system_header` below (so this third-party header is +// exempt from Doris's -Wall -Werror -Wpedantic when included). +#ifndef DARTS_H_ +#define DARTS_H_ + +// Treat the rest of this third-party header as a system header: suppress all +// warnings it would otherwise raise under Doris's strict compile flags. +#pragma GCC system_header + +#include <cstdio> +#include <exception> +#include <new> + +#define DARTS_VERSION "0.32" + +// DARTS_THROW() throws a <Darts::Exception> whose message starts with the +// file name and the line number. For example, DARTS_THROW("error message") at +// line 123 of "darts.h" throws a <Darts::Exception> which has a pointer to +// "darts.h:123: exception: error message". The message is available by using +// what() as well as that of <std::exception>. +#define DARTS_INT_TO_STR(value) #value +#define DARTS_LINE_TO_STR(line) DARTS_INT_TO_STR(line) +#define DARTS_LINE_STR DARTS_LINE_TO_STR(__LINE__) +#define DARTS_THROW(msg) throw Darts::Details::Exception( \ + __FILE__ ":" DARTS_LINE_STR ": exception: " msg) + +namespace Darts { + +// The following namespace hides the internal types and classes. +namespace Details { + +// This header assumes that <int> and <unsigned int> are 32-bit integer types. +// +// Darts-clone keeps values associated with keys. The type of the values is +// <value_type>. Note that the values must be positive integers because the +// most significant bit (MSB) of each value is used to represent whether the +// corresponding unit is a leaf or not. Also, the keys are represented by +// sequences of <char_type>s. 
<uchar_type> is the unsigned type of <char_type>. +typedef char char_type; +typedef unsigned char uchar_type; +typedef int value_type; + +// The main structure of Darts-clone is an array of <DoubleArrayUnit>s, and the +// unit type is actually a wrapper of <id_type>. +typedef unsigned int id_type; + +// <progress_func_type> is the type of callback functions for reporting the +// progress of building a dictionary. See also build() of <DoubleArray>. +// The 1st argument receives the progress value and the 2nd argument receives +// the maximum progress value. A usage example is to show the progress +// percentage, 100.0 * (the 1st argument) / (the 2nd argument). +typedef int (*progress_func_type)(std::size_t, std::size_t); + +// <DoubleArrayUnit> is the type of double-array units and it is a wrapper of +// <id_type> in practice. +class DoubleArrayUnit { + public: + DoubleArrayUnit() : unit_() {} + + // has_leaf() returns whether a leaf unit is immediately derived from the + // unit (true) or not (false). + bool has_leaf() const { + return ((unit_ >> 8) & 1) == 1; + } + // value() returns the value stored in the unit, and thus value() is + // available when and only when the unit is a leaf unit. + value_type value() const { + return static_cast<value_type>(unit_ & ((1U << 31) - 1)); + } + + // label() returns the label associted with the unit. Note that a leaf unit + // always returns an invalid label. For this feature, leaf unit's label() + // returns an <id_type> that has the MSB of 1. + id_type label() const { + return unit_ & ((1U << 31) | 0xFF); + } + // offset() returns the offset from the unit to its derived units. + id_type offset() const { + return (unit_ >> 10) << ((unit_ & (1U << 9)) >> 6); + } + + private: + id_type unit_; + + // Copyable. +}; + +// Darts-clone throws an <Exception> for memory allocation failure, invalid +// arguments or a too large offset. The last case means that there are too many +// keys in the given set of keys. 
Note that the `msg' of <Exception> must be a +// constant or static string because an <Exception> keeps only a pointer to +// that string. +class Exception : public std::exception { Review Comment: No, I think you should modify it and use Doris's exception. "our builder already catches the darts error as std::exception and converts to Status" — we added this converter because clucene is a separate lib for Doris BE. We have a plan to move it into Doris's code and modify it. In this case we will unify the error status and exception logic with Doris's. Also, Doris's exception and status include some extra logic, for example the call stack. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
