commit edca2e0c4aa4a4371dfe2b430455d1b7a64de54d
Author: Kornel Benko <[email protected]>
Date: Sat Oct 13 21:02:53 2018 +0200
Amend 7a03fa6: Advanced search with format:
In the latexified text:
* Check and handle contained regex properly
* Discard superfluos '{' preventing our search engine
to match with the search pattern
---
src/lyxfind.cpp | 244 +++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 203 insertions(+), 41 deletions(-)
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index 03a8d20..03d1221 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -998,7 +998,8 @@ class LangInfo {
: par(par),
_tokenend(0),
_dataEnd(0),
- actualdeptindex(0)
+ actualdeptindex(0),
+ ignoreidx(-1)
{
valid = Invalid;
_tokenstart = start;
@@ -1011,6 +1012,12 @@ class LangInfo {
maxoffset = par.length();
if (!search1.empty())
_search = search1;
+ regexPossible = false;
+ size_t tmpreg = par.find("\\regexp{", _tokenstart);
+ if (tmpreg != string::npos) {
+ if (tmpreg < maxoffset)
+ regexPossible = true;
+ }
}
bool nextInfo(); // of the same type, from the last start in the same
reagion
bool firstInfo(string search, int datastart);
@@ -1029,6 +1036,7 @@ class LangInfo {
void addIntervall(int low, int upper); /* if explicit */
void handleParentheses(int lastpos);
string show(int lastpos);
+ bool discardParethesizedInBlock(int start);
private:
string par;
string _search;
@@ -1044,6 +1052,10 @@ class LangInfo {
int actualdeptindex;
int ignoreIntervalls[10][2];
int ignoreidx;
+ bool regexPossible;
+ void adaptIgnoringParts(bool useOld = false);
+ int nextNotIgored(int start);
+ bool discarSuperfluousParentheses(int start);
};
void LangInfo::setDataEnd(int dataend)
@@ -1060,15 +1072,53 @@ void LangInfo::setDataEnd(int dataend)
_dataEnd = dataend;
}
+/*
+ * Try to reuse the ignoring parts from parent
+ *
+ */
+void LangInfo::adaptIgnoringParts(bool useOld)
+{
+ if (useOld && (ignoreidx >= 0)) {
+ int idx;
+ for (idx = 0; idx <= ignoreidx; idx++) {
+ if (ignoreIntervalls[idx][1] >= _dataStart) {
+ if (ignoreIntervalls[idx][0] < _dataStart)
+ ignoreIntervalls[idx][0] = _dataStart;
+ break;
+ }
+ }
+ if (idx > 0) {
+ for (int i = 0; i <= ignoreidx - idx; i++) {
+ ignoreIntervalls[i][0] = ignoreIntervalls[idx+i][0];
+ ignoreIntervalls[i][1] = ignoreIntervalls[idx+i][1];
+ }
+ ignoreidx -= idx;
+ }
+ }
+ else {
+ ignoreidx = 0;
+ ignoreIntervalls[ignoreidx][0] = _dataStart;
+ if ((par[_dataStart] == '{') && (par[_dataStart+1] == '}')) {
+ // First candidates to be ignored
+ ignoreIntervalls[ignoreidx][1] = _dataStart+2;
+ }
+ else
+ ignoreIntervalls[ignoreidx][1] = _dataStart;
+ }
+}
+
void LangInfo::setDataStart(int datastart)
{
+ bool reUse = true; /* Reuse previous ignoring intervalls
*/
if (datastart < _tokenend) {
_dataStart = _tokenend;
// cout << "Wrong data start, too low\n";
+ reUse = false;
}
else if (size_t(datastart) > par.length()) {
// cout << "Wrong data start, too high\n";
_dataStart = par.length();
+ reUse = false;
}
else
_dataStart = datastart;
@@ -1077,16 +1127,13 @@ void LangInfo::setDataStart(int datastart)
depts[0] = _dataStart;
closes[0] = -1;
depts[1] = _dataStart;
- ignoreidx = 0;
- ignoreIntervalls[ignoreidx][0] = _dataStart;
- if ((par[_dataStart] == '{') && (par[_dataStart+1] == '}')) {
- // First candidates to be ignored
- ignoreIntervalls[ignoreidx][1] = _dataStart+2;
- }
- else
- ignoreIntervalls[ignoreidx][1] = _dataStart;
+ adaptIgnoringParts(reUse);
}
+/*
+ * Keep the list of actual opened parentheses actual
+ * (e.g. depth == 4 means there are 4 '{' not processed yet)
+ */
void LangInfo::handleParentheses(int lastpos)
{
int skip = 0;
@@ -1102,7 +1149,7 @@ void LangInfo::handleParentheses(int lastpos)
}
else if (c == '}') {
if (actualdeptindex <= 0) {
- LYXERR0("ERROR ERROR ERROR"); /* should never happen! */
+ LYXERR(Debug::FIND, "ERROR ERROR ERROR"); /* should never happen! */
}
else {
closes[actualdeptindex] = i+1;
@@ -1112,6 +1159,10 @@ void LangInfo::handleParentheses(int lastpos)
}
}
+/*
+ * Expand the region of ignored parts of the input latex string
+ * The region is only relevant in output()
+ */
void LangInfo::addIntervall(int low, int upper)
{
int idx;
@@ -1127,8 +1178,9 @@ void LangInfo::addIntervall(int low, int upper)
}
else {
// Expand only if one of the new bounds is inside the interwall
- if (((low <= ignoreIntervalls[idx][1]) && (low >=
ignoreIntervalls[idx][0])) ||
- ((upper <= ignoreIntervalls[idx][1]) && (upper >=
ignoreIntervalls[idx][0]))) {
+ // or
+ if (((low <= ignoreIntervalls[idx][1]) && (upper >=
ignoreIntervalls[idx][1])) ||
+ ((low <= ignoreIntervalls[idx][0]) && (upper >=
ignoreIntervalls[idx][0]))) {
if (low < ignoreIntervalls[idx][0])
ignoreIntervalls[idx][0] = low;
if (upper > ignoreIntervalls[idx][1])
@@ -1144,7 +1196,7 @@ void LangInfo::addIntervall(int upper)
if (actualdeptindex >= 0)
low = depts[actualdeptindex]; /* the position of last unclosed '{' */
else {
- LYXERR0("ERROR ERROR ERROR2");
+ LYXERR(Debug::FIND, "ERROR ERROR ERROR2");
low = upper;
}
addIntervall(low, upper);
@@ -1172,6 +1224,18 @@ string LangInfo::show(int lastpos)
return os.str();
}
+int LangInfo::nextNotIgored(int start)
+{
+ int idx = 0; /* int intervalls */
+ for (idx = 0; idx <= ignoreidx; idx++) {
+ if (start < ignoreIntervalls[idx][0])
+ return(start);
+ if (start < ignoreIntervalls[idx][1])
+ start = ignoreIntervalls[idx][1];
+ }
+ return start;
+}
+
void LangInfo::output(ostringstream &os, int lastpos)
{
// get number of chars to output
@@ -1196,11 +1260,14 @@ void LangInfo::output(ostringstream &os, int lastpos)
for (int i = _dataStart; i < lastpos;) {
if (i <= ignoreIntervalls[idx][0]) {
os << par.substr(i, ignoreIntervalls[idx][0] - i);
+ handleParentheses(ignoreIntervalls[idx][0]);
i = ignoreIntervalls[idx][1];
}
idx++;
if (idx > ignoreidx) {
- os << par.substr(i, lastpos-i);
+ if (lastpos > i) {
+ os << par.substr(i, lastpos-i);
+ }
break;
}
}
@@ -1261,52 +1328,121 @@ bool LangInfo::firstInfo(string search1, int datastart)
_search = search1;
valid = Invalid;
}
+ else {
+ // This is a clone
+ _tokenstart = datastart;
+ valid = Invalid;
+ }
}
return nextInfo();
}
+bool LangInfo::discardParethesizedInBlock(int start)
+{
+ int depth = 0;
+ int skip = 0;
+ bool isempty = true;
+
+ size_t regex_start, regex_end;
+ if (regexPossible) {
+ regex_start = par.find("\\regexp{", start);
+ if (regex_start == string::npos)
+ regexPossible = false;
+ else if (regex_start >= size_t(_dataEnd))
+ regexPossible = false;
+ else {
+ regex_end = par.find("\\endregexp{}}", regex_start + 8);
+ if (regex_end == string::npos)
+ regexPossible = false;
+ else if (regex_end > size_t(_dataEnd))
+ regexPossible = false;
+ }
+ }
+ if (!regexPossible) {
+ regex_start = _dataEnd;
+ regex_end = _dataEnd;
+ }
+ for (int i = start; i < _dataEnd; i += 1+skip) {
+ char c = par[i];
+ skip = 0;
+ if (c == '\\') {
+ if (size_t(i) == regex_start) {
+ // 12 is correct, even if the length of "\\endregexp{}}" is 13
+ skip = regex_end + 12 - i;
+ }
+ else
+ skip = 1;
+ isempty = false;
+ }
+ else if (c == '{') {
+ if (depth == 0) {
+ addIntervall(i, i+1);
+ // cout << "discard '{' at " << i << "\n";
+ }
+ else
+ isempty = false;
+ depth++;
+ }
+ else if (c == '}') {
+ if (depth == 1) {
+ addIntervall(i, i+1);
+ // cout << "discard '}' at " << i << "\n";
+ }
+ else if (depth < 1)
+ break;
+ depth--;
+ }
+ else
+ isempty = false;
+ }
+ return(isempty);
+}
+
+bool LangInfo::discarSuperfluousParentheses(int start)
+{
+ start = nextNotIgored(start);
+ while ((par[start] == '{') && (start < _dataEnd)) {
+ discardParethesizedInBlock(start);
+ start = nextNotIgored(start+1);
+ }
+ // It is empty if (par[start] == '}')
+ return ((start >= _dataEnd) || (par[start] == '}'));
+}
+
void LangInfo::process(ostringstream &os)
{
LangInfo color(*this);
(void) color.firstInfo("\\textcolor{", _dataStart);
while (color.isValid() && (color.getStart() < _dataEnd)) {
- bool isEmpty = false;
- if (color.getDataStart() == color.getEnd()) {
- // Empty, e.g. par[color.getDataStart()] == '}'
- isEmpty = true;
- }
- else if ((par[color.getDataStart()] == '{') &&
(par[color.getDataStart()+1] == '}')) {
- // color starts with '{}', discard it
- if (color.getDataStart()+2 == color.getEnd())
- isEmpty = true;
- else {
- // discard the first '{}'
- addIntervall(color.getDataStart(), color.getDataStart()+2);
- }
- }
+ bool isEmpty = discarSuperfluousParentheses(color.getDataStart());
if (isEmpty) {
- // it is emty, so ignore and go to next color
+ // it is empty, so ignore and go to next color
addIntervall(color.getStart(), color.getEnd()+1);
+ int start = color.getEnd()+1;
+ discarSuperfluousParentheses(start);
}
- else {
+ else { // not empty
if (par[color.getStart()-1] != '{') {
+ // We are not at start
output(os, color.getStart());
addIntervall(color.getStart());
}
// Check if color empty
output(os, color.getEnd()+1);
addIntervall(color.getEnd()+1);
- }
- for (int i = color.getEnd()+1; par[i] == '}'; i++) {
- handleParentheses(i+1);
- addIntervall(i+1);
+ discarSuperfluousParentheses(color.getEnd()+1);
}
color.nextInfo();
}
- if (par[_dataEnd] != '}')
- output(os, _dataEnd-1);
- else
- output(os, _dataEnd);
+ int start;
+ if (color.valid == LastValid)
+ start = color.getEnd()+1;
+ else {
+ // Apparently nothing output so far
+ start = _dataStart;
+ }
+ discarSuperfluousParentheses(start);
+ output(os, _dataEnd);
}
/*
@@ -1318,15 +1454,30 @@ string splitForColors(string par) {
LangInfo firstLanguage(par, "\\foreignlanguage{");
if (firstLanguage.firstInfo("\\foreignlanguage{", 0)) {
LangInfo nextLanguage(firstLanguage);
- nextLanguage.setDataEnd(firstLanguage.getDataStart());
if (nextLanguage.firstInfo("\\foreignlanguage{",
firstLanguage.getTokenEnd())) {
firstLanguage.setDataEnd(nextLanguage.getStart());
}
+ else {
+ int oldend = firstLanguage.getEnd();
+ firstLanguage.setDataEnd(par.length());
+ // discard old closing
+ firstLanguage.addIntervall(oldend, oldend+1);
+ for (int i = 1; i < firstLanguage.getEnd(); i++) {
+ if (par[i] == '{')
+ firstLanguage.discardParethesizedInBlock(i);
+ else
+ break;
+ }
+ }
firstLanguage.process(os);
+ // For the case, that the first language ends unexpected
+ int lastgapstart = firstLanguage.getEnd()+1;
while (nextLanguage.isValid()) {
nextLanguage.process(os);
// To handle the gap, we need the end of last languuage to start of next
int gapstart = nextLanguage.getEnd()+1;
+ if (gapstart > lastgapstart)
+ lastgapstart = gapstart;
int gapend;
nextLanguage.nextInfo();
if (nextLanguage.isValid())
@@ -1339,8 +1490,16 @@ string splitForColors(string par) {
firstLanguage.setDataEnd(gapend);
firstLanguage.setDataStart(gapstart);
firstLanguage.process(os);
+ lastgapstart = gapend+1;
}
}
+ if (size_t(lastgapstart) < par.length()) {
+ int lastgapend = par.length();
+ // cout << "Found last gap, size = " << lastgapend - lastgapstart <<
"\n";
+ firstLanguage.setDataEnd(lastgapend);
+ firstLanguage.setDataStart(lastgapstart);
+ firstLanguage.process(os);
+ }
}
string s = os.str();
return s;
@@ -1363,8 +1522,9 @@ static string correctlanguagesetting(string par, bool
from_regex, bool withforma
parlen--;
}
string result = removefontinfo(par.substr(0, parlen));
+ LYXERR(Debug::FIND, "input: \"" << result, << "\n");
result = splitForColors(result);
- LYXERR(Debug::FIND, "Converted: \"" << result << "\"");
+ LYXERR(Debug::FIND, "After split: \"" << result << "\"");
bool handle_colors = false;
if (from_regex) {
missed = 0;
@@ -1695,7 +1855,9 @@ string MatchStringAdv::normalize(docstring const & s,
bool hack_braces) const
// Kornel: Added textsl, textsf, textit, texttt and noun
// + allow to seach for colored text too
LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{},
\\*section{} macros from: " << t);
- while (regex_replace(t, t,
"\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)(\\{(\\{\\})?\\})+",
""))
+ while (regex_replace(t, t,
"\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+",
""))
+ LYXERR(Debug::FIND, " further removing stale empty \\emph{},
\\textbf{} macros from: " << t);
+ while (regex_replace(t, t,
"\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
LYXERR(Debug::FIND, " further removing stale empty \\emph{},
\\textbf{} macros from: " << t);
while (regex_replace(t, t,
"\\\\(foreignlanguage|textcolor)\\{[a-z]+\\}(\\{(\\\\item |\\{\\})?\\})+", ""));