Petr Onderka has submitted this change and it was merged.
Change subject: Fix comments early, so that check for no changes works right
......................................................................
Fix comments early, so that check for no changes works right
+ handle another case of overlong comment
Change-Id: Id6424a9e44ba1351396bb7ba994899c088742cb3
---
M DumpObjects/DumpTraits.h
M DumpWriters/DumpWriter.cpp
M DumpWriters/DumpWriter.h
3 files changed, 22 insertions(+), 11 deletions(-)
Approvals:
Petr Onderka: Verified; Looks good to me, approved
diff --git a/DumpObjects/DumpTraits.h b/DumpObjects/DumpTraits.h
index ba7f3cf..45a686e 100644
--- a/DumpObjects/DumpTraits.h
+++ b/DumpObjects/DumpTraits.h
@@ -219,17 +219,7 @@
if (length > 255)
{
- // invalid UTF-8 at the end of a string is represented as U+FFFD
- // this can get string over 255 bytes, so that character needs to be removed
-
- string replacementChar = "\xEF\xBF\xBD"; // UTF-8 encoded U+FFFD REPLACEMENT CHARACTER
- if (value.substr(value.length() - 3) == replacementChar)
- {
- string fixedValue = value.substr(0, value.length() - 3);
- Write(stream, fixedValue);
- }
- else
- throw DumpException();
+ throw DumpException();
}
else
{
diff --git a/DumpWriters/DumpWriter.cpp b/DumpWriters/DumpWriter.cpp
index 6183c4c..4d899e9 100644
--- a/DumpWriters/DumpWriter.cpp
+++ b/DumpWriters/DumpWriter.cpp
@@ -20,6 +20,24 @@
page.Title.erase(0, namespapceName.length());
}
+void DumpWriter::NormalizeComment(Revision& revision)
+{
+ std::string &comment = revision.Comment;
+ if (comment.length() > 255)
+ {
+ // invalid UTF-8 at the end of a string is represented as U+FFFD
+ // this can get string over 255 bytes, so that character needs to be removed
+
+ std::string replacementChar = "\xEF\xBF\xBD"; // UTF-8 encoded U+FFFD REPLACEMENT CHARACTER
+ if (comment.substr(comment.length() - 3) == replacementChar)
+ comment.erase(comment.length() - 3, 3);
+ else if (comment.substr(comment.length() - 6) == replacementChar + "...")
+ comment.pop_back();
+ else
+ throw DumpException();
+ }
+}
+
DumpWriter::DumpWriter(std::shared_ptr<WritableDump> dump, bool withText,
std::unique_ptr<DiffWriter> diffWriter)
: dump(dump), withText(withText), diffWriter(std::move(diffWriter))
{
@@ -63,6 +81,8 @@
DumpRevision dumpRevision(dump, revision->RevisionId, false);
dumpRevision.revision = *revision;
+ NormalizeComment(dumpRevision.revision);
+
if (diffWriter != nullptr)
{
bool isNew;
diff --git a/DumpWriters/DumpWriter.h b/DumpWriters/DumpWriter.h
index a1fee36..2d99090 100644
--- a/DumpWriters/DumpWriter.h
+++ b/DumpWriters/DumpWriter.h
@@ -20,6 +20,7 @@
bool withText;
void RemoveNamespace(Page& page);
+ void NormalizeComment(Revision& revision);
public:
DumpWriter(std::shared_ptr<WritableDump> dump, bool withText,
std::unique_ptr<DiffWriter> diffWriter = nullptr);
--
To view, visit https://gerrit.wikimedia.org/r/83786
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Id6424a9e44ba1351396bb7ba994899c088742cb3
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
Gerrit-Reviewer: Petr Onderka <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits