Petr Onderka has submitted this change and it was merged.

Change subject: Fix comments early, so that check for no changes works right
......................................................................


Fix comments early, so that check for no changes works right

+ handle another case of overlong comment

Change-Id: Id6424a9e44ba1351396bb7ba994899c088742cb3
---
M DumpObjects/DumpTraits.h
M DumpWriters/DumpWriter.cpp
M DumpWriters/DumpWriter.h
3 files changed, 22 insertions(+), 11 deletions(-)

Approvals:
  Petr Onderka: Verified; Looks good to me, approved



diff --git a/DumpObjects/DumpTraits.h b/DumpObjects/DumpTraits.h
index ba7f3cf..45a686e 100644
--- a/DumpObjects/DumpTraits.h
+++ b/DumpObjects/DumpTraits.h
@@ -219,17 +219,7 @@
 
         if (length > 255)
         {
-            // invalid UTF-8 at the end of a string is represented as U+FFFD
-            // this can get string over 255 bytes, so that character needs to be removed
-
-            string replacementChar = "\xEF\xBF\xBD"; // UTF-8 encoded U+FFFD REPLACEMENT CHARACTER
-            if (value.substr(value.length() - 3) == replacementChar)
-            {
-                string fixedValue = value.substr(0, value.length() - 3);
-                Write(stream, fixedValue);
-            }
-            else
-                throw DumpException();
+            throw DumpException();
         }
         else
         {
diff --git a/DumpWriters/DumpWriter.cpp b/DumpWriters/DumpWriter.cpp
index 6183c4c..4d899e9 100644
--- a/DumpWriters/DumpWriter.cpp
+++ b/DumpWriters/DumpWriter.cpp
@@ -20,6 +20,24 @@
     page.Title.erase(0, namespapceName.length());
 }
 
+void DumpWriter::NormalizeComment(Revision& revision)
+{
+    std::string &comment = revision.Comment;
+    if (comment.length() > 255)
+    {
+        // invalid UTF-8 at the end of a string is represented as U+FFFD
+        // this can get string over 255 bytes, so that character needs to be removed
+
+        std::string replacementChar = "\xEF\xBF\xBD"; // UTF-8 encoded U+FFFD REPLACEMENT CHARACTER
+        if (comment.substr(comment.length() - 3) == replacementChar)
+            comment.erase(comment.length() - 3, 3);
+        else if (comment.substr(comment.length() - 6) == replacementChar + "...")
+            comment.pop_back();
+        else
+            throw DumpException();
+    }
+}
+
 DumpWriter::DumpWriter(std::shared_ptr<WritableDump> dump, bool withText, std::unique_ptr<DiffWriter> diffWriter)
     : dump(dump), withText(withText), diffWriter(std::move(diffWriter))
 {
@@ -63,6 +81,8 @@
     DumpRevision dumpRevision(dump, revision->RevisionId, false);
     dumpRevision.revision = *revision;
 
+    NormalizeComment(dumpRevision.revision);
+
     if (diffWriter != nullptr)
     {
         bool isNew;
diff --git a/DumpWriters/DumpWriter.h b/DumpWriters/DumpWriter.h
index a1fee36..2d99090 100644
--- a/DumpWriters/DumpWriter.h
+++ b/DumpWriters/DumpWriter.h
@@ -20,6 +20,7 @@
     bool withText;
 
     void RemoveNamespace(Page& page);
+    void NormalizeComment(Revision& revision);
 public:
     DumpWriter(std::shared_ptr<WritableDump> dump, bool withText, std::unique_ptr<DiffWriter> diffWriter = nullptr);
 

-- 
To view, visit https://gerrit.wikimedia.org/r/83786
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Id6424a9e44ba1351396bb7ba994899c088742cb3
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <[email protected]>
Gerrit-Reviewer: Petr Onderka <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to