jenkins-bot has submitted this change and it was merged.

Change subject: Test fill_table_monuments_all.sql with monuments_config
......................................................................


Test fill_table_monuments_all.sql with monuments_config

Parse fill_table_monuments_all.sql and do some basic validation
both alone and in combination with the contents of monuments_config.py.

This is in preparation for dealing with T136704 and T55813.

Also:
* Correct name of monuments_config test file and class.

Change-Id: I7cbc91884df6a985562e92bb231d7ee964537f5f
---
M erfgoedbot/sql/fill_table_monuments_all.sql
A tests/test_fill_table.py
R tests/test_monuments_config.py
3 files changed, 159 insertions(+), 29 deletions(-)

Approvals:
  Jean-Frédéric: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/erfgoedbot/sql/fill_table_monuments_all.sql b/erfgoedbot/sql/fill_table_monuments_all.sql
index 5d1cf9b..f992ee5 100644
--- a/erfgoedbot/sql/fill_table_monuments_all.sql
+++ b/erfgoedbot/sql/fill_table_monuments_all.sql
@@ -1004,34 +1004,6 @@
     `registrant_url` AS `registrant_url`
     FROM `monuments_de-he_(de)`;
 
-/* Nordrhein-Westfalen (North-Rhine Westphalia), Germany in German - disabled, was a hack on the Toolserver
-REPLACE INTO
-  `monuments_all_tmp` (
-    `country`, `lang`, `id`, `adm0`, `adm1`, `adm2`, `adm3`, `adm4`, `name`, `address`, `municipality`, `lat`, `lon`, `lat_int`, `lon_int`, `image`, `commonscat`, `source`, `changed`, `monument_article`, `registrant_url`
-  ) SELECT
-    'de-nrw' AS `country`,
-    'de' AS `lang`,
-    `id` AS `id`,
-    'de' AS `adm0`,
-    'de-nw' AS `adm1`,
-    `area` AS `adm2`,
-    '' AS `adm3`,
-    '' AS `adm4`,
-    `name` AS `name`,
-    `address` AS `address`,
-    `municipality` AS `municipality`,
-    `lat` AS `lat`,
-    `lon` AS `lon`,
-     ROUND(`lat` * @granularity) AS `lat_int`,
-     ROUND(`lon` * @granularity) AS `lon_int`,
-    `image` AS `image`,
-    '' AS `commonscat`,
-    `source` AS `source`,
-    `changed` AS `changed`,
-    `monument_article` AS `monument_article`,
-    `registrant_url` AS `registrant_url`
-    FROM u_wiegels_mon_p.`monuments_de-nrw_(de)`; */
-
 /* Bergheim, NRW, Germany in German */
 REPLACE INTO
   `monuments_all_tmp` (
diff --git a/tests/test_fill_table.py b/tests/test_fill_table.py
new file mode 100644
index 0000000..dafd03b
--- /dev/null
+++ b/tests/test_fill_table.py
@@ -0,0 +1,158 @@
+# -*- coding: utf-8  -*-
+"""Validation for fill_table_monuments_all.sql with monuments_config.py."""
+
+import unittest
+import re
+from erfgoedbot import monuments_config as config
+
+
+def isolate_dataset_entries(text):
+    """Parse sql to identify replacement groups."""
+    data = {}
+    results = re.findall('REPLACE INTO\n  `monuments_all_tmp`(.*?)'
+                         'SELECT(.*?)'
+                         'FROM `(.*?)`',
+                         text, re.DOTALL | re.MULTILINE)
+
+    for result in results:
+        to_replace = result[0].strip('\n ()`').split('`, `')
+        replaced, sources = isolate_dataset_data(result[1])
+        data[result[2]] = {'to_replace': to_replace,
+                           'replaced': replaced,
+                           'sources': sources}
+
+    return data
+
+
+def isolate_dataset_data(text):
+    """Identify replaced fields and source fields, if any."""
+    replaced = set()
+    sources = set()
+    entries = text.strip().split('\n')
+    for entry in entries:
+        entry = entry.split(' AS ')
+        replaced |= set(re.findall('`(.*?)`', entry[1]))
+        sources |= set(re.findall('`(.*?)`', entry[0]))
+    return list(replaced), list(sources)
+
+
+class TestSQLTestParser(unittest.TestCase):
+
+    """Test the sql parser utilised in later tests."""
+
+    def test_isolate_dataset_entries(self):
+        """Test isolate_dataset_entries used in later tests."""
+        indata = """
+SomeText
+REPLACE INTO
+  `monuments_all_tmp` (
+    `tr_1`, `tr_2`, `tr_3`
+  ) SELECT
+    `s_1` AS `r_1`,
+    'something' AS `r_2`,
+    '' AS `r_3`,
+    NULL AS `r_4`,
+    ROUND(`s_2` * @granularity) AS `r_5`,
+    `s_3` AS `r_6`, /* Comment 1 */
+    `s_4` AS `r_7` -- Comment 2
+    CONCATENATE(`s_5`, `s_6`) AS `r_7`
+    FROM `monuments_some_(table)`;
+SomeText
+            """
+        expected_table = 'monuments_some_(table)'
+        expected_to_replace = ['tr_1', 'tr_2', 'tr_3']
+        expected_replaced = ['r_1', 'r_2', 'r_3', 'r_4', 'r_5', 'r_6', 'r_7']
+        expected_sources = ['s_1', 's_2', 's_3', 's_4', 's_5', 's_6']
+        data = isolate_dataset_entries(indata)
+        self.assertItemsEqual(data.keys(), [expected_table, ])
+        self.assertItemsEqual(data[expected_table]['to_replace'],
+                              expected_to_replace)
+        self.assertItemsEqual(data[expected_table]['replaced'],
+                              expected_replaced)
+        self.assertItemsEqual(data[expected_table]['sources'],
+                              expected_sources)
+
+
+class TestFillTableMonumentsBase(unittest.TestCase):
+
+    """Base class introducing assert_all_in method."""
+
+    def assert_all_in(self, first, second, msg=None):
+        """Test that all first is in second, else append failing to msg."""
+        failing = []
+        for i in first:
+            try:
+                self.assertIn(i, second, msg=msg)
+            except AssertionError:
+                failing.append(i)
+        self.assertEqual(failing, [], msg=msg % ', '.join(failing))
+
+
+class FillTableMonumentsValidation(TestFillTableMonumentsBase):
+
+    """Validate fill_table_monuments_all.sql."""
+
+    def setUp(self):
+        with open('erfgoedbot/sql/fill_table_monuments_all.sql', 'r') as f:
+            self.text = f.read()
+        self.data = isolate_dataset_entries(self.text)
+
+    def test_fill_table_monuments_all_replaced(self):
+        """Ensure all variables stated to be replaced are in fact replaced."""
+        self.longMessage = True
+        for table, dataset in self.data.iteritems():
+            self.assertItemsEqual(
+                dataset['to_replace'], dataset['replaced'], msg=table)
+
+    def test_fill_table_monuments_all_required_replacements(self):
+        """Ensure the required variables are replaced, at least."""
+        required = [
+            'source', 'changed', 'lat_int', 'lon_int',
+            'country', 'lang', 'id', 'adm0']
+        for table, dataset in self.data.iteritems():
+            msg = '%s in fill_table_monuments_all ' % table
+            msg += 'missing required variable(s): %s'
+            self.assert_all_in(required, dataset['replaced'], msg=msg)
+
+
+class TestFillTableMonumentsOntoMonumentsConfig(TestFillTableMonumentsBase):
+
+    """Compatibility of fill_table_monuments_all.sql with monuments_config."""
+
+    def setUp(self):
+        with open('erfgoedbot/sql/fill_table_monuments_all.sql', 'r') as f:
+            self.text = f.read()
+        self.data = isolate_dataset_entries(self.text)
+        self.process_config_tables()
+
+    def process_config_tables(self):
+        """Identify tables in monuments_config."""
+        self.config_tables = []
+        self.config_lookup = {}
+        for key, data in config.countries.iteritems():
+            table = data['table']
+            if table.startswith('monuments'):  # i.e. not wlpa
+                self.config_tables.append(table)
+                self.config_lookup[table] = key
+
+    def get_config_field_dests(self, table):
+        """Return field destinations for a given table in monuments_config."""
+        key = self.config_lookup[table]
+        dest = []
+        for field in config.countries[key]['fields']:
+            dest.append(field['dest'])
+        return dest
+
+    def test_fill_table_monuments_all_tables_present(self):
+        """Ensure all needed tables are present in monuments_config."""
+        msg = '%s in fill_table_monuments_all not present in monuments_config'
+        self.assert_all_in(self.data.keys(), self.config_tables, msg=msg)
+
+    def test_fill_table_monuments_all_source_in_config(self):
+        """Ensure all sources are present in the corresponding config entry."""
+        for table, dataset in self.data.iteritems():
+            msg = '%s in fill_table_monuments_all ' % table
+            msg += 'expects missing field(s): %s'
+            dest = self.get_config_field_dests(table)
+            dest += ['source', 'changed']  # implicitly defined
+            self.assert_all_in(dataset['sources'], dest, msg=msg)
diff --git a/tests/test_monument_config.py b/tests/test_monuments_config.py
similarity index 98%
rename from tests/test_monument_config.py
rename to tests/test_monuments_config.py
index b221c8e..c3c3b14 100644
--- a/tests/test_monument_config.py
+++ b/tests/test_monuments_config.py
@@ -5,7 +5,7 @@
 from erfgoedbot import monuments_config as config
 
 
-class TestMonumentConfigValidation(unittest.TestCase):
+class TestMonumentsConfigValidation(unittest.TestCase):
 
     """Test that monuments_config is valid."""
 

-- 
To view, visit https://gerrit.wikimedia.org/r/292532
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I7cbc91884df6a985562e92bb231d7ee964537f5f
Gerrit-PatchSet: 3
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil <[email protected]>
Gerrit-Reviewer: Jean-Frédéric <[email protected]>
Gerrit-Reviewer: Lokal Profil <[email protected]>
Gerrit-Reviewer: Multichill <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to