ArielGlenn has submitted this change and it was merged.
Change subject: move mediawiki version arg to output file structure (prelude to
writing multiple output files with different versions in the same pass)
......................................................................
move mediawiki version arg to output file structure (prelude to writing
multiple output files with different versions in the same pass)
Change-Id: I941d0773d340f8fdda70d42a66b0edcd8484830b
---
M xmlfileutils/filebuffers.c
M xmlfileutils/mwxml2sql.c
M xmlfileutils/mwxml2sql.h
M xmlfileutils/mwxmlelts.c
M xmlfileutils/sqlutils.c
5 files changed, 40 insertions(+), 28 deletions(-)
Approvals:
ArielGlenn: Verified; Looks good to me, approved
diff --git a/xmlfileutils/filebuffers.c b/xmlfileutils/filebuffers.c
index 96e04fe..428cd68 100644
--- a/xmlfileutils/filebuffers.c
+++ b/xmlfileutils/filebuffers.c
@@ -457,6 +457,9 @@
/*
args:
filename name of output file
+ mwv structure with information about the MediaWiki
version for which sql output in this file will
+ be produced
returns:
allocated and filled in output file structure on success
@@ -469,7 +472,7 @@
if no filename is supplied, the function will assume that
writes go to stdout and will set things up accordingly
*/
-output_file_t *init_output_file(char *filename) {
+output_file_t *init_output_file(char *filename, mw_version_t *mwv) {
output_file_t *outf;
outf = (output_file_t *)malloc(sizeof(output_file_t));
@@ -481,6 +484,7 @@
outf->gzfd = NULL;
outf->bz2fd = NULL;
outf->filename = filename;
+ outf->mwv = mwv;
if (filename == NULL) {
outf->filetype = PLAINTEXT;
outf->fd = stdin;
diff --git a/xmlfileutils/mwxml2sql.c b/xmlfileutils/mwxml2sql.c
index 8dde8d6..1e40423 100644
--- a/xmlfileutils/mwxml2sql.c
+++ b/xmlfileutils/mwxml2sql.c
@@ -350,11 +350,11 @@
}
if (mysql_file == NULL) {
- mysql_createtables = init_output_file(NULL);
- mysql_page = init_output_file(NULL);
- mysql_revs = init_output_file(NULL);
+ mysql_createtables = init_output_file(NULL, mwv);
+ mysql_page = init_output_file(NULL, mwv);
+ mysql_revs = init_output_file(NULL, mwv);
if (text_file)
- mysql_text = init_output_file(NULL);
+ mysql_text = init_output_file(NULL, mwv);
}
else {
/* take apart the name if needed and shove in the prefix, then the suffix
*/
@@ -365,13 +365,13 @@
sprintf(mysql_page_file, "%s-page.sql%s", filebase, filesuffix);
sprintf(mysql_revs_file, "%s-revs.sql%s", filebase, filesuffix);
- mysql_createtables = init_output_file(mysql_createtables_file);
- mysql_page = init_output_file(mysql_page_file);
- mysql_revs = init_output_file(mysql_revs_file);
+ mysql_createtables = init_output_file(mysql_createtables_file, mwv);
+ mysql_page = init_output_file(mysql_page_file, mwv);
+ mysql_revs = init_output_file(mysql_revs_file, mwv);
if (text_file) {
sprintf(mysql_text_file, "%s-text.sql%s", filebase, filesuffix);
- mysql_text = init_output_file(mysql_text_file);
+ mysql_text = init_output_file(mysql_text_file, mwv);
}
free(filebase);
@@ -388,7 +388,7 @@
exit(1);
};
- write_createtables_file(mysql_createtables, nodrop, mwv, tables);
+ write_createtables_file(mysql_createtables, nodrop, tables);
close_output_file(mysql_createtables);
if (verbose) fprintf(stderr,"Create tables sql file written, beginning scan
of xml\n");
@@ -399,7 +399,7 @@
fprintf(stderr,"abrupt end to content\n");
return(1);
}
- result = do_file_header(stubs, 0, &stubs_schema, &s_info, mwv, verbose);
+ result = do_file_header(stubs, 0, &stubs_schema, &s_info, verbose);
if (result) {
fprintf(stderr,"error encountered scanning stubs file header\n");
exit(1);
@@ -410,7 +410,7 @@
fprintf(stderr,"abrupt end to content\n");
exit(1);
}
- result = do_file_header(text, 1, NULL, NULL, mwv, verbose);
+ result = do_file_header(text, 1, NULL, NULL, verbose);
if (result) {
fprintf(stderr,"error encountered scanning text file header\n");
exit(1);
@@ -425,7 +425,7 @@
}
while (! eof) {
- result = do_page(stubs, text, text_compress, mysql_page, mysql_revs,
mysql_text, verbose, tables, nodrop, start_page_id, mwv);
+ result = do_page(stubs, text, text_compress, mysql_page, mysql_revs,
mysql_text, verbose, tables, nodrop, start_page_id);
if (!result) break;
pages_done++;
if (verbose && !(pages_done%1000)) fprintf(stderr,"%d pages processed\n",
pages_done);
diff --git a/xmlfileutils/mwxml2sql.h b/xmlfileutils/mwxml2sql.h
index 8483bb7..612e5d8 100644
--- a/xmlfileutils/mwxml2sql.h
+++ b/xmlfileutils/mwxml2sql.h
@@ -132,6 +132,7 @@
FILE *fd;
gzFile gzfd;
BZFILE *bz2fd;
+ mw_version_t *mwv;
} output_file_t;
typedef struct {
@@ -208,15 +209,15 @@
int find_text_in_rev(input_file_t *f);
char *sql_escape(char *s, int s_size, char *out, int out_size);
int do_text(input_file_t *f, output_file_t *sqlt, revision_t *r, int verbose,
tablenames_t *t, int insrt_ignore, int get_sha1, int get_text_len, int
text_commpress);
-int do_revision(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, page_t *p, int
verbose, tablenames_t *t, int insert_ignore, mw_version_t *mwv);
+int do_revision(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, page_t *p, int
verbose, tablenames_t *t, int insert_ignore);
void digits_only(char *buf);
int find_page_with_id(input_file_t *f, char *id);
-int do_page(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, int verbose,
tablenames_t *t, int insert_ignore, char *start_page_id, mw_version_t *mwv);
+int do_page(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, int verbose,
tablenames_t *t, int insert_ignore, char *start_page_id);
int do_namespace(input_file_t *f, namespace_t *n, int verbose);
int do_namespaces(input_file_t *f, siteinfo_t *s, int verbose);
int do_siteinfo(input_file_t *f, siteinfo_t **s, int verbose);
int do_mw_header(input_file_t *f, int skipschema, char **schema, int verbose);
-void write_createtables_file(output_file_t *f, int nodrop, mw_version_t *mwv,
tablenames_t *t);
+void write_createtables_file(output_file_t *f, int nodrop, tablenames_t *t);
tablenames_t *setup_table_names(char *prefix);
void free_input_buffer(string_t *b);
@@ -226,7 +227,7 @@
void free_input_file(input_file_t *f);
void free_output_file(output_file_t *f);
input_file_t *init_input_file(char *xml_file);
-output_file_t *init_output_file(char *xml_file);
+output_file_t *init_output_file(char *xml_file, mw_version_t *mwv);
void close_input_file(input_file_t *f);
void close_output_file(output_file_t *f);
@@ -234,7 +235,7 @@
void usage(char *whoami, char *message);
char *get_filebase(char *file_name, int verbose);
char *get_filesuffix(char *file_name, int verbose);
-int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t
**s, mw_version_t *mwv, int verbose);
+int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t
**s, int verbose);
char *gzipit(char *contents, int *compressed_length, char *gz_buf, int
gz_buf_length);
diff --git a/xmlfileutils/mwxmlelts.c b/xmlfileutils/mwxmlelts.c
index 5f99b38..bbc5a78 100644
--- a/xmlfileutils/mwxmlelts.c
+++ b/xmlfileutils/mwxmlelts.c
@@ -313,6 +313,9 @@
char compressed_buf[TEXT_BUF_LEN_PADDED];
char *compressed_ptr = NULL;
+ mw_version_t *mwv;
+
+ mwv = sqlt->mwv; /* unused but we'll want it in the future */
if (get_sha1) SHA1_Init(&ctx);
ind = strstr(f->in_buf->content, "<text");
@@ -505,8 +508,6 @@
prefix like mw_ or what have you instead of just the
regular names)
insert_ignore: 0 to write ordinary INSERT statements, 1 to write INSERT
IGNORE (causes
mysql to ignore the insert if a record with the same
primary key already exists)
- mwv: pointer to mw_version_t containing the info about the
MediaWiki version for
- which output will be produced
this function expects content buffer of the stubs file to already be
filled with the line containing the revision start tag
@@ -526,7 +527,7 @@
and p->model may be modified using data from
the corresponding revision fields.
*/
-int do_revision(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, page_t *p, int
verbose, tablenames_t *t, int insert_ignore, mw_version_t *mwv) {
+int do_revision(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, page_t *p, int
verbose, tablenames_t *t, int insert_ignore) {
char out_buf[TEXT_BUF_LEN*2];
revision_t r;
contributor_t c;
@@ -541,6 +542,9 @@
char value[400];
int result = 0;
+ mw_version_t *mwv;
+
+ mwv = sqlr->mwv;
if (get_start_tag(stubs, REVISION) == -1) return(0);
if (get_line(stubs) == NULL) {
@@ -864,12 +868,15 @@
is successfully read
*/
-int do_page(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, int verbose,
tablenames_t *t, int insert_ignore, char*start_page_id, mw_version_t *mwv) {
+int do_page(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, int verbose,
tablenames_t *t, int insert_ignore, char*start_page_id) {
page_t p;
char out_buf[1024]; /* seriously how long can username plus title plus the
rest of the cruft be? */
int want_text = 0;
char escaped_title[FIELD_LEN*2];
int skip = 0;
+ mw_version_t *mwv;
+
+ mwv = sqlp->mwv;
p.title[0] = '\0';
p.ns[0] = '\0';
@@ -982,7 +989,7 @@
}
while (1) {
- if (!do_revision(stubs, want_text?text:NULL, text_compress, sqlp, sqlr,
sqlt, &p, verbose, t, insert_ignore, mwv)) break;
+ if (!do_revision(stubs, want_text?text:NULL, text_compress, sqlp, sqlr,
sqlt, &p, verbose, t, insert_ignore)) break;
if (get_line(stubs) == NULL) {
whine("abrupt end of page data");
return(0);
@@ -1387,8 +1394,6 @@
start tag <mediawiki is found
schema pointer to preallocated holder for string with schema
version (e.g. '0.8')
s pointer to holder for site info
- mwv pointer to structure containing the info about the MediaWiki
version for
- which output will be produced
verbose 0 for quiet mode, 1 or greater to display info about the
record
as it is being written
@@ -1412,7 +1417,7 @@
structure on success and partially or not filled in at all
on error. If not filled in at all, it will be NULL.
*/
-int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t
**s, mw_version_t *mwv, int verbose) {
+int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t
**s, int verbose) {
if (schema && *schema) *schema[0] = '\0';
if (s) *s = NULL;
/* make this header optional */
diff --git a/xmlfileutils/sqlutils.c b/xmlfileutils/sqlutils.c
index 6b41b2b..eb3d080 100644
--- a/xmlfileutils/sqlutils.c
+++ b/xmlfileutils/sqlutils.c
@@ -335,15 +335,17 @@
args:
f structure for output file
nodrop do not write 'DROP TABLE...' statements (but do write 'INSERT
IGNORE' statements)
- mwv structure with information about the version of MediaWiki for
which to write output
t structure with the names of the tables
this function writes to the specified output file the sql required to create
the
page, revision and text tables for the MediaWiki version specified
*/
-void write_createtables_file(output_file_t *f, int nodrop, mw_version_t *mwv,
tablenames_t *t) {
+void write_createtables_file(output_file_t *f, int nodrop, tablenames_t *t) {
char out_buf[256];
+ mw_version_t *mwv;
+
+ mwv = f->mwv;
if (!nodrop) {
snprintf(out_buf, sizeof(out_buf), "DROP TABLE IF EXISTS `%s`;\n",
t->text);
--
To view, visit https://gerrit.wikimedia.org/r/47427
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I941d0773d340f8fdda70d42a66b0edcd8484830b
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits