ArielGlenn has submitted this change and it was merged.
Change subject: move mediawiki version arg to output file structure (prelude to
writing multiple output files with different versions in the same pass)
......................................................................
move mediawiki version arg to output file structure (prelude to writing
multiple output files with different versions in the same pass)
Change-Id: I941d0773d340f8fdda70d42a66b0edcd8484830b
---
M xmlfileutils/filebuffers.c
M xmlfileutils/mwxml2sql.c
M xmlfileutils/mwxml2sql.h
M xmlfileutils/mwxmlelts.c
M xmlfileutils/sqlutils.c
5 files changed, 40 insertions(+), 28 deletions(-)
Approvals:
ArielGlenn: Verified; Looks good to me, approved
diff --git a/xmlfileutils/filebuffers.c b/xmlfileutils/filebuffers.c
index 96e04fe..428cd68 100644
--- a/xmlfileutils/filebuffers.c
+++ b/xmlfileutils/filebuffers.c
@@ -457,6 +457,9 @@
/*
args:
filename name of output file
+ mwv structure with information about the MediaWiki
version for which sql output in this file will
+ be produced
returns:
allocated and filled in output file structure on success
@@ -469,7 +472,7 @@
if no filename is supplied, the function will assume that
writes go to stdout and will set things up accordingly
*/
-output_file_t *init_output_file(char *filename) {
+output_file_t *init_output_file(char *filename, mw_version_t *mwv) {
output_file_t *outf;
outf = (output_file_t *)malloc(sizeof(output_file_t));
@@ -481,6 +484,7 @@
outf->gzfd = NULL;
outf->bz2fd = NULL;
outf->filename = filename;
+ outf->mwv = mwv;
if (filename == NULL) {
outf->filetype = PLAINTEXT;
outf->fd = stdin;
diff --git a/xmlfileutils/mwxml2sql.c b/xmlfileutils/mwxml2sql.c
index 8dde8d6..1e40423 100644
--- a/xmlfileutils/mwxml2sql.c
+++ b/xmlfileutils/mwxml2sql.c
@@ -350,11 +350,11 @@
}
if (mysql_file == NULL) {
- mysql_createtables = init_output_file(NULL);
- mysql_page = init_output_file(NULL);
- mysql_revs = init_output_file(NULL);
+ mysql_createtables = init_output_file(NULL, mwv);
+ mysql_page = init_output_file(NULL, mwv);
+ mysql_revs = init_output_file(NULL, mwv);
if (text_file)
- mysql_text = init_output_file(NULL);
+ mysql_text = init_output_file(NULL, mwv);
}
else {
/* take apart the name if needed and shove in the prefix, then the suffix
*/
@@ -365,13 +365,13 @@
sprintf(mysql_page_file, "%s-page.sql%s", filebase, filesuffix);
sprintf(mysql_revs_file, "%s-revs.sql%s", filebase, filesuffix);
- mysql_createtables = init_output_file(mysql_createtables_file);
- mysql_page = init_output_file(mysql_page_file);
- mysql_revs = init_output_file(mysql_revs_file);
+ mysql_createtables = init_output_file(mysql_createtables_file, mwv);
+ mysql_page = init_output_file(mysql_page_file, mwv);
+ mysql_revs = init_output_file(mysql_revs_file, mwv);
if (text_file) {
sprintf(mysql_text_file, "%s-text.sql%s", filebase, filesuffix);
- mysql_text = init_output_file(mysql_text_file);
+ mysql_text = init_output_file(mysql_text_file, mwv);
}
free(filebase);
@@ -388,7 +388,7 @@
exit(1);
};
- write_createtables_file(mysql_createtables, nodrop, mwv, tables);
+ write_createtables_file(mysql_createtables, nodrop, tables);
close_output_file(mysql_createtables);
if (verbose) fprintf(stderr,"Create tables sql file written, beginning scan
of xml\n");
@@ -399,7 +399,7 @@
fprintf(stderr,"abrupt end to content\n");
return(1);
}
- result = do_file_header(stubs, 0, &stubs_schema, &s_info, mwv, verbose);
+ result = do_file_header(stubs, 0, &stubs_schema, &s_info, verbose);
if (result) {
fprintf(stderr,"error encountered scanning stubs file header\n");
exit(1);
@@ -410,7 +410,7 @@
fprintf(stderr,"abrupt end to content\n");
exit(1);
}
- result = do_file_header(text, 1, NULL, NULL, mwv, verbose);
+ result = do_file_header(text, 1, NULL, NULL, verbose);
if (result) {
fprintf(stderr,"error encountered scanning text file header\n");
exit(1);
@@ -425,7 +425,7 @@
}
while (! eof) {
- result = do_page(stubs, text, text_compress, mysql_page, mysql_revs,
mysql_text, verbose, tables, nodrop, start_page_id, mwv);
+ result = do_page(stubs, text, text_compress, mysql_page, mysql_revs,
mysql_text, verbose, tables, nodrop, start_page_id);
if (!result) break;
pages_done++;
if (verbose && !(pages_done%1000)) fprintf(stderr,"%d pages processed\n",
pages_done);
diff --git a/xmlfileutils/mwxml2sql.h b/xmlfileutils/mwxml2sql.h
index 8483bb7..612e5d8 100644
--- a/xmlfileutils/mwxml2sql.h
+++ b/xmlfileutils/mwxml2sql.h
@@ -132,6 +132,7 @@
FILE *fd;
gzFile gzfd;
BZFILE *bz2fd;
+ mw_version_t *mwv;
} output_file_t;
typedef struct {
@@ -208,15 +209,15 @@
int find_text_in_rev(input_file_t *f);
char *sql_escape(char *s, int s_size, char *out, int out_size);
int do_text(input_file_t *f, output_file_t *sqlt, revision_t *r, int verbose,
tablenames_t *t, int insrt_ignore, int get_sha1, int get_text_len, int
text_commpress);
-int do_revision(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, page_t *p, int
verbose, tablenames_t *t, int insert_ignore, mw_version_t *mwv);
+int do_revision(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, page_t *p, int
verbose, tablenames_t *t, int insert_ignore);
void digits_only(char *buf);
int find_page_with_id(input_file_t *f, char *id);
-int do_page(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, int verbose,
tablenames_t *t, int insert_ignore, char *start_page_id, mw_version_t *mwv);
+int do_page(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, int verbose,
tablenames_t *t, int insert_ignore, char *start_page_id);
int do_namespace(input_file_t *f, namespace_t *n, int verbose);
int do_namespaces(input_file_t *f, siteinfo_t *s, int verbose);
int do_siteinfo(input_file_t *f, siteinfo_t **s, int verbose);
int do_mw_header(input_file_t *f, int skipschema, char **schema, int verbose);
-void write_createtables_file(output_file_t *f, int nodrop, mw_version_t *mwv,
tablenames_t *t);
+void write_createtables_file(output_file_t *f, int nodrop, tablenames_t *t);
tablenames_t *setup_table_names(char *prefix);
void free_input_buffer(string_t *b);
@@ -226,7 +227,7 @@
void free_input_file(input_file_t *f);
void free_output_file(output_file_t *f);
input_file_t *init_input_file(char *xml_file);
-output_file_t *init_output_file(char *xml_file);
+output_file_t *init_output_file(char *xml_file, mw_version_t *mwv);
void close_input_file(input_file_t *f);
void close_output_file(output_file_t *f);
@@ -234,7 +235,7 @@
void usage(char *whoami, char *message);
char *get_filebase(char *file_name, int verbose);
char *get_filesuffix(char *file_name, int verbose);
-int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t
**s, mw_version_t *mwv, int verbose);
+int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t
**s, int verbose);
char *gzipit(char *contents, int *compressed_length, char *gz_buf, int
gz_buf_length);
diff --git a/xmlfileutils/mwxmlelts.c b/xmlfileutils/mwxmlelts.c
index 5f99b38..bbc5a78 100644
--- a/xmlfileutils/mwxmlelts.c
+++ b/xmlfileutils/mwxmlelts.c
@@ -313,6 +313,9 @@
char compressed_buf[TEXT_BUF_LEN_PADDED];
char *compressed_ptr = NULL;
+ mw_version_t *mwv;
+
+ mwv = sqlt->mwv; /* unused but we'll want it in the future */
if (get_sha1) SHA1_Init(&ctx);
ind = strstr(f->in_buf->content, "<text");
@@ -505,8 +508,6 @@
prefix like mw_ or what have you instead of just the
regular names)
insert_ignore: 0 to write ordinary INSERT statements, 1 to write INSERT
IGNORE (causes
mysql to ignore the insert if a record with the same
primary key already exists)
- mwv: pointer to mw_version_t containing the info about the
MediaWiki version for
- which output will be produced
this function expects content buffer of the stubs file to already be
filled with the line containing the revision start tag
@@ -526,7 +527,7 @@
and p->model may be modified using data from
the corresponding revision fields.
*/
-int do_revision(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, page_t *p, int
verbose, tablenames_t *t, int insert_ignore, mw_version_t *mwv) {
+int do_revision(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, page_t *p, int
verbose, tablenames_t *t, int insert_ignore) {
char out_buf[TEXT_BUF_LEN*2];
revision_t r;
contributor_t c;
@@ -541,6 +542,9 @@
char value[400];
int result = 0;
+ mw_version_t *mwv;
+
+ mwv = sqlr->mwv;
if (get_start_tag(stubs, REVISION) == -1) return(0);
if (get_line(stubs) == NULL) {
@@ -864,12 +868,15 @@
is successfully read
*/
-int do_page(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, int verbose,
tablenames_t *t, int insert_ignore, char*start_page_id, mw_version_t *mwv) {
+int do_page(input_file_t *stubs, input_file_t *text, int text_compress,
output_file_t *sqlp, output_file_t *sqlr, output_file_t *sqlt, int verbose,
tablenames_t *t, int insert_ignore, char*start_page_id) {
page_t p;
char out_buf[1024]; /* seriously how long can username plus title plus the
rest of the cruft be? */
int want_text = 0;
char escaped_title[FIELD_LEN*2];
int skip = 0;
+ mw_version_t *mwv;
+
+ mwv = sqlp->mwv;
p.title[0] = '\0';
p.ns[0] = '\0';
@@ -982,7 +989,7 @@
}
while (1) {
- if (!do_revision(stubs, want_text?text:NULL, text_compress, sqlp, sqlr,
sqlt, &p, verbose, t, insert_ignore, mwv)) break;
+ if (!do_revision(stubs, want_text?text:NULL, text_compress, sqlp, sqlr,
sqlt, &p, verbose, t, insert_ignore)) break;
if (get_line(stubs) == NULL) {
whine("abrupt end of page data");
return(0);
@@ -1387,8 +1394,6 @@
start tag <mediawiki is found
schema pointer to preallocated holder for string with schema
version (e.g. '0.8')
s pointer to holder for site info
- mwv pointer to structure containing the info about the MediaWiki
version for
- which output will be produced
verbose 0 for quiet mode, 1 or greater to display info about the
record
as it is being written
@@ -1412,7 +1417,7 @@
structure on success and partially or not filled in at all
on error. If not filled in at all, it will be NULL.
*/
-int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t
**s, mw_version_t *mwv, int verbose) {
+int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t
**s, int verbose) {
if (schema && *schema) *schema[0] = '\0';
if (s) *s = NULL;
/* make this header optional */
diff --git a/xmlfileutils/sqlutils.c b/xmlfileutils/sqlutils.c
index 6b41b2b..eb3d080 100644
--- a/xmlfileutils/sqlutils.c
+++ b/xmlfileutils/sqlutils.c
@@ -335,15 +335,17 @@
args:
f structure for output file
nodrop do not write 'DROP TABLE...' statements (but do write 'INSERT
IGNORE' statements)
- mwv structure with information about the version of MediaWiki for
which to write output
t structure with the names of the tables
this function writes to the specified output file the sql required to create
the
page, revision and text tables for the MediaWiki version specified
*/
-void write_createtables_file(output_file_t *f, int nodrop, mw_version_t *mwv,
tablenames_t *t) {
+void write_createtables_file(output_file_t *f, int nodrop, tablenames_t *t) {
char out_buf[256];
+ mw_version_t *mwv;
+
+ mwv = f->mwv;
if (!nodrop) {
snprintf(out_buf, sizeof(out_buf), "DROP TABLE IF EXISTS `%s`;\n",
t->text);
--
To view, visit https://gerrit.wikimedia.org/r/47427
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I941d0773d340f8fdda70d42a66b0edcd8484830b
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits