ArielGlenn has submitted this change and it was merged.

Change subject: build static binaries, makefile fixes, sha1 field fix
......................................................................


build static binaries, makefile fixes, sha1 field fix

- static binary build targets now provided, folks must provide
  their own copies of statically built libz.a and libbz2.a
- moved off of dependency on openssl libs, now using Christophe
  Devine's sha1 code, updated README to reflect change in dependencies
- sha1 digest must be converted to base36 for db, added this
- added missing files to make dist

Change-Id: I0a2db652602c34f0c901b4ec80b4dff507f845ba
---
M xmlfileutils/Makefile
M xmlfileutils/README
M xmlfileutils/mwxml2sql.h
M xmlfileutils/mwxmlelts.c
4 files changed, 70 insertions(+), 19 deletions(-)

Approvals:
  ArielGlenn: Verified; Looks good to me, approved



diff --git a/xmlfileutils/Makefile b/xmlfileutils/Makefile
index b673439..4d27faf 100644
--- a/xmlfileutils/Makefile
+++ b/xmlfileutils/Makefile
@@ -14,13 +14,13 @@
 
 all: mwxml2sql sql2txt
 
-mwxml2sql: mwxml2sql.o filebuffers.o xmltags.o mwxmlelts.o sqlutils.o
+mwxml2sql: mwxml2sql.o filebuffers.o xmltags.o mwxmlelts.o sqlutils.o base36.o sha1.o
        $(CC) $(CFLAGS) $(LDFLAGS) -o mwxml2sql mwxml2sql.o filebuffers.o \
-           xmltags.o mwxmlelts.o sqlutils.o -lssl -lcrypto -lbz2 -lz
+           xmltags.o mwxmlelts.o sqlutils.o base36.o sha1.o -lbz2 -lz
 
 sql2txt: sql2txt.o filebuffers.o sqlutils.o
        $(CC) $(CFLAGS) $(LDFLAGS) -o sql2txt sql2txt.o filebuffers.o sqlutils.o \
-           -lcrypto -lbz2 -lz
+           -lbz2 -lz
 
 install: mwxml2sql sql2txt
        if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi
@@ -31,6 +31,16 @@
 
 clean: 
        rm -f *.o *.a mwxml2sql sql2txt
+
+static: mwxml2sql_static sql2txt_static
+
+mwxml2sql_static: mwxml2sql
+       $(CC) $(CFLAGS) -static -static-libgcc -o mwxml2sql_static mwxml2sql.o filebuffers.o \
+           xmltags.o mwxmlelts.o sqlutils.o base36.o sha1.o -L. -lbz2 -lz
+
+sql2txt_static: sql2txt
+       $(CC) $(CFLAGS) -static -static-libgcc -o sql2txt_static sql2txt.o filebuffers.o sqlutils.o \
+           sha1.o -L. -lbz2 -lz
 
 mwxml2sql.o: mwxml2sql.c mwxml2sql.h
        $(CC) $(CFLAGS) -c mwxml2sql.c
@@ -50,6 +60,12 @@
 sqlutils.o: sqlutils.c mwxml2sql.h
        $(CC) $(CFLAGS) -c sqlutils.c
 
+base36.o: base36.c
+       $(CC) $(CFLAGS) -c base36.c
+
+sha1.o: sha1.c sha1.h
+       $(CC) $(CFLAGS) -c sha1.c
+
 distclean:
        rm -f $(DISTNAME)
        rm -f *.tar.gz
@@ -65,6 +81,10 @@
           $(DISTNAME)/sql2txt.c \
           $(DISTNAME)/sqlutils.c \
           $(DISTNAME)/xmltags.c \
+          $(DISTNAME)/mwxmlelts.c \
+          $(DISTNAME)/sha1.h \
+          $(DISTNAME)/sha1.c \
+          $(DISTNAME)/base36.c \
           $(DISTNAME)/Makefile \
           $(DISTNAME)/COPYING \
           $(DISTNAME)/README \
diff --git a/xmlfileutils/README b/xmlfileutils/README
index e6c1397..9f5268d 100644
--- a/xmlfileutils/README
+++ b/xmlfileutils/README
@@ -6,11 +6,10 @@
 xml dumps from the Wikimedia projects and that is all
 that it's intended to do.
 
-To install this program, you will need to have the libssl,
-libcrypto, libz and bz2 development libraries installed,
-as well as the gcc toolchain or an equivalent C compiler
-and its supporting libraries. You'll also need the 'make'
-utility.
+To install this program, you will need to have the libz and
+bz2 development libraries installed, as well as the gcc
+toolchain or an equivalent C compiler and its supporting
+libraries. You'll also need the 'make' utility.
 
 This program has been tested only on 64-bit Linux.  You can
 try building it on other platforms but without any support
@@ -101,3 +100,14 @@
 
 This does NOT support dumps from wikis with LiquidThread enabled.
 That's a feature set for a future version.
+
+LICENSE
+
+The files sha1.c and sha1.h are released by Christophe Devine under
+GPLv2 (see the file COPYING in this directory).  His web site is
+no longer available and the code has since been folded into many
+other projects but you can find it via archive.org:
+http://web.archive.org/web/20031123112259/http://www.cr0.net:8040/code/crypto/sha1/
+
+The remaining files are copyright Ariel Glenn 2013 and also released
+under the GPLv2 (see again the file COPYING in this directory).
diff --git a/xmlfileutils/mwxml2sql.h b/xmlfileutils/mwxml2sql.h
index 35a715d..f4f4ce1 100644
--- a/xmlfileutils/mwxml2sql.h
+++ b/xmlfileutils/mwxml2sql.h
@@ -18,6 +18,8 @@
 #include <zlib.h>
 #include <stdarg.h>
 
+#include "sha1.h"
+
 #define VERSION "0.0.1"
 
 #define MAX_TAG_NAME_LEN 256
@@ -251,6 +253,12 @@
 char *get_filesuffix(char *file_name, int verbose);
 int do_file_header(input_file_t *f, int skipschema, char **schema, siteinfo_t **s, int verbose);
 
+int tobase36(unsigned int *in, unsigned int *in_copy, unsigned int *temp, int in_len, unsigned int *out);
+int char2int(char c);
+int hexstring2int(char *s, int len, unsigned int *intbuf);
+char int2char(int i);
+void int2string(unsigned int *int_buf, int int_buf_len, char *s);
+
 static inline int mwv_any_greater(mw_version_t *mwv,int mj,int mn ) {
   mw_version_t *head = mwv;
 
diff --git a/xmlfileutils/mwxmlelts.c b/xmlfileutils/mwxmlelts.c
index e37b7de..1e7e281 100644
--- a/xmlfileutils/mwxmlelts.c
+++ b/xmlfileutils/mwxmlelts.c
@@ -18,6 +18,8 @@
 
 #include "mwxml2sql.h"
 
+#define SHA_DIGEST_LENGTH 20
+
 char page_in_process[MAX_ID_LEN];
 int page_rows_written;
 int rev_rows_written;
@@ -311,9 +313,9 @@
   int todo_length;
   char *todo, *todo_new;
   int text_length = 0;
-  SHA_CTX ctx;
+  sha1_context ctx;
   unsigned char sha1[SHA_DIGEST_LENGTH];
-  char sha1_string[SHA_DIGEST_LENGTH*2];
+  unsigned char sha1_string[SHA_DIGEST_LENGTH*2 +1];
   int i=0;
   char *compressed_content = NULL;
   int compressed_length = 0;
@@ -322,7 +324,14 @@
   char compressed_buf[TEXT_BUF_LEN_PADDED];
   char *compressed_ptr = NULL;
 
-  if (get_sha1) SHA1_Init(&ctx);
+  unsigned int sha1_copy[SHA_DIGEST_LENGTH*2 +1];
+  unsigned int sha1_temp[SHA_DIGEST_LENGTH*2 +1];
+  unsigned int sha1_num[SHA_DIGEST_LENGTH/3 +1];
+  int sha1_num_len;
+  unsigned int sha1_b36[SHA_DIGEST_LENGTH*8/5 + 6];
+  int sha1_b36_len;
+
+  if (get_sha1) sha1_starts(&ctx);
 
   ind = strstr(f->in_buf->content, "<text");
   if (!ind) return(0);
@@ -362,7 +371,7 @@
     if (!endtag) {
       leftover = un_xml_escape(ind, raw, 0);
       if (get_text_length) text_length+= strlen(raw);
-      if (get_sha1) SHA1_Update(&ctx, raw, strlen(raw));
+      if (get_sha1) sha1_update(&ctx, (unsigned char *)raw, strlen(raw));
       if (text_compress) {
        /* FIXME do something with this return value */
        compressed_ptr = gzipit(raw, &compressed_length, compressed_buf, sizeof(compressed_buf));
@@ -406,7 +415,7 @@
       un_xml_escape(ind, raw, 1);
       *endtag = '<';
       if (get_text_length) text_length+= strlen(raw);
-      if (get_sha1) SHA1_Update(&ctx, raw, strlen(raw));
+      if (get_sha1) sha1_update(&ctx, (unsigned char *)raw, strlen(raw));
       if (text_compress) {
        /* FIXME do something with this return value */
        compressed_ptr = gzipit(raw, &compressed_length, compressed_buf, sizeof(compressed_buf));
@@ -467,12 +476,16 @@
      so we don't have to compute it.
   */
   if (get_sha1) {
-    SHA1_Final(sha1, &ctx);
-    /* fixme is this really the best way? look at it later */
-    for (i=0; i < SHA_DIGEST_LENGTH; i++) {
-            sprintf((char*)&(sha1_string[i*2]), "%02x", sha1[i]);
-    }
-    sprintf(r->sha1, "%s", sha1);
+    sha1_finish(&ctx, sha1);
+
+    /* base36 conversion, blah */
+    for (i=0; i < SHA_DIGEST_LENGTH; i++)
+      sprintf((char*)&(sha1_string[i*2]), "%02x", sha1[i]);
+
+    /*    sha1_num_len = hexstring2int((char *)sha1_string, SHA_DIGEST_LENGTH*2, sha1_num);*/
+    sha1_num_len = hexstring2int((char *)sha1_string, SHA_DIGEST_LENGTH*2, sha1_num);
+    sha1_b36_len = tobase36(sha1_num, sha1_copy, sha1_temp, sha1_num_len, sha1_b36);
+    int2string(sha1_b36, sha1_b36_len, r->sha1);
   }
 
   if (verbose > 1) fprintf(stderr,"text info: insert end of line written\n");

-- 
To view, visit https://gerrit.wikimedia.org/r/50175
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I0a2db652602c34f0c901b4ec80b4dff507f845ba
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to