Hi - I would like to export a large amount of data from ExpressionEngine to WordPress, and have had lots of trouble finding something that isn't miles over my head. I did find these three scripts, which seem to be perfect for this purpose, but I don't know how to go about implementing them. It's a three-step process: 1. Configure and run Databases.cfg. 2. Run ExpressionEngineExport.py. 3. Run WordPressImport.py. I have a Mac and am all set with access to my databases on my host; all I need is to be pointed in the right direction. Anyone? Thank you! Mindy |
Databases.cfg
Description: Binary data
#!/opt/local/bin/python
"""Export the entries of an ExpressionEngine weblog to an XML file.

The database URL is read from the ``input`` option of the ``[Databases]``
section of ``Databases.cfg``, which must sit next to this script.  Every
entry of the first weblog is written as an ``<entry>`` element together
with its custom fields, categories, comments and trackbacks.

Usage: python ExpressionEngineExport.py [output.xml]

This is a Python 2 script (``ConfigParser``, ``elementtree``, ``sqlobject``).
"""

import xml.sax.saxutils
# The star import stays ahead of the plain imports below, as in the original,
# so that the standard-library module names win if sqlobject exports a
# same-named symbol.
from sqlobject import *
from elementtree import ElementTree
import datetime
from ConfigParser import ConfigParser
import os
import sys

### Read the URL from the config file
# abspath() first: when the script is started as "python ExpressionEngineExport.py"
# dirname(sys.argv[0]) is '' and plain concatenation would look for
# '/Databases.cfg' in the filesystem root.
theScriptDirectory = os.path.dirname(os.path.abspath(sys.argv[0]))
theParser = ConfigParser()
theParser.read(os.path.join(theScriptDirectory, 'Databases.cfg'))
theURL = theParser.get('Databases', 'input')

connection = connectionForURI(theURL)
sqlhub.processConnection = connection

# Default output location (the original author's desktop); an optional first
# command-line argument overrides it.
theOutputPath = '/Users/schwa/Desktop/Export.xml'
if len(sys.argv) > 1:
    theOutputPath = sys.argv[1]


class Weblog(SQLObject):
    """Row of exp_weblogs."""
    class sqlmeta:
        table = 'exp_weblogs'
        idName = 'weblog_id'
    name = StringCol(length=40, dbName='blog_name')
    title = StringCol(length=100, dbName='blog_title')
    url = StringCol(length=100, dbName='blog_url')
    titles = MultipleJoin('Title', joinColumn='weblog_id')
    data = MultipleJoin('Data', joinColumn='weblog_id')


class Title(SQLObject):
    """Row of exp_weblog_titles (one per entry)."""
    class sqlmeta:
        table = 'exp_weblog_titles'
        idName = 'entry_id'
    title = StringCol(length=100, dbName='title')
    entry_date = IntCol()
    slug = StringCol(length=75, dbName='url_title')
    data = MultipleJoin('Data', joinColumn='entry_id')
    comments = MultipleJoin('Comment', joinColumn='entry_id')
    trackbacks = MultipleJoin('Trackback', joinColumn='entry_id')
    categories = RelatedJoin('Category',
                             intermediateTable='exp_category_posts',
                             joinColumn='entry_id',
                             otherColumn='cat_id')


class Field(SQLObject):
    """Row of exp_weblog_fields; ``name`` is used as the XML tag of a field."""
    class sqlmeta:
        table = 'exp_weblog_fields'
        idName = 'field_id'
    name = StringCol(length=32, dbName='field_name')
    label = StringCol(length=50, dbName='field_label')


class Data(SQLObject):
    """Row of exp_weblog_data; only custom fields 1 to 4 are mapped."""
    class sqlmeta:
        table = 'exp_weblog_data'
        idName = 'entry_id'
    fieldId1 = StringCol(dbName='field_id_1')
    fieldFt1 = StringCol(length=40, dbName='field_ft_1')
    fieldId2 = StringCol(dbName='field_id_2')
    fieldFt2 = StringCol(length=40, dbName='field_ft_2')
    fieldId3 = StringCol(dbName='field_id_3')
    fieldFt3 = StringCol(length=40, dbName='field_ft_3')
    fieldId4 = StringCol(dbName='field_id_4')
    fieldFt4 = StringCol(length=40, dbName='field_ft_4')


class Comment(SQLObject):
    """Row of exp_comments."""
    class sqlmeta:
        table = 'exp_comments'
        idName = 'comment_id'
    status = StringCol(length=1, dbName='status')
    name = StringCol(length=50, dbName='name')
    email = StringCol(length=50, dbName='email')
    url = StringCol(length=75, dbName='url')
    location = StringCol(length=50, dbName='location')
    ip_address = StringCol(length=16, dbName='ip_address')
    comment_date = IntCol(dbName='comment_date')
    edit_date = DateTimeCol(dbName='edit_date')
    comment = StringCol(dbName='comment')


class Trackback(SQLObject):
    """Row of exp_trackbacks."""
    class sqlmeta:
        table = 'exp_trackbacks'
        idName = 'trackback_id'
    weblog_id = IntCol(dbName='weblog_id')
    title = StringCol(length=100, dbName='title')
    content = StringCol(dbName='content')
    weblog_name = StringCol(length=100, dbName='weblog_name')
    trackback_url = StringCol(length=200, dbName='trackback_url')
    trackback_date = IntCol(dbName='trackback_date')
    trackback_ip = StringCol(length=16, dbName='trackback_ip')


class Category(SQLObject):
    """Row of exp_categories."""
    class sqlmeta:
        table = 'exp_categories'
        idName = 'cat_id'
    group_id = IntCol(dbName='group_id')
    parent_id = IntCol(dbName='parent_id')
    name = StringCol(length=60, dbName='cat_name')
    image = StringCol(length=120, dbName='cat_image')
    description = StringCol(dbName='cat_description')
    order = IntCol(dbName='cat_order')
    entries = RelatedJoin('Title',
                          intermediateTable='exp_category_posts',
                          joinColumn='cat_id',
                          otherColumn='entry_id')


# exp_category_posts has no model of its own; it is only used as the
# intermediate table of the two RelatedJoins above.  Its schema, for reference:
#
# CREATE TABLE `exp_category_posts` (
#   `entry_id` int(10) unsigned NOT NULL default '0',
#   `cat_id` int(10) unsigned NOT NULL default '0',
#   KEY `entry_id` (`entry_id`),
#   KEY `cat_id` (`cat_id`)
# ) ENGINE=MyISAM;

########################################################################


def encode(string):
    """Return ``string`` with &, < and > escaped for use in XML."""
    return xml.sax.saxutils.escape(string)
#   return xml.sax.saxutils.escape(string).encode('UTF-8')


def tag(tag, content):
    """Return ``<tag>content</tag>``, or '' when ``content`` is empty."""
    if not content:
        return ''
    else:
        return '<%s>%s</%s>' % (tag, content, tag)


def addText(inParent, inName, inValue):
    """Append ``<inName>inValue</inName>`` to inParent unless inValue is ''."""
    if inValue != '':
        ElementTree.SubElement(inParent, inName).text = inValue


def isoTime(inTimestamp):
    """Format a Unix timestamp as an ISO 8601 string.

    NOTE(review): fromtimestamp() yields the local time of the exporting
    machine, while WordPressImport.py labels the value as UTC - confirm
    which time zone the ExpressionEngine timestamps are meant to be in.
    """
    return datetime.datetime.fromtimestamp(int(inTimestamp)).isoformat()


# Field names are looked up on first use and then reused for every entry
# instead of being queried again for each one.
theFieldNames = {}


def fieldName(inFieldId):
    """Return the exp_weblog_fields name for a field id (one query per id)."""
    if inFieldId not in theFieldNames:
        theFieldNames[inFieldId] = Field.select(Field.q.id == inFieldId)[0].name
    return theFieldNames[inFieldId]


theWeblog = Weblog.select()[0]

theEntriesXML = ElementTree.Element('entries')

for theEntry in theWeblog.titles:
    print('# Importing record!')

    theEntryXML = ElementTree.SubElement(theEntriesXML, 'entry')

    print(theEntry.title)

    ElementTree.SubElement(theEntryXML, 'id').text = str(theEntry.id)
    ElementTree.SubElement(theEntryXML, 'title').text = theEntry.title
    ElementTree.SubElement(theEntryXML, 'slug').text = theEntry.slug
    ElementTree.SubElement(theEntryXML, 'entry_date').text = isoTime(theEntry.entry_date)

    # Custom fields: each non-empty field becomes an element named after the
    # field.  An entry without a data row is reported instead of aborting
    # the whole export with an IndexError.
    theDataRows = theEntry.data
    if theDataRows:
        theData = theDataRows[0]
        for theFieldId in (1, 2, 3, 4):
            theValue = getattr(theData, 'fieldId%d' % theFieldId)
            if theValue != '':
                ElementTree.SubElement(theEntryXML, fieldName(theFieldId)).text = theValue
    else:
        print('# No data row for entry %s' % theEntry.id)

    # Categories stay best-effort as before, but only ordinary errors are
    # caught and the reason is reported.
    try:
        theCategories = theEntry.categories
        if len(theCategories) > 0:
            theCategoriesXML = ElementTree.SubElement(theEntryXML, 'categories')
            for theCategory in theCategories:
                ElementTree.SubElement(theCategoriesXML, 'category').text = theCategory.name
    except Exception:
        print('# Problem with categories: %s' % sys.exc_info()[1])

    theComments = theEntry.comments
    if len(theComments) > 0:
        theCommentsXML = ElementTree.SubElement(theEntryXML, 'comments')
        for theComment in theComments:
            theCommentXML = ElementTree.SubElement(theCommentsXML, 'comment')
            ElementTree.SubElement(theCommentXML, 'id').text = str(theComment.id)
            addText(theCommentXML, 'name', theComment.name)
            addText(theCommentXML, 'url', theComment.url)
            addText(theCommentXML, 'location', theComment.location)
            addText(theCommentXML, 'ip_address', theComment.ip_address)
            ElementTree.SubElement(theCommentXML, 'date').text = isoTime(theComment.comment_date)
            # A NULL edit_date used to raise AttributeError here.
            if theComment.edit_date is not None:
                ElementTree.SubElement(theCommentXML, 'edit_date').text = theComment.edit_date.isoformat()
            addText(theCommentXML, 'comment', theComment.comment)

    theTrackbacks = theEntry.trackbacks
    if len(theTrackbacks) > 0:
        theTrackbacksXML = ElementTree.SubElement(theEntryXML, 'trackbacks')
        for theTrackback in theTrackbacks:
            theTrackbackXML = ElementTree.SubElement(theTrackbacksXML, 'trackback')
            ElementTree.SubElement(theTrackbackXML, 'id').text = str(theTrackback.id)
            addText(theTrackbackXML, 'content', theTrackback.content)
            addText(theTrackbackXML, 'weblog_name', theTrackback.weblog_name)
            addText(theTrackbackXML, 'url', theTrackback.trackback_url)
            ElementTree.SubElement(theTrackbackXML, 'date').text = isoTime(theTrackback.trackback_date)
            addText(theTrackbackXML, 'ip_address', theTrackback.trackback_ip)

# Close the file explicitly so the XML is flushed to disk.
theOutputFile = open(theOutputPath, 'w')
try:
    theOutputFile.write(ElementTree.tostring(theEntriesXML))
finally:
    theOutputFile.close()
#!/opt/local/bin/python
"""Import the XML written by ExpressionEngineExport.py into WordPress.

The database URL is read from the ``export`` option of the ``[Databases]``
section of ``Databases.cfg``, which must sit next to this script.

WARNING: on start-up this script deletes every row of wp_posts,
wp_categories and wp_comments before importing.

Usage: python WordPressImport.py [export.xml]

This is a Python 2 script (``ConfigParser``, ``elementtree``, ``sqlobject``).
"""

# The star import stays ahead of the plain imports below, as in the original,
# so that the standard-library module names win if sqlobject exports a
# same-named symbol.
from sqlobject import *
from elementtree import ElementTree
import codecs
import datetime
import os
import sys
import time
from ConfigParser import ConfigParser
import tzinfo  # module expected to provide ``utc`` and ``Eastern``

################################################################################


def delete(cls, *args, **kwargs):
    """Destroy every row of the table; the arguments are ignored."""
    for theRow in cls.select():
        theRow.destroySelf()


SQLObject.delete = classmethod(delete)


def deleteBy(cls, *args, **kwargs):
    """Destroy every row matching the given selectBy() criteria."""
    for theRow in cls.selectBy(*args, **kwargs):
        theRow.destroySelf()


SQLObject.deleteBy = classmethod(deleteBy)

################################################################################


def processTime(inTimeString):
    """Parse a 'YYYY-MM-DDTHH:MM:SS' string into a datetime tagged as UTC.

    Only the first six struct_time fields (year to second) are passed to
    the constructor; the original slice [0:7] also passed tm_wday, which
    ended up as the microsecond.
    """
    theFields = time.strptime(inTimeString, '%Y-%m-%dT%H:%M:%S')[0:6]
    return datetime.datetime(*theFields).replace(tzinfo=tzinfo.utc)

################################################################################


def MyErrorHandler(error):
    """Codec error handler that drops the characters that cannot be encoded."""
    return (u'', error.end)


codecs.register_error('MyErrorHandler', MyErrorHandler)


def Purify(value):
    """Return ``value`` encoded as UTF-8, or '' when it is None."""
    if value is not None:
        value = value.encode('utf8', 'MyErrorHandler')
    else:
        value = ''
    return value


def childText(inNode, inName):
    """Return the text of child ``inName`` of inNode, or None if it is absent.

    The exporter omits elements whose value is empty, so a child such as
    <name> or <ip_address> may legitimately be missing.
    """
    theChild = inNode.find(inName)
    if theChild is None:
        return None
    return theChild.text

################################################################################

### Read the URL from the config file
# abspath() first: when the script is started as "python WordPressImport.py"
# dirname(sys.argv[0]) is '' and plain concatenation would look for
# '/Databases.cfg' in the filesystem root.
theScriptDirectory = os.path.dirname(os.path.abspath(sys.argv[0]))
theParser = ConfigParser()
theParser.read(os.path.join(theScriptDirectory, 'Databases.cfg'))
theURL = theParser.get('Databases', 'export')

connection = connectionForURI(theURL)
sqlhub.processConnection = connection

# Default input location (the original author's machine); an optional first
# command-line argument overrides it.
theInputPath = '/Volumes/Shared/Users/schwa/Desktop/export.xml'
if len(sys.argv) > 1:
    theInputPath = sys.argv[1]


class Post(SQLObject):
    """Row of wp_posts."""
    class sqlmeta:
        table = 'wp_posts'
        idName = 'ID'
    post_author = IntCol()
    post_date = DateTimeCol(default=datetime.datetime.now)
    post_date_gmt = DateTimeCol(default=datetime.datetime.utcnow)
    post_content = StringCol(default='')
    post_title = StringCol(default='')
    post_category = IntCol(default=0)
    post_excerpt = StringCol(default='')
    post_status = EnumCol(enumValues=['publish', 'draft', 'private', 'static', 'object', 'attachment'], default='publish')
    comment_status = EnumCol(enumValues=['open', 'closed', 'registered_only'], default='open')
    ping_status = EnumCol(enumValues=['open', 'closed'], default='open')
    post_password = StringCol(default='')
    post_name = StringCol(default='')
    to_ping = StringCol(default='')
    pinged = StringCol(default='')
    post_modified = DateTimeCol(default=datetime.datetime.now)
    post_modified_gmt = DateTimeCol(default=datetime.datetime.utcnow)
    post_parent = IntCol(default=0)
    guid = StringCol(default='')
    menu_order = IntCol(default=0)
    post_type = StringCol(default='')
    post_mime_type = StringCol(default='')
    comment_count = IntCol(default=0)
    categories = RelatedJoin('Category',
                             intermediateTable='wp_post2cat',
                             joinColumn='post_id',
                             otherColumn='category_id')
    comments = MultipleJoin('Comment', joinColumn='comment_post_ID')


class Category(SQLObject):
    """Row of wp_categories."""
    class sqlmeta:
        table = 'wp_categories'
        idName = 'cat_ID'
    cat_name = StringCol(default='')
    category_nicename = StringCol(default='')
    category_description = StringCol(default='')
    category_parent = IntCol(default=0)
    category_count = IntCol(default=0)
    # The other side of wp_post2cat is Post; the original named 'Title',
    # a class that only exists in the export script.
    entries = RelatedJoin('Post',
                          intermediateTable='wp_post2cat',
                          joinColumn='category_id',
                          otherColumn='post_id')


class Comment(SQLObject):
    """Row of wp_comments."""
    class sqlmeta:
        table = 'wp_comments'
        idName = 'comment_ID'
    comment_post_ID = IntCol(dbName='comment_post_ID')
    comment_author = StringCol(default='')
    comment_author_email = StringCol(default='')
    comment_author_url = StringCol(default='')
    comment_author_IP = StringCol(default='', dbName='comment_author_IP')
    comment_date = DateTimeCol(default=datetime.datetime.now)
    comment_date_gmt = DateTimeCol(default=datetime.datetime.utcnow)
    comment_content = StringCol(default='')
    comment_karma = IntCol(default=0)
    comment_approved = EnumCol(enumValues=['0', '1', 'spam'], default='1')
    comment_agent = StringCol(default='')
    comment_type = StringCol(default='')  ### 'pingback'
    comment_parent = IntCol(default=0)
    user_id = IntCol(default=0)


class User(SQLObject):
    """Row of wp_users."""
    class sqlmeta:
        table = 'wp_users'
        idName = 'ID'
    user_login = StringCol()
    user_pass = StringCol()
    user_nicename = StringCol()
    user_email = StringCol()
    user_url = StringCol()
    user_registered = DateTimeCol(default=datetime.datetime.now)
    user_activation_key = StringCol()
    # Was "IntCol" without parentheses, which assigned the column class
    # itself instead of declaring a column.
    user_status = IntCol()
    display_name = StringCol()


# Start from empty tables (see the warning in the module docstring).
Post.delete()
Category.delete()
Comment.delete()

Category(cat_name='Uncategorised')

# Every imported post is attributed to this existing WordPress user; the
# login is hard-coded and must exist in wp_users.
theUser = User.selectBy(user_login='schwa')[0]
print(theUser.id)


def XMLImport(data, overwrite=True):
    """Create WordPress posts, categories and comments from export XML.

    data      -- the XML document as a string; its root holds <entry> nodes.
    overwrite -- when true, an existing post with the same id is deleted
                 before the entry is created.

    Entry content is assembled from the <body>, <extended> and <source>
    children; <summary> becomes the excerpt, or the body when there is no
    <body>.  Trackbacks are stored as comments of type 'pingback'.
    """
    theRootNode = ElementTree.fromstring(data)

    ################################################################################

    theNodesToProcess = len(theRootNode)
#   theNodesToProcess = 2

    for theEntryNode in theRootNode[:theNodesToProcess]:
        theEntryId = int(theEntryNode.find('id').text)

        if overwrite:
            print('Deleting entry(%d)' % theEntryId)
            Post.deleteBy(id=theEntryId)

        theTitle = Purify(childText(theEntryNode, 'title'))
        print('Creating Entry(%s)' % theTitle)

        theSlug = childText(theEntryNode, 'slug') or ''
        theEntryDateGMT = processTime(theEntryNode.find('entry_date').text)
        theEntryDate = theEntryDateGMT.astimezone(tzinfo.Eastern)
        print('%s' % theEntryDate)

        # Elements are compared with "is None": an element without children
        # is false in a boolean context even though it exists.
        theBody = theEntryNode.find('body')
        theSummary = theEntryNode.find('summary')
        theSource = theEntryNode.find('source')
        theExtended = theEntryNode.find('extended')

        # No body: promote the summary to body and leave the excerpt empty.
        if theBody is None and theSummary is not None:
            theBody = theSummary
            theSummary = None

        if theBody is not None:
            theBody = Purify(theBody.text)
        if theExtended is not None:
            theExtended = Purify(theExtended.text)
        if theSummary is not None:
            theSummary = Purify(theSummary.text)
        else:
            theSummary = ''
        if theSource is not None:
            theSource = Purify(theSource.text)

        # An entry with neither body nor summary used to leave theContent
        # as None, which broke the concatenations below.
        theContent = theBody or ''
        if theExtended:
            theContent = theContent + '<br/>' + theExtended
        if theSource:
            theContent = theContent + '<hr/><pre>%s</pre>' % theSource

        thePost = Post(id=theEntryId,
                       post_name=theSlug,
                       post_author=theUser.id,
                       post_title=theTitle,
                       post_content=theContent,
                       post_date=theEntryDate,
                       post_date_gmt=theEntryDateGMT,
                       post_excerpt=theSummary)

        for theCategoryNode in theEntryNode.findall('categories/category'):
            theCategoryName = theCategoryNode.text
            # Look the category up by name only; the original second lookup
            # also filtered on "cat_nicename", which is not a column of
            # Category (the column is category_nicename).
            theMatches = Category.selectBy(cat_name=theCategoryName)
            if theMatches.count() == 0:
                theCategory = Category(cat_name=theCategoryName)
            else:
                theCategory = theMatches[0]
            theCategory.category_count = theCategory.category_count + 1
            thePost.addCategory(theCategory)

        for theCommentNode in theEntryNode.findall('comments/comment'):
            print(theCommentNode)
            # name, url, ip_address and comment are absent from the XML when
            # they were empty in ExpressionEngine.
            theAuthor = Purify(childText(theCommentNode, 'name'))
            theUrl = childText(theCommentNode, 'url') or ''
            theIP = childText(theCommentNode, 'ip_address') or ''
            theDateGMT = processTime(theCommentNode.find('date').text)
            theDate = theDateGMT.astimezone(tzinfo.Eastern)
            theText = Purify(childText(theCommentNode, 'comment'))
            theComment = Comment(comment_post_ID=thePost.id,
                                 comment_author=theAuthor,
                                 comment_author_IP=theIP,
                                 comment_author_url=theUrl,
                                 comment_date=theDate,
                                 comment_date_gmt=theDateGMT,
                                 comment_content=theText)
            thePost.comment_count = thePost.comment_count + 1

        # Shape of a trackback node, for reference:
        # <trackback>
        #   <id>4</id>
        #   <content>Running something useful on Xgrid This article is the
        #   third in a series on Xgrid, see Part I and Part II. In the present
        #   article, we look at a real life example to see how one can use
        #   Xgrid to...</content>
        #   <weblog_name>: Simple.</weblog_name>
        #   <url>http://unu.novajo.ca/simple/archives/000024.html</url>
        #   <date>2004-01-15T18:05:04</date>
        #   <ip_address>67.70.6.38</ip_address>
        # </trackback>
        for theTrackbackNode in theEntryNode.findall('trackbacks/trackback'):
            print(theTrackbackNode)
            theAuthor = Purify(childText(theTrackbackNode, 'weblog_name'))
            theUrl = childText(theTrackbackNode, 'url') or ''
            theIP = childText(theTrackbackNode, 'ip_address') or ''
            theDateGMT = processTime(theTrackbackNode.find('date').text)
            theDate = theDateGMT.astimezone(tzinfo.Eastern)
            theComment = Comment(comment_post_ID=thePost.id,
                                 comment_author=theAuthor,
                                 comment_author_IP=theIP,
                                 comment_author_url=theUrl,
                                 comment_date=theDate,
                                 comment_date_gmt=theDateGMT,
                                 comment_type='pingback')
            thePost.comment_count = thePost.comment_count + 1


# Read the whole export and close the file before importing.
theInputFile = open(theInputPath)
try:
    theInputData = theInputFile.read()
finally:
    theInputFile.close()

XMLImport(theInputData)
................................................. Melinda Roberts Co-Founder, PearSoup.com Panelist, Momversation.com Find me online: http://clicktoadd.me/MelindaRoberts |
_______________________________________________ Tutor maillist - Tutor@python.org http://mail.python.org/mailman/listinfo/tutor