Hi - I would like to export a large amount of data from ExpressionEngine to WordPress, and have had lots of trouble finding something that isn't miles over my head. I did find these three scripts, which seem to be perfect for this purpose, but I don't know how to go about implementing them. It's a three-step process: 1. Configure and run Databases.cfg 2. Run ExpressionEngineExport.py 3. Run WordPressImport.py. I have a Mac and am all set with access to my databases on my host; all I need is to be pointed in the right direction. Anyone? Thank you! Mindy |
Databases.cfg
Description: Binary data
#!/opt/local/bin/python
import xml.sax.saxutils
from sqlobject import *
from elementtree import ElementTree
import datetime
from ConfigParser import ConfigParser
import os
import sys
### Read the input database URL from Databases.cfg, which sits next to this script.
# abspath() matters: when the script is started from its own directory,
# dirname(sys.argv[0]) is '' and plain string concatenation would look for
# '/Databases.cfg' in the filesystem root.
theConfigPath = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), 'Databases.cfg')
theParser = ConfigParser()
# read() skips files it cannot open and returns the list of files it parsed,
# so an empty result means the config file is missing or unreadable.
if not theParser.read(theConfigPath):
    raise IOError('Cannot read config file: %s' % theConfigPath)
theURL = theParser.get('Databases', 'input')
# Make this connection the process-wide default for the SQLObject classes below.
connection = connectionForURI(theURL)
sqlhub.processConnection = connection
class Weblog(SQLObject):
    """Row of the ``exp_weblogs`` table, keyed by ``weblog_id``."""

    class sqlmeta:
        table = 'exp_weblogs'
        idName = 'weblog_id'

    # Attribute names are shortened; dbName gives the real column name.
    name = StringCol(dbName='blog_name', length=40)
    title = StringCol(dbName='blog_title', length=100)
    url = StringCol(dbName='blog_url', length=100)

    # One-to-many joins: Title / Data rows whose weblog_id is this row's id.
    titles = MultipleJoin('Title', joinColumn='weblog_id')
    data = MultipleJoin('Data', joinColumn='weblog_id')
class Title(SQLObject):
    """Row of the ``exp_weblog_titles`` table (one weblog entry), keyed by ``entry_id``."""

    class sqlmeta:
        table = 'exp_weblog_titles'
        idName = 'entry_id'

    title = StringCol(dbName='title', length=100)
    # Stored as an integer; the export loop feeds it to datetime.fromtimestamp().
    entry_date = IntCol()
    slug = StringCol(dbName='url_title', length=75)

    # One-to-many joins on entry_id.
    data = MultipleJoin('Data', joinColumn='entry_id')
    comments = MultipleJoin('Comment', joinColumn='entry_id')
    trackbacks = MultipleJoin('Trackback', joinColumn='entry_id')

    # Many-to-many link to Category through exp_category_posts.
    categories = RelatedJoin(
        'Category',
        intermediateTable='exp_category_posts',
        joinColumn='entry_id',
        otherColumn='cat_id',
    )
class Field(SQLObject):
    """Row of the ``exp_weblog_fields`` table, keyed by ``field_id``.

    The export loop looks a row up by id and uses its ``name`` as the XML
    tag for the matching custom-field value.
    """

    class sqlmeta:
        table = 'exp_weblog_fields'
        idName = 'field_id'

    name = StringCol(dbName='field_name', length=32)
    label = StringCol(dbName='field_label', length=50)
class Data(SQLObject):
    """Row of the ``exp_weblog_data`` table, keyed by ``entry_id``.

    Only custom fields 1 to 4 are mapped.  ``fieldIdN`` is the value the
    export loop writes out for field N.
    """

    class sqlmeta:
        table = 'exp_weblog_data'
        idName = 'entry_id'

    # field_ft_N is presumably the formatting type of field N -- TODO confirm
    # against the ExpressionEngine schema; the export never reads it.
    fieldId1 = StringCol(dbName='field_id_1')
    fieldFt1 = StringCol(dbName='field_ft_1', length=40)
    fieldId2 = StringCol(dbName='field_id_2')
    fieldFt2 = StringCol(dbName='field_ft_2', length=40)
    fieldId3 = StringCol(dbName='field_id_3')
    fieldFt3 = StringCol(dbName='field_ft_3', length=40)
    fieldId4 = StringCol(dbName='field_id_4')
    fieldFt4 = StringCol(dbName='field_ft_4', length=40)
class Comment(SQLObject):
    """Row of the ``exp_comments`` table, keyed by ``comment_id``."""

    class sqlmeta:
        table = 'exp_comments'
        idName = 'comment_id'

    status = StringCol(dbName='status', length=1)

    # Who wrote the comment and from where.
    name = StringCol(dbName='name', length=50)
    email = StringCol(dbName='email', length=50)
    url = StringCol(dbName='url', length=75)
    location = StringCol(dbName='location', length=50)
    ip_address = StringCol(dbName='ip_address', length=16)

    # comment_date is an integer timestamp; edit_date is mapped as a datetime.
    comment_date = IntCol(dbName='comment_date')
    edit_date = DateTimeCol(dbName='edit_date')

    comment = StringCol(dbName='comment')
class Trackback(SQLObject):
    """Row of the ``exp_trackbacks`` table, keyed by ``trackback_id``."""

    class sqlmeta:
        table = 'exp_trackbacks'
        idName = 'trackback_id'

    weblog_id = IntCol(dbName='weblog_id')
    title = StringCol(dbName='title', length=100)
    content = StringCol(dbName='content')
    weblog_name = StringCol(dbName='weblog_name', length=100)
    trackback_url = StringCol(dbName='trackback_url', length=200)
    # Integer timestamp, converted with datetime.fromtimestamp() on export.
    trackback_date = IntCol(dbName='trackback_date')
    trackback_ip = StringCol(dbName='trackback_ip', length=16)
class Category(SQLObject):
    """Row of the ``exp_categories`` table, keyed by ``cat_id``."""

    class sqlmeta:
        table = 'exp_categories'
        idName = 'cat_id'

    group_id = IntCol(dbName='group_id')
    parent_id = IntCol(dbName='parent_id')
    name = StringCol(dbName='cat_name', length=60)
    image = StringCol(dbName='cat_image', length=120)
    description = StringCol(dbName='cat_description')
    order = IntCol(dbName='cat_order')

    # Many-to-many link back to Title through exp_category_posts.
    entries = RelatedJoin(
        'Title',
        intermediateTable='exp_category_posts',
        joinColumn='cat_id',
        otherColumn='entry_id',
    )
# class CategoryPosts(SQLObject):
# class sqlmeta:
# table = 'exp_category_posts'
# idName = 'cat_id'
#
#
# CREATE TABLE `exp_category_posts` (
# `entry_id` int(10) unsigned NOT NULL default '0',
# `cat_id` int(10) unsigned NOT NULL default '0',
# KEY `entry_id` (`entry_id`),
# KEY `cat_id` (`cat_id`)
# ) ENGINE=MyISAM;
########################################################################
def encode(string):
    """Return *string* with the XML special characters ``&``, ``<`` and ``>`` escaped.

    ``None`` yields an empty string, in line with tag(), which also treats
    a missing value as empty.  The result is not byte-encoded; call
    ``.encode('UTF-8')`` on it when bytes are needed.
    """
    if string is None:
        return ''
    return xml.sax.saxutils.escape(string)
def tag(tag, content):
    """Wrap *content* in a ``<tag>...</tag>`` pair.

    Falsy content (``None``, ``''``, ``0``, ...) produces an empty string
    rather than an empty element.  *content* is inserted verbatim; no
    escaping is applied here.
    """
    if content:
        return '<%s>%s</%s>' % (tag, content, tag)
    return ''
########################################################################
# Export driver: walk every entry of the first weblog and write it, with
# its custom fields, categories, comments and trackbacks, to one XML file.

def _isoTime(inTimestamp):
    """Format an integer timestamp as an ISO-8601 string."""
    # NOTE(review): fromtimestamp() yields local time, while the importer's
    # processTime() tags these strings as UTC -- confirm which is intended.
    return datetime.datetime.fromtimestamp(int(inTimestamp)).isoformat()

def _addText(inParent, inTag, inText):
    """Append an <inTag> child holding inText to inParent and return it."""
    theElement = ElementTree.SubElement(inParent, inTag)
    theElement.text = inText
    return theElement

def _addOptionalText(inParent, inTag, inText):
    """Like _addText(), but write nothing when inText is the empty string."""
    if inText != '':
        _addText(inParent, inTag, inText)

# Custom field names are the same for every entry, so look each one up once
# (and only when a non-empty value actually needs it).
theFieldNames = {}

def _fieldName(inFieldId):
    """Return the name of custom field inFieldId, used as its XML tag."""
    if inFieldId not in theFieldNames:
        theFieldNames[inFieldId] = Field.select(Field.q.id == inFieldId)[0].name
    return theFieldNames[inFieldId]

# Only the first weblog in the table is exported.
theWeblog = Weblog.select()[0]
theEntriesXML = ElementTree.Element('entries')
for theEntry in theWeblog.titles:
    print('# Importing record!')
    theEntryXML = ElementTree.SubElement(theEntriesXML, 'entry')
    print(theEntry.title)
    _addText(theEntryXML, 'id', str(theEntry.id))
    _addText(theEntryXML, 'title', theEntry.title)
    _addText(theEntryXML, 'slug', theEntry.slug)
    _addText(theEntryXML, 'entry_date', _isoTime(theEntry.entry_date))

    # Custom fields 1-4; an entry without a data row is reported and exported
    # without fields rather than aborting the whole run with an IndexError.
    theDataRows = theEntry.data
    if theDataRows:
        theData = theDataRows[0]
        for theFieldId in (1, 2, 3, 4):
            theValue = getattr(theData, 'fieldId%d' % theFieldId)
            if theValue != '':
                _addText(theEntryXML, _fieldName(theFieldId), theValue)
    else:
        print('# No data row for entry %s' % theEntry.id)

    # Categories stay best-effort, but the actual error is now reported.
    try:
        theCategories = theEntry.categories
        if len(theCategories) > 0:
            theCategoriesXML = ElementTree.SubElement(theEntryXML, 'categories')
            for theCategory in theCategories:
                _addText(theCategoriesXML, 'category', theCategory.name)
    except Exception:
        print('# Problem with categories: %s' % sys.exc_info()[1])

    theComments = theEntry.comments
    if len(theComments) > 0:
        theCommentsXML = ElementTree.SubElement(theEntryXML, 'comments')
        for theComment in theComments:
            theCommentXML = ElementTree.SubElement(theCommentsXML, 'comment')
            _addText(theCommentXML, 'id', str(theComment.id))
            _addOptionalText(theCommentXML, 'name', theComment.name)
            _addOptionalText(theCommentXML, 'url', theComment.url)
            _addOptionalText(theCommentXML, 'location', theComment.location)
            _addOptionalText(theCommentXML, 'ip_address', theComment.ip_address)
            _addText(theCommentXML, 'date', _isoTime(theComment.comment_date))
            # edit_date can be NULL; calling isoformat() on None would crash.
            if theComment.edit_date is not None:
                _addText(theCommentXML, 'edit_date', theComment.edit_date.isoformat())
            _addOptionalText(theCommentXML, 'comment', theComment.comment)

    theTrackbacks = theEntry.trackbacks
    if len(theTrackbacks) > 0:
        theTrackbacksXML = ElementTree.SubElement(theEntryXML, 'trackbacks')
        for theTrackback in theTrackbacks:
            theTrackbackXML = ElementTree.SubElement(theTrackbacksXML, 'trackback')
            _addText(theTrackbackXML, 'id', str(theTrackback.id))
            _addOptionalText(theTrackbackXML, 'content', theTrackback.content)
            _addOptionalText(theTrackbackXML, 'weblog_name', theTrackback.weblog_name)
            _addOptionalText(theTrackbackXML, 'url', theTrackback.trackback_url)
            _addText(theTrackbackXML, 'date', _isoTime(theTrackback.trackback_date))
            _addOptionalText(theTrackbackXML, 'ip_address', theTrackback.trackback_ip)

# The output path defaults to the original hard-coded location; pass another
# path as the first command-line argument to override it.
theOutputPath = '/Users/schwa/Desktop/Export.xml'
if len(sys.argv) > 1:
    theOutputPath = sys.argv[1]
theOutputFile = open(theOutputPath, 'w')
try:
    theOutputFile.write(ElementTree.tostring(theEntriesXML))
finally:
    # Close explicitly so the data is flushed even if write() fails midway.
    theOutputFile.close()
#!/opt/local/bin/python
from sqlobject import *
from elementtree import ElementTree
import datetime
import time
import tzinfo
################################################################################
def delete(cls, *args, **kwargs):
    """Destroy every row that ``cls.select()`` returns.

    Extra positional and keyword arguments are accepted but ignored.
    """
    theRows = cls.select()
    for theDoomedRow in theRows:
        theDoomedRow.destroySelf()
# Install delete() on the SQLObject base class as a classmethod, so every
# table class can call e.g. Post.delete() to remove all of its rows.
# NOTE(review): this presumably replaces SQLObject's own delete() -- confirm
# nothing else relies on the stock behaviour.
SQLObject.delete = classmethod(delete)
def deleteBy(cls, *args, **kwargs):
    """Destroy each row matched by ``cls.selectBy(*args, **kwargs)``."""
    theMatches = cls.selectBy(*args, **kwargs)
    for theMatch in theMatches:
        theMatch.destroySelf()
# Install deleteBy() on the SQLObject base class as a classmethod, so table
# classes can call e.g. Post.deleteBy(id = ...) to remove matching rows.
SQLObject.deleteBy = classmethod(deleteBy)
################################################################################
def processTime(inTimeString):
    """Parse a ``YYYY-MM-DDTHH:MM:SS`` string into a datetime tagged as UTC.

    Raises ValueError (from time.strptime) when the string does not match
    that format.
    """
    # Only the first six struct_time fields (year .. second) are date/time
    # components.  The seventh is the weekday, which a [0:7] slice would hand
    # to datetime() as the microsecond.
    theFields = time.strptime(inTimeString, '%Y-%m-%dT%H:%M:%S')[0:6]
    # Keyword before *args keeps the call valid on old Python 2 releases too.
    theDatetime = datetime.datetime(tzinfo = tzinfo.utc, *theFields)
    return theDatetime
################################################################################
import codecs

def MyErrorHandler(error):
    """Codec error callback that drops whatever could not be converted.

    Returns the ``(replacement, resume position)`` pair the codecs machinery
    expects: an empty replacement, resuming right after the offending span.
    """
    theResumeAt = error.end
    return (u'', theResumeAt)

# Make the handler selectable by name through the ``errors`` argument of
# encode()/decode().
codecs.register_error('MyErrorHandler', MyErrorHandler)
def Purify(value):
    """Return *value* encoded as UTF-8, or ``''`` when *value* is None.

    Encoding problems are routed to the error handler registered under the
    name 'MyErrorHandler'.
    """
    # Identity test: None is a singleton, and "!= None" can be fooled by
    # objects that override comparison.
    if value is None:
        return ''
    return value.encode('utf8', 'MyErrorHandler')
################################################################################
### Read the output database URL from Databases.cfg, which sits next to this script.
# This script never imported these names itself, so bring them in here.
import os
import sys
from ConfigParser import ConfigParser

# abspath() matters: when the script is started from its own directory,
# dirname(sys.argv[0]) is '' and plain string concatenation would look for
# '/Databases.cfg' in the filesystem root.
theConfigPath = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), 'Databases.cfg')
theParser = ConfigParser()
# read() skips files it cannot open and returns the list of files it parsed,
# so an empty result means the config file is missing or unreadable.
if not theParser.read(theConfigPath):
    raise IOError('Cannot read config file: %s' % theConfigPath)
theURL = theParser.get('Databases', 'export')
# Make this connection the process-wide default for the SQLObject classes below.
connection = connectionForURI(theURL)
sqlhub.processConnection = connection
class Post(SQLObject):
    """Row of the ``wp_posts`` table, keyed by ``ID``.

    Every column except ``post_author`` has a default, so a post can be
    created from just the values the importer knows about.
    """

    class sqlmeta:
        table = 'wp_posts'
        idName = 'ID'

    post_author = IntCol()

    # Creation time: local and UTC, filled in at insert time by default.
    post_date = DateTimeCol(default=datetime.datetime.now)
    post_date_gmt = DateTimeCol(default=datetime.datetime.utcnow)

    post_content = StringCol(default='')
    post_title = StringCol(default='')
    post_category = IntCol(default=0)
    post_excerpt = StringCol(default='')

    post_status = EnumCol(
        enumValues=['publish', 'draft', 'private', 'static', 'object', 'attachment'],
        default='publish',
    )
    comment_status = EnumCol(
        enumValues=['open', 'closed', 'registered_only'],
        default='open',
    )
    ping_status = EnumCol(enumValues=['open', 'closed'], default='open')

    post_password = StringCol(default='')
    post_name = StringCol(default='')
    to_ping = StringCol(default='')
    pinged = StringCol(default='')

    # Modification time: local and UTC, filled in at insert time by default.
    post_modified = DateTimeCol(default=datetime.datetime.now)
    post_modified_gmt = DateTimeCol(default=datetime.datetime.utcnow)

    post_parent = IntCol(default=0)
    guid = StringCol(default='')
    menu_order = IntCol(default=0)
    post_type = StringCol(default='')
    post_mime_type = StringCol(default='')
    comment_count = IntCol(default=0)

    # Many-to-many link to Category through wp_post2cat.
    categories = RelatedJoin(
        'Category',
        intermediateTable='wp_post2cat',
        joinColumn='post_id',
        otherColumn='category_id',
    )
    # One-to-many: comments whose comment_post_ID is this post's id.
    comments = MultipleJoin('Comment', joinColumn='comment_post_ID')
class Category(SQLObject):
    """Row of the ``wp_categories`` table, keyed by ``cat_ID``."""

    class sqlmeta:
        table = 'wp_categories'
        idName = 'cat_ID'

    cat_name = StringCol(default = '')
    category_nicename = StringCol(default = '')
    category_description = StringCol(default = '')
    category_parent = IntCol(default = 0)
    category_count = IntCol(default = 0)

    # Many-to-many link back to Post through wp_post2cat (the mirror image of
    # Post.categories).  This used to name 'Title', a class that belongs to
    # the ExpressionEngine export and does not exist on the WordPress side.
    entries = RelatedJoin('Post', intermediateTable='wp_post2cat', joinColumn='category_id', otherColumn='post_id')
class Comment(SQLObject):
    """Row of the ``wp_comments`` table, keyed by ``comment_ID``.

    The importer stores both regular comments and trackbacks here; trackbacks
    are created with ``comment_type = 'pingback'``.
    """

    class sqlmeta:
        table = 'wp_comments'
        idName = 'comment_ID'

    # Explicit dbName keeps the mixed-case column name as written.
    comment_post_ID = IntCol(dbName='comment_post_ID')

    comment_author = StringCol(default='')
    comment_author_email = StringCol(default='')
    comment_author_url = StringCol(default='')
    comment_author_IP = StringCol(dbName='comment_author_IP', default='')

    # Local and UTC timestamps, filled in at insert time by default.
    comment_date = DateTimeCol(default=datetime.datetime.now)
    comment_date_gmt = DateTimeCol(default=datetime.datetime.utcnow)

    comment_content = StringCol(default='')
    comment_karma = IntCol(default=0)
    comment_approved = EnumCol(enumValues=['0', '1', 'spam'], default='1')
    comment_agent = StringCol(default='')
    comment_type = StringCol(default='')  ### 'pingback'
    comment_parent = IntCol(default=0)
    user_id = IntCol(default=0)
class User(SQLObject):
    """Row of the ``wp_users`` table, keyed by ``ID``.

    The importer only reads this table, to find the author id for new posts.
    """

    class sqlmeta:
        table = 'wp_users'
        idName = 'ID'

    user_login = StringCol()
    user_pass = StringCol()
    user_nicename = StringCol()
    user_email = StringCol()
    user_url = StringCol()
    user_registered = DateTimeCol(default = datetime.datetime.now)
    user_activation_key = StringCol()
    # Must be an IntCol *instance*: the bare class (no parentheses) is just an
    # ordinary attribute and never becomes a mapped column.
    user_status = IntCol()
    display_name = StringCol()
# Start from a clean slate: remove every existing post, category and comment.
# delete() here is the delete-all classmethod installed on SQLObject earlier
# in this script.
for theTable in (Post, Category, Comment):
    theTable.delete()

# Seed one catch-all category.
Category(cat_name = 'Uncategorised')

# Every imported post is attributed to this WordPress login; change it to a
# login that exists in the target wp_users table.
theLogin = 'schwa'
theUsers = list(User.selectBy(user_login = theLogin))
if not theUsers:
    # Same exception type as indexing an empty result, but with an explanation.
    raise IndexError('No row in wp_users with user_login %r' % theLogin)
theUser = theUsers[0]
print(theUser.id)
def _textOf(inElement):
    """Return inElement.text, or None when inElement is None (child not found)."""
    if inElement is None:
        return None
    return inElement.text

def XMLImport(data, overwrite = True):
    """Create posts, categories and comments from an exported XML string.

    data      -- the XML document as a string; each child of the root element
                 is treated as one entry.
    overwrite -- when true, any post with the same id as an incoming entry is
                 deleted before the entry is created.

    Each entry needs <id>, <title> and <entry_date>; the other children are
    optional.  Trackbacks are stored as comments of type 'pingback'.
    Returns None.
    """
    theRootNode = ElementTree.fromstring(data)
    ################################################################################
    # For a trial run, iterate over a slice such as theRootNode[:2] instead.
    for theEntryNode in theRootNode:
        theEntryId = int(theEntryNode.find('id').text)
        if overwrite:
            print('Deleting entry(%d)' % theEntryId)
            Post.deleteBy(id = theEntryId)
        theTitle = Purify(theEntryNode.find('title').text)
        print('Creating Entry(%s)' % theTitle)
        theSlug = _textOf(theEntryNode.find('slug')) or ''
        theEntryDateGMT = processTime(theEntryNode.find('entry_date').text)
        theEntryDate = theEntryDateGMT.astimezone(tzinfo.Eastern)
        print('%s' % theEntryDate)

        # An entry without a <body> uses its <summary> as the body instead,
        # and then has no excerpt.
        theBodyNode = theEntryNode.find('body')
        theSummaryNode = theEntryNode.find('summary')
        if theBodyNode is None and theSummaryNode is not None:
            theBodyNode, theSummaryNode = theSummaryNode, None

        # Purify() maps None to '', so a missing node simply contributes
        # nothing; the content can no longer end up as None.
        theContent = Purify(_textOf(theBodyNode))
        theSummary = Purify(_textOf(theSummaryNode))
        theExtended = Purify(_textOf(theEntryNode.find('extended')))
        theSource = Purify(_textOf(theEntryNode.find('source')))
        if theExtended:
            theContent = theContent + '<br/>' + theExtended
        if theSource:
            theContent = theContent + '<hr/><pre>%s</pre>' % theSource

        thePost = Post(id = theEntryId, post_name = theSlug, post_author = theUser.id, post_title = theTitle, post_content = theContent, post_date = theEntryDate, post_date_gmt = theEntryDateGMT, post_excerpt = theSummary)

        for theCategoryNode in theEntryNode.findall('categories/category'):
            theCategoryName = theCategoryNode.text
            # Look the category up by name only.  The second lookup used to
            # filter on cat_nicename as well, which is not a column of
            # Category (the column is category_nicename).
            theMatches = Category.selectBy(cat_name = theCategoryName)
            if theMatches.count() == 0:
                theCategory = Category(cat_name = theCategoryName)
            else:
                theCategory = theMatches[0]
            theCategory.category_count = theCategory.category_count + 1
            thePost.addCategory(theCategory)

        for theCommentNode in theEntryNode.findall('comments/comment'):
            print(theCommentNode)
            # The exporter omits <name>, <url>, <ip_address> and <comment>
            # when they are empty, so each of them may be missing here.
            theAuthor = Purify(_textOf(theCommentNode.find('name')))
            theUrl = _textOf(theCommentNode.find('url')) or ''
            theIP = _textOf(theCommentNode.find('ip_address')) or ''
            theDateGMT = processTime(theCommentNode.find('date').text)
            theDate = theDateGMT.astimezone(tzinfo.Eastern)
            theText = Purify(_textOf(theCommentNode.find('comment')))
            Comment(comment_post_ID = thePost.id, comment_author = theAuthor, comment_author_IP = theIP, comment_author_url = theUrl, comment_date = theDate, comment_date_gmt = theDateGMT, comment_content = theText)
            thePost.comment_count = thePost.comment_count + 1

        # Shape of one exported trackback:
        # <trackback>
        #   <id>4</id>
        #   <content>Running something useful on Xgrid ...</content>
        #   <weblog_name>: Simple.</weblog_name>
        #   <url>http://unu.novajo.ca/simple/archives/000024.html</url>
        #   <date>2004-01-15T18:05:04</date>
        #   <ip_address>67.70.6.38</ip_address>
        # </trackback>
        for theTrackbackNode in theEntryNode.findall('trackbacks/trackback'):
            print(theTrackbackNode)
            theAuthor = Purify(_textOf(theTrackbackNode.find('weblog_name')))
            theUrl = _textOf(theTrackbackNode.find('url')) or ''
            theIP = _textOf(theTrackbackNode.find('ip_address')) or ''
            theDateGMT = processTime(theTrackbackNode.find('date').text)
            theDate = theDateGMT.astimezone(tzinfo.Eastern)
            Comment(comment_post_ID = thePost.id, comment_author = theAuthor, comment_author_IP = theIP, comment_author_url = theUrl, comment_date = theDate, comment_date_gmt = theDateGMT, comment_type = 'pingback')
            thePost.comment_count = thePost.comment_count + 1
# see what others are running as well keep up the great
# work</comment>
# Run the import.  The input path defaults to the original hard-coded
# location; pass another path as the first command-line argument to
# override it.
import sys
theInputPath = '/Volumes/Shared/Users/schwa/Desktop/export.xml'
if len(sys.argv) > 1:
    theInputPath = sys.argv[1]
theInputFile = open(theInputPath)
try:
    theInputData = theInputFile.read()
finally:
    # Close the file explicitly instead of leaving it to garbage collection.
    theInputFile.close()
XMLImport(theInputData)
................................................. Melinda Roberts Co-Founder, PearSoup.com Panelist, Momversation.com Find me online: http://clicktoadd.me/MelindaRoberts |
_______________________________________________ Tutor maillist - [email protected] http://mail.python.org/mailman/listinfo/tutor
