Revision: 1995
Author: [email protected]
Date: Tue Aug 10 04:34:23 2010
Log: Python script to back up all projects from a running Simal instance
using the REST API.
http://code.google.com/p/simal/source/detail?r=1995
Added:
/trunk/uk.ac.osswatch.simal.web/src/main/python
/trunk/uk.ac.osswatch.simal.web/src/main/python/backupAllProjects.py
=======================================
--- /dev/null
+++ /trunk/uk.ac.osswatch.simal.web/src/main/python/backupAllProjects.py
Tue Aug 10 04:34:23 2010
@@ -0,0 +1,133 @@
+'''
+This script writes a backup of all RDF/XML of the projects of a running
+Simal instance. Written in/for Python 3.
+'''
+import json
+import http.client
+import os
+import codecs
+
+SIMAL_INSTANCE = 'registry.oss-watch.ac.uk'  # Host handed to http.client.HTTPConnection.
+SIMAL_REST_URL_PREFIX = '/simal-rest/project/source-simal/project-'  # Request path part placed before the simalID.
+SIMAL_REST_URL_SUFFIX = '/xml'  # Request path part placed after the simalID.
+PROJECT_FOLDER = 'allProjects/'  # Folder (relative path, trailing slash) the project files are written to.
+
+MAX_FILENAME_LENGTH = 20  # Cut-off length for the sanitized label; counter and '.xml' are added on top.
+
def generate_path(projectLabel):
    """Return a path for *projectLabel* that does not exist yet.

    The base name is PROJECT_FOLDER plus the sanitized label (cut off at
    MAX_FILENAME_LENGTH) with '.xml' appended. When that path is already
    taken, a two-digit counter ('01' .. '99') is inserted before the
    suffix until a free path is found.

    Args:
        projectLabel: Label of the project the file is meant for.

    Returns:
        The first candidate path for which os.path.exists() is false.

    Raises:
        IOError: If the plain path and all 99 numbered variants exist.
    """
    suffix = '.xml'
    basePath = PROJECT_FOLDER + sanitize(projectLabel, MAX_FILENAME_LENGTH)

    candidate = basePath + suffix
    if not os.path.exists(candidate):
        return candidate

    # Build every numbered candidate from the untouched base path. The
    # counter must never be removed with str.rstrip(): that strips a *set*
    # of characters and would also eat digits that belong to the label.
    for i in range(1, 100):
        candidate = '{0}{1:02d}{2}'.format(basePath, i, suffix)
        if not os.path.exists(candidate):
            return candidate

    raise IOError('Could not find non-existent path for: ' + basePath)
+
def sanitize(s, strLength):
    """Remove characters that are awkward in file names, then truncate.

    Spaces and the characters / \\ $ # : ? ! * < > " | are deleted in a
    single pass; what remains is cut off at *strLength* characters.

    Args:
        s: The string to clean up.
        strLength: Maximum length of the returned string.

    Returns:
        The cleaned string, at most *strLength* characters long. It may be
        empty when *s* consists only of removed characters.
    """
    # '"' and '|' are removed as well: like '<', '>', ':', '?' and '*' they
    # are not allowed in Windows file names.
    unwanted = ' /\\$#:?!*<>"|'
    cleaned = s.translate(str.maketrans('', '', unwanted))
    return cleaned[:strLength]
+
def get_simal_connection():
    """Create an HTTPConnection object for the host in SIMAL_INSTANCE."""
    connection = http.client.HTTPConnection(SIMAL_INSTANCE)
    return connection
+
def process_simal_project(simalID, projectLabel):
    """Retrieve the RDF/XML for one project and write it to a new file.

    The request path is SIMAL_REST_URL_PREFIX + simalID +
    SIMAL_REST_URL_SUFFIX and the target file is chosen by
    generate_path(projectLabel). Problems while fetching or writing are
    reported with print() and not re-raised, so a caller that loops over
    many projects carries on with the next one.

    Args:
        simalID: Identifier (string) of the project, inserted in the path.
        projectLabel: Label of the project; used to derive the file name
            and in the messages that are printed.
    """
    simalProjectUrl = SIMAL_REST_URL_PREFIX + simalID + SIMAL_REST_URL_SUFFIX
    conn = get_simal_connection()
    try:
        filePath = generate_path(projectLabel)
        conn.request('GET', simalProjectUrl)
        response = conn.getresponse()

        if response.status != http.client.OK:
            print('Problem getting project ' + simalID + '; reason: ' +
                  response.reason)
            return

        data1 = response.read().decode('utf-8', 'ignore')
        # The context manager closes the file even when write() raises.
        with open(filePath, 'w', encoding='utf-8') as projectFile:
            projectFile.write(data1)
    except Exception as e:
        # Best effort: report the problem and return normally.
        print('Could not process project \'' + projectLabel + '\': ' + str(e))
        if isinstance(e, UnicodeEncodeError):
            # Show the text just before and after the offending character.
            # e.object is the string that could not be encoded, so this
            # works no matter which step raised; clamping at 0 keeps the
            # slice from wrapping around for errors near the start.
            contextStart = max(0, e.start - 100)
            print(simalProjectUrl + ' ' + e.object[contextStart:e.start + 10])
    finally:
        # Always release the socket, on success and on failure.
        conn.close()
+
def get_all_projects_json():
    """Retrieve the list of all projects as JSON text.

    Sends a GET for '/simal-rest/allProjects/json' over a connection from
    get_simal_connection().

    Returns:
        The response body decoded as UTF-8 (undecodable bytes are dropped).

    Raises:
        Exception: Any problem making the request is printed and then
            re-raised unchanged; a response status other than 200 OK is
            raised as http.client.HTTPException.
    """
    simalAllProjectsUrl = '/simal-rest/allProjects/json'
    conn = None
    try:
        conn = get_simal_connection()
        conn.request('GET', simalAllProjectsUrl)
        response = conn.getresponse()

        if response.status != http.client.OK:
            raise http.client.HTTPException(
                'HTTP response was: ' + response.reason)

        return response.read().decode('utf-8', 'ignore')
    except Exception as e:
        print('Problem getting all projects from Simal: ' + str(e))
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # conn is still None when creating the connection itself failed.
        if conn is not None:
            conn.close()
+
def init():
    """Make sure the folder that project files are written to exists.

    Raises:
        OSError: If PROJECT_FOLDER cannot be created, or exists but is not
            a directory.
    """
    # exist_ok avoids the check-then-create race of testing
    # os.path.exists() first.
    os.makedirs(PROJECT_FOLDER, exist_ok=True)
+
def main():
    """Back up every project of the configured Simal instance.

    Creates the output folder, fetches the JSON list of all projects and
    calls process_simal_project() for each entry whose label does not
    start with 'http://'. Entries without a simalID or label are reported
    and skipped.

    Raises:
        Exception: Any problem outside process_simal_project() is printed
            and re-raised. For JSON decoding errors the text around the
            reported position is printed as well.
    """
    # Bound up front so the error handler can always refer to it.
    allProjectsJson = ''
    try:
        init()
        allProjectsJson = get_all_projects_json()

        # Next is only for Simal instances that don't yet escape JSON
        # properly. See Issue 339 / 73
        allProjectsJson = allProjectsJson.replace('\\\'', '')

        allProjects = json.loads(allProjectsJson)
        # A missing 'items' key is treated as "no projects".
        items = allProjects.get('items') or []

        print('Backing up ' + str(len(items)) + ' projects.')

        for project in items:
            simalID = project.get('simalID')
            label = project.get('label')
            if not simalID or not label:
                print('Skipping project without simalID or label: ' +
                      repr(project))
                continue
            if not label.startswith('http://'):  # Filter out empty projects
                process_simal_project(simalID, label)

    except Exception as e:
        print('Problem backing up all projects: ' + str(e))
        # JSON decoding errors carry the offending offset in 'pos'; other
        # exceptions do not have that attribute.
        probChar = getattr(e, 'pos', None)
        if isinstance(e, ValueError) and isinstance(probChar, int):
            # Clamp at 0 so an early error does not slice from the end.
            contextStart = max(0, probChar - 100)
            print(allProjectsJson[contextStart:probChar + 10])
        raise
+
# Run the backup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
--
You received this message because you are subscribed to the Google Groups "Simal
Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/simal-commits?hl=en.