Author: sebb
Date: Fri Feb 12 22:30:18 2016
New Revision: 1730132
URL: http://svn.apache.org/viewvc?rev=1730132&view=rev
Log:
Delete any projects/*.json files that were not in projects.xml
Modified:
comdev/projects.apache.org/scripts/cronjobs/parseprojects.py
Modified: comdev/projects.apache.org/scripts/cronjobs/parseprojects.py
URL:
http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/parseprojects.py?rev=1730132&r1=1730131&r2=1730132&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/parseprojects.py (original)
+++ comdev/projects.apache.org/scripts/cronjobs/parseprojects.py Fri Feb 12 22:30:18 2016
@@ -7,6 +7,7 @@ import xml.etree.ElementTree as ET
import re, urllib.request
import json
import os
+from os.path import join
import traceback
"""
@@ -21,10 +22,15 @@ Writes:
../../site/json/projects/%s.json
parseprojects-failures.xml (if failures occurred)
+Deletes any obsolete files from:
+../../site/json/projects/%s.json
+
"""
URL_TIMEOUT = 60.0 # timeout for URL requests (may need tweaking)
+PROJECTS_DIR = '../../site/json/projects'
+
projectsList = "../../data/projects.xml";
save = True;
if os.path.exists("parseprojects-failures.xml"):
@@ -115,6 +121,7 @@ def handleChild(el):
break
return tag, retval
+files = []
for s in itemlist :
url = s.childNodes[0].data
try:
@@ -205,8 +212,10 @@ for s in itemlist :
projects[projectJsonFilename] = pjson
#for e in add:
# pjson[e] = add[e]
- print("Writing projects/%s" % projectJsonFilename+".json")
- with open ("../../site/json/projects/%s.json" % projectJsonFilename, "w", encoding='utf-8') as f:
+ name = "%s.json" % projectJsonFilename
+ print("Writing projects/%s" % name)
+ files.append(name)
+ with open (join(PROJECTS_DIR, name), "w", encoding='utf-8') as f:
json.dump(pjson, f, sort_keys=True, indent=0, ensure_ascii=False)
f.close()
else:
@@ -228,6 +237,12 @@ if save:
json.dump(projects, f, sort_keys=True, indent=0, ensure_ascii=False)
f.close()
+# Drop any obsolete files
+for f in os.listdir(PROJECTS_DIR):
+ if re.match(r'.*\.json$', f) and not f in files:
+ print("Deleting obsolete file projects/%s" %f)
+ os.remove(join(PROJECTS_DIR,f))
+
if len(failures) > 0:
with open ("parseprojects-failures.xml", "w") as f:
f.write("<doapFiles>\n")
@@ -243,4 +258,5 @@ else:
except FileNotFoundError: # should not happen
pass
+
print("Done!")