This is an automated email from the ASF dual-hosted git repository. diru pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/sling-whiteboard.git
commit 6ca7662b3afe9f07ebc8bc5e86614360f71943a4 Author: Dirk Rudolph <[email protected]> AuthorDate: Mon Jun 7 12:02:29 2021 +0200 add sitemap storage cleanup implementation --- .../apache/sling/sitemap/impl/SitemapStorage.java | 180 +++++++++++++++------ .../sitemap/impl/SitemapGeneratorExecutorTest.java | 2 +- .../sling/sitemap/impl/SitemapServletTest.java | 4 +- .../sling/sitemap/impl/SitemapStorageTest.java | 83 ++++++++++ 4 files changed, 219 insertions(+), 50 deletions(-) diff --git a/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java b/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java index b31786e..4ffa9c4 100644 --- a/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java +++ b/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java @@ -20,10 +20,13 @@ package org.apache.sling.sitemap.impl; import org.apache.commons.io.IOUtils; import org.apache.jackrabbit.JcrConstants; +import org.apache.sling.api.SlingConstants; import org.apache.sling.api.resource.*; import org.apache.sling.api.wrappers.ValueMapDecorator; +import org.apache.sling.commons.scheduler.Scheduler; import org.apache.sling.serviceusermapping.ServiceUserMapped; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import org.osgi.service.component.annotations.Activate; import org.osgi.service.component.annotations.Component; import org.osgi.service.component.annotations.Reference; @@ -39,13 +42,21 @@ import java.io.OutputStream; import java.util.*; import java.util.function.Predicate; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import static org.apache.sling.sitemap.impl.SitemapUtil.*; -@Component(service = SitemapStorage.class) +@Component( + service = {SitemapStorage.class, Runnable.class}, + property = { + Scheduler.PROPERTY_SCHEDULER_NAME + "=sitemap-storage-cleanup", + Scheduler.PROPERTY_SCHEDULER_CONCURRENT + "=false", + Scheduler.PROPERTY_SCHEDULER_RUN_ON + "=" + Scheduler.VALUE_RUN_ON_SINGLE + } +) @Designate(ocd = SitemapStorage.Configuration.class) -public class SitemapStorage { +public class SitemapStorage implements Runnable { @ObjectClassDefinition(name = "Apache Sling Sitemap - Storage") @interface Configuration { @@ -57,6 +68,10 @@ public class SitemapStorage { @AttributeDefinition(name = "Max State Age", description = "The number of milliseconds after which an " + "intermediate state will deleted.") int stateMaxAge() default 60 * 30 * 1000; + + @AttributeDefinition(name = "Cleanup Schedule", description = "A cron expression defining the schedule at " + + "which stale intermediate states and old sitemaps will be removed.") + String scheduler_expression() default "0 0 1 * * ?"; } static final String PN_SITEMAP_ENTRIES = "entries"; @@ -67,11 +82,16 @@ public class SitemapStorage { "sitemap-writer"); private static final String STATE_EXTENSION = ".part"; private static final String XML_EXTENSION = ".xml"; + private static final String RT_SITEMAP_PART = "sling/sitemap/part"; + private static final String RT_SITEMAP_FILE = "sling/sitemap/file"; + private static final String PN_RESOURCE_TYPE = SlingConstants.NAMESPACE_PREFIX + ':' + SlingConstants.PROPERTY_RESOURCE_TYPE; @Reference private ResourceResolverFactory resourceResolverFactory; @Reference(target = "(subServiceName=sitemap-writer)") private ServiceUserMapped serviceUserMapped; + @Reference + private SitemapGeneratorManager generatorManager; private String rootPath = "/var/sitemaps"; private int maxStateAge = Integer.MAX_VALUE; @@ -82,8 +102,30 @@ public class SitemapStorage { maxStateAge = configuration.stateMaxAge(); } + @Override + public void run() { + try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) { + Iterator<Resource> descendants = traverse(resolver.getResource(rootPath)).iterator(); + List<Resource> toDelete = new LinkedList<>(); + while (descendants.hasNext()) { + Resource descendant = descendants.next(); + if (descendant.isResourceType(RT_SITEMAP_PART) && isExpired(descendant)) { + toDelete.add(descendant); + } else if (descendant.isResourceType(RT_SITEMAP_FILE) && !doesSitemapRootExist(descendant)) { + toDelete.add(descendant); + } + } + for (Resource resource : toDelete) { + resolver.delete(resource); + } + resolver.commit(); + } catch (LoginException | PersistenceException ex) { + LOG.warn("Failed to cleanup storage: {}", ex.getMessage(), ex); + } + } + @NotNull - public ValueMap getState(@NotNull Resource sitemapRoot, @NotNull String name) throws IOException { + ValueMap getState(@NotNull Resource sitemapRoot, @NotNull String name) throws IOException { String statePath = getSitemapFilePath(sitemapRoot, name) + STATE_EXTENSION; try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) { Resource state = resolver.getResource(statePath); @@ -91,19 +133,10 @@ public class SitemapStorage { if (state == null) { return ValueMap.EMPTY; } - ValueMap stateProperties = state.getValueMap(); - Calendar lastModified = stateProperties.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class); - if (lastModified != null) { - // advance lastModified by maxStateAge to get the point in time the state would expire - lastModified.add(Calendar.MILLISECOND, maxStateAge); - // check if the expire time is in the future, if so return the state, if not fallback to an empty state - if (lastModified.after(Calendar.getInstance())) { - return new ValueMapDecorator(new HashMap<>(state.getValueMap())); - } else if (LOG.isDebugEnabled()) { - LOG.debug("State at {} expired at {}", statePath, lastModified.getTime().toGMTString()); - } - } - return ValueMap.EMPTY; + + return !isExpired(state) + ? new ValueMapDecorator(new HashMap<>(state.getValueMap())) + : ValueMap.EMPTY; } catch (LoginException ex) { throw new IOException("Cannot read state at " + statePath, ex); } @@ -116,19 +149,20 @@ public class SitemapStorage { Resource folder = getOrCreateFolder(resolver, ResourceUtil.getParent(statePath)); String stateName = ResourceUtil.getName(statePath); Resource stateResource = folder.getChild(stateName); - if (stateResource != null) { + if (stateResource == null) { + Map<String, Object> properties = new HashMap<>(state.size() + 1); + properties.putAll(state); + properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED); + properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance()); + properties.put(PN_RESOURCE_TYPE, RT_SITEMAP_PART); + resolver.create(folder, stateName, properties); + } else { ModifiableValueMap properties = stateResource.adaptTo(ModifiableValueMap.class); if (properties == null) { throw new IOException("Cannot modify properties of existing state: " + statePath); } properties.putAll(state); properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance()); - } else { - Map<String, Object> properties = new HashMap<>(state.size() + 1); - properties.putAll(state); - properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED); - properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance()); - resolver.create(folder, stateName, properties); } resolver.commit(); } catch (LoginException | PersistenceException ex) { @@ -163,34 +197,21 @@ public class SitemapStorage { if (sitemapResource == null) { Map<String, Object> properties = new HashMap<>(3); properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED); + properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance()); + properties.put(JcrConstants.JCR_DATA, data); properties.put(PN_SITEMAP_ENTRIES, entries); properties.put(PN_SITEMAP_SIZE, size); - sitemapResource = resolver.create(folder, sitemapFileName, properties); + properties.put(PN_RESOURCE_TYPE, RT_SITEMAP_FILE); + resolver.create(folder, sitemapFileName, properties); } else { ModifiableValueMap properties = sitemapResource.adaptTo(ModifiableValueMap.class); if (properties == null) { throw new IOException("Cannot overwrite existing sitemap at: " + sitemapFilePath); } - properties.put(PN_SITEMAP_ENTRIES, entries); - properties.put(PN_SITEMAP_SIZE, size); - } - - Resource sitemapContent = sitemapResource.getChild(JcrConstants.JCR_CONTENT); - if (sitemapContent == null) { - Map<String, Object> properties = new HashMap<>(); - properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_RESOURCE); - properties.put(JcrConstants.JCR_ENCODING, "UTF-8"); - properties.put(JcrConstants.JCR_MIMETYPE, "application/xml"); - properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance()); - properties.put(JcrConstants.JCR_DATA, data); - resolver.create(sitemapResource, JcrConstants.JCR_CONTENT, properties); - } else { - ModifiableValueMap properties = sitemapContent.adaptTo(ModifiableValueMap.class); - if (properties == null) { - throw new IOException("Cannot overwrite existing sitemap at: " + sitemapFilePath); - } properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance()); properties.put(JcrConstants.JCR_DATA, data); + properties.put(PN_SITEMAP_ENTRIES, entries); + properties.put(PN_SITEMAP_SIZE, size); } Resource stateResource = resolver.getResource(statePath); @@ -243,13 +264,11 @@ public class SitemapStorage { } return StreamSupport.stream(storageResource.getChildren().spliterator(), false) .filter(child -> child.getName().endsWith(XML_EXTENSION)) + .filter(child -> child.isResourceType(RT_SITEMAP_FILE)) .map(child -> new SitemapStorageInfo( child.getPath(), child.getName().substring(0, child.getName().lastIndexOf('.')), - Optional.ofNullable(child.getChild(JcrConstants.JCR_CONTENT)) - .map(Resource::getValueMap) - .map(content -> content.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class)) - .orElse(null), + child.getValueMap().get(JcrConstants.JCR_LASTMODIFIED, Calendar.class), child.getValueMap().get(PN_SITEMAP_SIZE, 0), child.getValueMap().get(PN_SITEMAP_ENTRIES, 0))) .filter(filter) @@ -267,7 +286,8 @@ public class SitemapStorage { String sitemapFilePath = rootPath + sitemapRoot.getPath() + '/' + sitemapSelector + XML_EXTENSION; try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) { InputStream data = Optional.ofNullable(resolver.getResource(sitemapFilePath)) - .map(r -> r.getChild(JcrConstants.JCR_CONTENT)) + .filter(r -> r.getName().endsWith(XML_EXTENSION)) + .filter(r -> r.isResourceType(RT_SITEMAP_FILE)) .map(r -> r.getValueMap().get(JcrConstants.JCR_DATA, InputStream.class)) .orElse(null); @@ -284,6 +304,61 @@ public class SitemapStorage { } } + /** + * Returns true the sitemap root of the given file exists. This first checks if the file is in a top level sitemap's + * storage path and if so, checks if the selector resolves to at least one root. + * + * @param sitemapFile + * @return + */ + private boolean doesSitemapRootExist(Resource sitemapFile) { + String path = sitemapFile.getPath(); + String parentPath = ResourceUtil.getParent(path); + String sitemapRootPath = parentPath.substring(rootPath.length()); + Resource sitemapRoot = sitemapFile.getResourceResolver().getResource(sitemapRootPath); + + if (sitemapRoot == null || !SitemapUtil.isTopLevelSitemapRoot(sitemapRoot)) { + LOG.debug("Sitemap file's top level sitemap root does not exist: {}", sitemapRootPath); + return false; + } + + String name = sitemapFile.getName(); + int lastDot = name.lastIndexOf('.'); + + if (lastDot < 0) { + LOG.debug("Unexpected name, missing extension: {}", name); + return false; + } + + Map<Resource, String> candidates = SitemapUtil.resolveSitemapRoots(sitemapRoot, name.substring(0, lastDot)); + // check if for any of the candidate resource roots a generator with the name exists + return candidates.entrySet().stream() + .map(entry -> generatorManager.getApplicableNames(entry.getKey(), Collections.singleton(entry.getValue()))) + .anyMatch(names -> names.size() > 0); + } + + /** + * Returns true when the state expired according to the configured maximum age. + * + * @param state + * @return + */ + private boolean isExpired(@NotNull Resource state) { + ValueMap stateProperties = state.getValueMap(); + Calendar lastModified = stateProperties.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class); + if (lastModified != null) { + // advance lastModified by maxStateAge to get the point in time the state would expire + lastModified.add(Calendar.MILLISECOND, maxStateAge); + // check if the expire time is in the future + if (lastModified.after(Calendar.getInstance())) { + return false; + } else if (LOG.isDebugEnabled()) { + LOG.debug("State at {} expired at {}", state.getPath(), lastModified.getTime().toGMTString()); + } + } + return true; + } + @NotNull private String getSitemapFilePath(@NotNull Resource sitemapRoot, @NotNull String name) { Resource topLevelSitemapRoot = getTopLevelSitemapRoot(sitemapRoot); @@ -315,4 +390,15 @@ public class SitemapStorage { } return folder; } + + private static Stream<Resource> traverse(@Nullable Resource resource) { + if (resource == null) { + return Stream.empty(); + } + + return Stream.concat( + Stream.of(resource), + StreamSupport.stream(resource.getChildren().spliterator(), false).flatMap(SitemapStorage::traverse) + ); + } } diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java index 226fb3d..d000c35 100644 --- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java +++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java @@ -88,8 +88,8 @@ public class SitemapGeneratorExecutorTest { context.registerService(ServiceUserMapped.class, serviceUser, "subServiceName", "sitemap-writer"); context.registerService(SitemapGenerator.class, generator); context.registerService(JobManager.class, jobManager); - context.registerInjectActivateService(storage); context.registerInjectActivateService(generatorManager); + context.registerInjectActivateService(storage); context.registerInjectActivateService(extensionProviderManager); context.registerInjectActivateService(sitemapService); diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java index 02b4172..bc678ae 100644 --- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java +++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java @@ -230,9 +230,9 @@ public class SitemapServletTest { byte[] expectedOutcomeBytes = expectedOutcome.getBytes(StandardCharsets.UTF_8); context.registerInjectActivateService(sitemapService, "onDemandNames", "news"); context.registerInjectActivateService(subject); - storage.writeSitemap(root, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(expectedOutcomeBytes), + storage.writeSitemap(root, "foo", new ByteArrayInputStream(expectedOutcomeBytes), expectedOutcomeBytes.length, 1); - MockSlingHttpServletRequest request = newSitemapReq("sitemap", root); + MockSlingHttpServletRequest request = newSitemapReq("foo-sitemap", root); MockSlingHttpServletResponse response = context.response(); // when diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java index 55991b2..3603a69 100644 --- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java +++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java @@ -20,6 +20,7 @@ package org.apache.sling.sitemap.impl; import com.google.common.collect.ImmutableMap; import org.apache.commons.io.IOUtils; +import org.apache.sling.api.resource.ModifiableValueMap; import org.apache.sling.api.resource.Resource; import org.apache.sling.api.resource.ValueMap; import org.apache.sling.serviceusermapping.ServiceUserMapped; @@ -39,12 +40,15 @@ import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.nio.charset.StandardCharsets; +import java.util.Collections; import java.util.Set; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.hasItems; import static org.hamcrest.Matchers.hasSize; import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; @ExtendWith({SlingContextExtension.class, MockitoExtension.class}) public class SitemapStorageTest { @@ -52,16 +56,23 @@ public class SitemapStorageTest { public final SlingContext context = new SlingContext(); private final SitemapStorage subject = new SitemapStorage(); + private final SitemapGeneratorManager generatorManager = new SitemapGeneratorManager(); + @Mock(lenient = true) + private SitemapGenerator generator; @Mock private ServiceUserMapped serviceUser; @BeforeEach public void setup() { + context.registerService(SitemapGenerator.class, generator); context.registerService(ServiceUserMapped.class, serviceUser, "subServiceName", "sitemap-writer"); + context.registerInjectActivateService(generatorManager); context.registerInjectActivateService(subject, "stateMaxAge", 100 ); + + when(generator.getNames(any())).thenReturn(Collections.singleton(SitemapGenerator.DEFAULT_SITEMAP)); } @Test @@ -133,6 +144,78 @@ public class SitemapStorageTest { assertThat(sitemapNames, hasItems(storageInfo("foobar-sitemap"), storageInfo("sitemap"))); } + @Test + public void testCleanupExpiredStates() throws Exception { + // given + Resource root = context.create().resource("/content/site/de", ImmutableMap.of( + "sitemapRoot", Boolean.TRUE + )); + + // when + subject.writeState(root, SitemapGenerator.DEFAULT_SITEMAP, ImmutableMap.of("i", 1)); + + // then + assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.part")); + + // and when + Thread.sleep(100); + subject.run(); + + // then + assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.part")); + } + + @Test + public void testCleanupObsoleteSitemapsAfterTopLevelChanged() throws Exception { + // given + Resource newRoot = context.create().resource("/content/site/ch"); + Resource initialRoot = context.create().resource("/content/site/ch/de-ch", ImmutableMap.of( + "sitemapRoot", Boolean.TRUE + )); + + // when + subject.writeSitemap(initialRoot, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0); + + // then + assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/ch/de-ch/sitemap.xml")); + + // and when + newRoot.adaptTo(ModifiableValueMap.class).put("sitemapRoot", Boolean.TRUE); + context.resourceResolver().commit(); + subject.run(); + + // then + assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/ch/de-ch/sitemap.xml")); + } + + @Test + public void testCleanupObsoleteSitemapsAfterNestedSitemapRootChanged() throws Exception { + // given + Resource root = context.create().resource("/content/site/de", ImmutableMap.of( + "sitemapRoot", Boolean.TRUE + )); + Resource news = context.create().resource("/content/site/de/news", ImmutableMap.of( + "sitemapRoot", Boolean.TRUE + )); + + // when + subject.writeSitemap(root, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0); + subject.writeSitemap(news, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0); + + // then + assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.xml")); + assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/news-sitemap.xml")); + + // and when + news.adaptTo(ModifiableValueMap.class).put("sitemapRoot", Boolean.FALSE); + context.resourceResolver().commit(); + subject.run(); + + // then + assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.xml")); + assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/news-sitemap.xml")); + } + static void assertResourceDataEquals(String expectedValue, Resource resource) throws IOException { assertNotNull(resource); InputStream inputStream = resource.adaptTo(InputStream.class);
