Added: manifoldcf/branches/CONNECTORS-1637/connectors/confluence-v6/src/main/java/org/apache/manifoldcf/crawler/connectors/confluence/v6/ConfluenceRepositoryConnector.java URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1637/connectors/confluence-v6/src/main/java/org/apache/manifoldcf/crawler/connectors/confluence/v6/ConfluenceRepositoryConnector.java?rev=1874903&view=auto ============================================================================== --- manifoldcf/branches/CONNECTORS-1637/connectors/confluence-v6/src/main/java/org/apache/manifoldcf/crawler/connectors/confluence/v6/ConfluenceRepositoryConnector.java (added) +++ manifoldcf/branches/CONNECTORS-1637/connectors/confluence-v6/src/main/java/org/apache/manifoldcf/crawler/connectors/confluence/v6/ConfluenceRepositoryConnector.java Fri Mar 6 15:29:45 2020 @@ -0,0 +1,1481 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.manifoldcf.crawler.connectors.confluence.v6; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.io.StringReader; +import java.text.DateFormat; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.lang.StringUtils; +import org.apache.manifoldcf.agents.interfaces.RepositoryDocument; +import org.apache.manifoldcf.agents.interfaces.ServiceInterruption; +import org.apache.manifoldcf.core.interfaces.ConfigParams; +import org.apache.manifoldcf.core.interfaces.IHTTPOutput; +import org.apache.manifoldcf.core.interfaces.IPasswordMapperActivity; +import org.apache.manifoldcf.core.interfaces.IPostParameters; +import org.apache.manifoldcf.core.interfaces.IThreadContext; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.core.interfaces.Specification; +import org.apache.manifoldcf.core.interfaces.SpecificationNode; +import org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.client.ConfluenceClient; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Attachment; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.ConfluenceResponse; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.ConfluenceRestrictionsResponse; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.PageType; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions.ReadRestrictions; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Space; +import org.apache.manifoldcf.crawler.connectors.confluence.v6.util.ConfluenceUtil; +import org.apache.manifoldcf.crawler.interfaces.IExistingVersions; +import org.apache.manifoldcf.crawler.interfaces.IProcessActivity; +import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity; +import org.apache.manifoldcf.crawler.system.Logging; +import org.json.simple.JSONArray; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Optional; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +/** + * <p> + * Confluence Repository Connector class + * </p> + * <p> + * ManifoldCF Repository connector to deal with Confluence documents + * </p> + * + * @author Julien Massiera & Antonio David Perez Morales + * + */ +public class ConfluenceRepositoryConnector extends BaseRepositoryConnector { + + protected final static String ACTIVITY_READ = "read document"; + + /** Deny access token for default authority */ + private final static String defaultAuthorityDenyToken = GLOBAL_DENY_TOKEN; + + private static final String CHILD_PREFIX = "child+"; + + /* + * Prefix for Confluence configuration and specification parameters + */ + private static final String PARAMETER_PREFIX = "confluence_"; + + /* Configuration tabs */ + private static final String CONF_SERVER_TAB_PROPERTY = "ConfluenceRepositoryConnector.Server"; + + /* Specification tabs */ + private static final String CONF_SECURITY_TAB_PROPERTY = "ConfluenceRepositoryConnector.Security"; + private static final String CONF_SPACES_TAB_PROPERTY = "ConfluenceRepositoryConnector.Spaces"; + private static final String CONF_PAGES_TAB_PROPERTY = "ConfluenceRepositoryConnector.Pages"; + + // pages & js + // Template names for Confluence configuration + /** + * Forward to the javascript to check the configuration parameters + */ + private static final String EDIT_CONFIG_HEADER_FORWARD = "editConfiguration_conf.js"; + /** + * Server tab template + */ + private static final String EDIT_CONFIG_FORWARD_SERVER = "editConfiguration_conf_server.html"; + + /** + * Forward to the HTML template to view the configuration parameters + */ + private static final String VIEW_CONFIG_FORWARD = "viewConfiguration_conf.html"; + + // Template names for Confluence job specification + /** + * Forward to the javascript to check the specification parameters for the job + */ + private static final String EDIT_SPEC_HEADER_FORWARD = "editSpecification_conf.js"; + /** + * Forward to the template to edit the security for the job + */ + private static final String EDIT_SPEC_FORWARD_SECURITY = "editSpecification_confSecurity.html"; + /** + * Forward to the template to edit the spaces for the job + */ + private static final String EDIT_SPEC_FORWARD_SPACES = "editSpecification_confSpaces.html"; + + /** + * Forward to the template to edit the pages configuration for the job + */ + private static final String EDIT_SPEC_FORWARD_CONF_PAGES = "editSpecification_confPages.html"; + + /** + * Forward to the template to view the specification parameters for the job + */ + private static final String VIEW_SPEC_FORWARD = "viewSpecification_conf.html"; + + protected long lastSessionFetch = -1L; + protected static final long timeToRelease = 300000L; + + protected final static long interruptionRetryTime = 5L * 60L * 1000L; + + private final Logger logger = LoggerFactory.getLogger(ConfluenceRepositoryConnector.class); + + /* Confluence instance parameters */ + protected String protocol = null; + protected String host = null; + protected String port = null; + protected String path = null; + protected String username = null; + protected String password = null; + protected String socketTimeout = null; + protected String connectionTimeout = null; + + protected ConfluenceClient confluenceClient = null; + + /** + * <p> + * Default constructor + * </p> + */ + public ConfluenceRepositoryConnector() { + super(); + } + + /** + * Set Confluence Client (Mainly for Testing) + * + * @param confluenceClient + */ + public void setConfluenceClient(final ConfluenceClient confluenceClient) { + this.confluenceClient = confluenceClient; + } + + @Override + public String[] getActivitiesList() { + return new String[] { ACTIVITY_READ }; + } + + @Override + public String[] getBinNames(final String documentIdentifier) { + return new String[] { host }; + } + + /** + * Close the connection. Call this before discarding the connection. + */ + @Override + public void disconnect() throws ManifoldCFException { + if (confluenceClient != null) { + confluenceClient = null; + } + + protocol = null; + host = null; + port = null; + path = null; + username = null; + password = null; + socketTimeout = null; + connectionTimeout = null; + + } + + /** + * Makes connection to server + * + * + */ + @Override + public void connect(final ConfigParams configParams) { + super.connect(configParams); + + protocol = params.getParameter(ConfluenceConfiguration.Server.PROTOCOL); + host = params.getParameter(ConfluenceConfiguration.Server.HOST); + port = params.getParameter(ConfluenceConfiguration.Server.PORT); + path = params.getParameter(ConfluenceConfiguration.Server.PATH); + username = params.getParameter(ConfluenceConfiguration.Server.USERNAME); + password = params.getObfuscatedParameter(ConfluenceConfiguration.Server.PASSWORD); + socketTimeout = params.getParameter(ConfluenceConfiguration.Server.SOCKET_TIMEOUT); + connectionTimeout = params.getParameter(ConfluenceConfiguration.Server.CONNECTION_TIMEOUT); + + try { + initConfluenceClient(); + } catch (final ManifoldCFException e) { + logger.debug("Not possible to initialize Confluence client. Reason: {}", e.getMessage()); + e.printStackTrace(); + } + } + + /** + * Checks if connection is available + */ + @Override + public String check() throws ManifoldCFException { + try { + if (!isConnected()) { + initConfluenceClient(); + } + final Boolean result = confluenceClient.check(); + if (result) { + return super.check(); + } else { + throw new ManifoldCFException("Confluence instance could not be reached"); + } + } catch (final ServiceInterruption e) { + return "Connection temporarily failed: " + e.getMessage(); + } catch (final ManifoldCFException e) { + return "Connection failed: " + e.getMessage(); + } catch (final Exception e) { + return "Connection failed: " + e.getMessage(); + } + } + + /** + * <p> + * Initialize Confluence client using the configured parameters + * + * @throws ManifoldCFException + */ + protected void initConfluenceClient() throws ManifoldCFException { + if (confluenceClient == null) { + + if (StringUtils.isEmpty(protocol)) { + throw new ManifoldCFException("Parameter " + ConfluenceConfiguration.Server.PROTOCOL + " required but not set"); + } + + if (Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence protocol = '" + protocol + "'"); + } + + if (StringUtils.isEmpty(host)) { + throw new ManifoldCFException("Parameter " + ConfluenceConfiguration.Server.HOST + " required but not set"); + } + + if (Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence host = '" + host + "'"); + } + + if (Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence port = '" + port + "'"); + } + +// if (StringUtils.isEmpty(path)) { +// throw new ManifoldCFException("Parameter " +// + ConfluenceConfiguration.Server.PATH +// + " required but not set"); +// } + + if (Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence path = '" + path + "'"); + } + + if (Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence username = '" + username + "'"); + } + + if (Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence password '" + password != null ? "set" : "not set" + "'"); + } + + if (Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence socket timeout = '" + socketTimeout + "'"); + } + + if (Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence connection timeout = '" + connectionTimeout + "'"); + } + + int portInt; + if (port != null && port.length() > 0) { + try { + portInt = Integer.parseInt(port); + } catch (final NumberFormatException e) { + throw new ManifoldCFException("Bad number: " + e.getMessage(), e); + } + } else { + if (protocol.toLowerCase(Locale.ROOT).equals("http")) { + portInt = 80; + } else { + portInt = 443; + } + } + + int socketTimeoutInt; + if (socketTimeout != null && socketTimeout.length() > 0) { + try { + socketTimeoutInt = Integer.parseInt(socketTimeout); + } catch (final NumberFormatException e) { + throw new ManifoldCFException("Bad number: " + e.getMessage(), e); + } + } else { + socketTimeoutInt = 900000; + } + + int connectionTimeoutInt; + if (connectionTimeout != null && connectionTimeout.length() > 0) { + try { + connectionTimeoutInt = Integer.parseInt(connectionTimeout); + } catch (final NumberFormatException e) { + throw new ManifoldCFException("Bad number: " + e.getMessage(), e); + } + } else { + connectionTimeoutInt = 60000; + } + + /* Generating a client to perform Confluence requests */ + confluenceClient = new ConfluenceClient(protocol, host, portInt, path, username, password, socketTimeoutInt, connectionTimeoutInt); + lastSessionFetch = System.currentTimeMillis(); + } + + } + + /** + * This method is called to assess whether to count this connector instance should actually be counted as being connected. + * + * @return true if the connector instance is actually connected. + */ + @Override + public boolean isConnected() { + return confluenceClient != null; + } + + @Override + public void poll() throws ManifoldCFException { + if (lastSessionFetch == -1L) { + return; + } + + final long currentTime = System.currentTimeMillis(); + if (currentTime >= lastSessionFetch + timeToRelease) { + confluenceClient.close(); + confluenceClient = null; + lastSessionFetch = -1L; + } + } + + @Override + public int getMaxDocumentRequest() { + return super.getMaxDocumentRequest(); + } + + /** + * Return the list of relationship types that this connector recognizes. + * + * @return the list. + */ + @Override + public String[] getRelationshipTypes() { + return new String[] {}; + } + + private void fillInServerConfigurationMap(final Map<String, String> serverMap, final IPasswordMapperActivity mapper, final ConfigParams parameters) { + String confluenceProtocol = parameters.getParameter(ConfluenceConfiguration.Server.PROTOCOL); + String confluenceHost = parameters.getParameter(ConfluenceConfiguration.Server.HOST); + String confluencePort = parameters.getParameter(ConfluenceConfiguration.Server.PORT); + String confluencePath = parameters.getParameter(ConfluenceConfiguration.Server.PATH); + String confluenceUsername = parameters.getParameter(ConfluenceConfiguration.Server.USERNAME); + String confluencePassword = parameters.getObfuscatedParameter(ConfluenceConfiguration.Server.PASSWORD); + String confluenceSocketTimeout = parameters.getParameter(ConfluenceConfiguration.Server.SOCKET_TIMEOUT); + String confluenceConnectionTimeout = parameters.getParameter(ConfluenceConfiguration.Server.CONNECTION_TIMEOUT); + + if (confluenceProtocol == null) { + confluenceProtocol = ConfluenceConfiguration.Server.PROTOCOL_DEFAULT_VALUE; + } + if (confluenceHost == null) { + confluenceHost = ConfluenceConfiguration.Server.HOST_DEFAULT_VALUE; + } + if (confluencePort == null) { + confluencePort = ConfluenceConfiguration.Server.PORT_DEFAULT_VALUE; + } + if (confluencePath == null) { + confluencePath = ConfluenceConfiguration.Server.PATH_DEFAULT_VALUE; + } + + if (confluenceUsername == null) { + confluenceUsername = ConfluenceConfiguration.Server.USERNAME_DEFAULT_VALUE; + } + if (confluencePassword == null) { + confluencePassword = ConfluenceConfiguration.Server.PASSWORD_DEFAULT_VALUE; + } else { + confluencePassword = mapper.mapPasswordToKey(confluencePassword); + } + + if (confluenceSocketTimeout == null) { + confluenceSocketTimeout = ConfluenceConfiguration.Server.SOCKET_TIMEOUT_DEFAULT_VALUE; + } + if (confluenceConnectionTimeout == null) { + confluenceConnectionTimeout = ConfluenceConfiguration.Server.CONNECTION_TIMEOUT_DEFAULT_VALUE; + } + + serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PROTOCOL, confluenceProtocol); + serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.HOST, confluenceHost); + serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PORT, confluencePort); + serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PATH, confluencePath); + serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.USERNAME, confluenceUsername); + serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PASSWORD, confluencePassword); + serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.SOCKET_TIMEOUT, confluenceSocketTimeout); + serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.CONNECTION_TIMEOUT, confluenceConnectionTimeout); + } + + @Override + public void viewConfiguration(final IThreadContext threadContext, final IHTTPOutput out, final Locale locale, final ConfigParams parameters) throws ManifoldCFException, IOException { + final Map<String, String> paramMap = new HashMap<String, String>(); + + /* Fill server configuration parameters */ + fillInServerConfigurationMap(paramMap, out, parameters); + + Messages.outputResourceWithVelocity(out, locale, VIEW_CONFIG_FORWARD, paramMap, true); + } + + @Override + public void outputConfigurationHeader(final IThreadContext threadContext, final IHTTPOutput out, final Locale locale, final ConfigParams parameters, final List<String> tabsArray) + throws ManifoldCFException, IOException { + // Add the Server tab + tabsArray.add(Messages.getString(locale, CONF_SERVER_TAB_PROPERTY)); + // Map the parameters + final Map<String, String> paramMap = new HashMap<String, String>(); + + /* Fill server configuration parameters */ + fillInServerConfigurationMap(paramMap, out, parameters); + + // Output the Javascript - only one Velocity template for all tabs + Messages.outputResourceWithVelocity(out, locale, EDIT_CONFIG_HEADER_FORWARD, paramMap, true); + } + + @Override + public void outputConfigurationBody(final IThreadContext threadContext, final IHTTPOutput out, final Locale locale, final ConfigParams parameters, final String tabName) + throws ManifoldCFException, IOException { + + // Call the Velocity templates for each tab + final Map<String, String> paramMap = new HashMap<String, String>(); + // Set the tab name + paramMap.put("TabName", tabName); + + // Fill in the parameters + fillInServerConfigurationMap(paramMap, out, parameters); + + // Server tab + Messages.outputResourceWithVelocity(out, locale, EDIT_CONFIG_FORWARD_SERVER, paramMap, true); + + } + + /* + * Repository specification post handle, (server and proxy & client secret etc) + * + * @see org.apache.manifoldcf.core.connector.BaseConnector#processConfigurationPost (org.apache.manifoldcf.core.interfaces.IThreadContext, + * org.apache.manifoldcf.core.interfaces.IPostParameters, org.apache.manifoldcf.core.interfaces.ConfigParams) + */ + @Override + public String processConfigurationPost(final IThreadContext threadContext, final IPostParameters variableContext, final ConfigParams parameters) throws ManifoldCFException { + + final String confluenceProtocol = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PROTOCOL); + if (confluenceProtocol != null) { + parameters.setParameter(ConfluenceConfiguration.Server.PROTOCOL, confluenceProtocol); + } + + final String confluenceHost = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.HOST); + if (confluenceHost != null) { + parameters.setParameter(ConfluenceConfiguration.Server.HOST, confluenceHost); + } + + final String confluencePort = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PORT); + if (confluencePort != null) { + parameters.setParameter(ConfluenceConfiguration.Server.PORT, confluencePort); + } + + final String confluencePath = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PATH); + if (confluencePath != null) { + parameters.setParameter(ConfluenceConfiguration.Server.PATH, confluencePath); + } + + final String confluenceUsername = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.USERNAME); + if (confluenceUsername != null) { + parameters.setParameter(ConfluenceConfiguration.Server.USERNAME, confluenceUsername); + } + + final String confluencePassword = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PASSWORD); + if (confluencePassword != null) { + parameters.setObfuscatedParameter(ConfluenceConfiguration.Server.PASSWORD, variableContext.mapKeyToPassword(confluencePassword)); + } + + final String confluenceSocketTimeout = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.SOCKET_TIMEOUT); + if (confluenceSocketTimeout != null) { + parameters.setParameter(ConfluenceConfiguration.Server.SOCKET_TIMEOUT, confluenceSocketTimeout); + } + + final String confluenceConnectionTimeout = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.CONNECTION_TIMEOUT); + if (confluenceConnectionTimeout != null) { + parameters.setParameter(ConfluenceConfiguration.Server.CONNECTION_TIMEOUT, confluenceConnectionTimeout); + } + + /* null means process configuration has been successful */ + return null; + } + + /** + * <p> + * Fill the configured spaces into the map + * </p> + * + * @param newMap + * @param cs + */ + private void fillInConfSpacesSpecificationMap(final Map<String, Object> newMap, final ConfluenceSpecification cs) { + + newMap.put(ConfluenceConfiguration.Specification.SPACES.toUpperCase(Locale.ROOT), cs.getSpaces()); + } + + private void fillInConfSecuritySpecificationMap(final Map<String, Object> newMap, final ConfluenceSpecification cs) { + + newMap.put(ConfluenceConfiguration.Specification.ACTIVATE_SECURITY_ATTRIBUTE_KEY.toUpperCase(Locale.ROOT), cs.isSecurityActive().toString()); + return; + + } + + /** + * <p> + * Fill the pages configuration into the map + * </p> + * + * @param newMap + * @param cs + */ + private void fillInConfPagesSpecificationMap(final Map<String, Object> newMap, final ConfluenceSpecification cs) { + + newMap.put(ConfluenceConfiguration.Specification.PROCESS_ATTACHMENTS_ATTRIBUTE_KEY.toUpperCase(Locale.ROOT), cs.isProcessAttachments().toString()); + newMap.put(ConfluenceConfiguration.Specification.PAGETYPE.toUpperCase(Locale.ROOT), cs.getPageType()); + return; + + } + + @Override + public void viewSpecification(final IHTTPOutput out, final Locale locale, final Specification ds, final int connectionSequenceNumber) throws ManifoldCFException, IOException { + + final Map<String, Object> paramMap = new HashMap<String, Object>(); + paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); + + final ConfluenceSpecification cs = ConfluenceSpecification.from(ds); + + fillInConfSecuritySpecificationMap(paramMap, cs); + fillInConfSpacesSpecificationMap(paramMap, cs); + fillInConfPagesSpecificationMap(paramMap, cs); + + Messages.outputResourceWithVelocity(out, locale, VIEW_SPEC_FORWARD, paramMap); + } + + /* + * Handle job specification post + * + * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# processSpecificationPost + * (org.apache.manifoldcf.core.interfaces.IPostParameters, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification) + */ + + @Override + public String processSpecificationPost(final IPostParameters variableContext, final Locale locale, final Specification ds, final int connectionSequenceNumber) throws ManifoldCFException { + + final String seqPrefix = "s" + connectionSequenceNumber + "_"; + + String xc = variableContext.getParameter(seqPrefix + "spacescount"); + if (xc != null) { + // Delete all preconfigured spaces + int i = 0; + while (i < ds.getChildCount()) { + final SpecificationNode sn = ds.getChild(i); + if (sn.getType().equals(ConfluenceConfiguration.Specification.SPACES)) { + ds.removeChild(i); + } else { + i++; + } + } + + final SpecificationNode spaces = new SpecificationNode(ConfluenceConfiguration.Specification.SPACES); + ds.addChild(ds.getChildCount(), spaces); + final int spacesCount = Integer.parseInt(xc); + i = 0; + while (i < spacesCount) { + final String spaceDescription = "_" + Integer.toString(i); + final String spaceOpName = seqPrefix + "spaceop" + spaceDescription; + xc = variableContext.getParameter(spaceOpName); + if (xc != null && xc.equals("Delete")) { + // Next row + i++; + continue; + } + // Get the stuff we need + final String spaceKey = variableContext.getParameter(seqPrefix + "space" + spaceDescription); + final SpecificationNode node = new SpecificationNode(ConfluenceConfiguration.Specification.SPACE); + node.setAttribute(ConfluenceConfiguration.Specification.SPACE_KEY_ATTRIBUTE, spaceKey); + spaces.addChild(spaces.getChildCount(), node); + i++; + } + + final String op = variableContext.getParameter(seqPrefix + "spaceop"); + if (op != null && op.equals("Add")) { + final String spaceSpec = variableContext.getParameter(seqPrefix + "space"); + final SpecificationNode node = new SpecificationNode(ConfluenceConfiguration.Specification.SPACE); + node.setAttribute(ConfluenceConfiguration.Specification.SPACE_KEY_ATTRIBUTE, spaceSpec); + spaces.addChild(spaces.getChildCount(), node); + } + } + + /* Delete security configuration */ + int i = 0; + while (i < ds.getChildCount()) { + final SpecificationNode sn = ds.getChild(i); + if (sn.getType().equals(ConfluenceConfiguration.Specification.SECURITY)) { + ds.removeChild(i); + } else { + i++; + } + } + + final SpecificationNode security = new SpecificationNode(ConfluenceConfiguration.Specification.SECURITY); + ds.addChild(ds.getChildCount(), security); + + final String activateSecurity = variableContext.getParameter(seqPrefix + ConfluenceConfiguration.Specification.ACTIVATE_SECURITY_ATTRIBUTE_KEY); + if (activateSecurity != null && !activateSecurity.isEmpty()) { + security.setAttribute(ConfluenceConfiguration.Specification.ACTIVATE_SECURITY_ATTRIBUTE_KEY, String.valueOf(activateSecurity)); + } + + /* Delete pages configuration */ + i = 0; + while (i < ds.getChildCount()) { + final SpecificationNode sn = ds.getChild(i); + if (sn.getType().equals(ConfluenceConfiguration.Specification.PAGES)) { + ds.removeChild(i); + } else { + i++; + } + } + + final SpecificationNode pages = new SpecificationNode(ConfluenceConfiguration.Specification.PAGES); + ds.addChild(ds.getChildCount(), pages); + + final String procAttachments = variableContext.getParameter(seqPrefix + ConfluenceConfiguration.Specification.PROCESS_ATTACHMENTS_ATTRIBUTE_KEY); + if (procAttachments != null && !procAttachments.isEmpty()) { + pages.setAttribute(ConfluenceConfiguration.Specification.PROCESS_ATTACHMENTS_ATTRIBUTE_KEY, String.valueOf(procAttachments)); + } + + final String pageType = variableContext.getParameter(seqPrefix + ConfluenceConfiguration.Specification.PAGETYPE); + if (pageType != null && !pageType.isEmpty()) { + pages.setAttribute(ConfluenceConfiguration.Specification.PAGETYPE, pageType); + } + + return null; + } + + /* + * (non-Javadoc) + * + * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# outputSpecificationBody + * (org.apache.manifoldcf.core.interfaces.IHTTPOutput, java.util.Locale, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification, + * java.lang.String) + */ + @Override + public void outputSpecificationBody(final IHTTPOutput out, final Locale locale, final Specification ds, final int connectionSequenceNumber, final int actualSequenceNumber, final String tabName) + throws ManifoldCFException, IOException { + + // Output JIRAQuery tab + final Map<String, Object> paramMap = new HashMap<String, Object>(); + paramMap.put("TabName", tabName); + paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); + paramMap.put("SelectedNum", Integer.toString(actualSequenceNumber)); + + final ConfluenceSpecification cs = ConfluenceSpecification.from(ds); + + fillInConfSecuritySpecificationMap(paramMap, cs); + fillInConfSpacesSpecificationMap(paramMap, cs); + fillInConfPagesSpecificationMap(paramMap, cs); + + Messages.outputResourceWithVelocity(out, locale, EDIT_SPEC_FORWARD_SECURITY, paramMap); + + Messages.outputResourceWithVelocity(out, locale, EDIT_SPEC_FORWARD_SPACES, paramMap); + + Messages.outputResourceWithVelocity(out, locale, EDIT_SPEC_FORWARD_CONF_PAGES, paramMap); + } + + /* + * Header for the specification + * + * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# outputSpecificationHeader + * (org.apache.manifoldcf.core.interfaces.IHTTPOutput, java.util.Locale, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification, + * java.util.List) + */ + @Override + public void outputSpecificationHeader(final IHTTPOutput out, final Locale locale, final Specification ds, final int connectionSequenceNumber, final List<String> tabsArray) + throws ManifoldCFException, IOException { + + tabsArray.add(Messages.getString(locale, CONF_SECURITY_TAB_PROPERTY)); + tabsArray.add(Messages.getString(locale, CONF_SPACES_TAB_PROPERTY)); + tabsArray.add(Messages.getString(locale, CONF_PAGES_TAB_PROPERTY)); + + final Map<String, Object> paramMap = new HashMap<String, Object>(); + paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); + + Messages.outputResourceWithVelocity(out, locale, EDIT_SPEC_HEADER_FORWARD, paramMap); + } + + /* + * Adding seed documents + * + * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# addSeedDocuments + * (org.apache.manifoldcf.crawler.interfaces.ISeedingActivity, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification, long, long, + * int) + */ + @Override + public String addSeedDocuments(final ISeedingActivity activities, final Specification spec, final String lastSeedVersion, final long seedTime, final int jobMode) + throws ManifoldCFException, ServiceInterruption { + + if (!isConnected()) { + initConfluenceClient(); + } + + try { + + /* + * Not uses delta seeding because Confluence can't be queried using dates or in a ordered way, only start and limit which can cause problems + * if an already indexed document is deleted, because we will miss some to-be indexed docs due to the last start parameter stored in the + * last execution + */ + // if(lastSeedVersion != null && !lastSeedVersion.isEmpty()) { + // StringTokenizer tokenizer = new + // StringTokenizer(lastSeedVersion,"|"); + // + // lastStart = new Long(lastSeedVersion); + // } + + final ConfluenceSpecification confluenceSpecification = ConfluenceSpecification.from(spec); + List<String> spaceKeys = confluenceSpecification.getSpaces(); + final String pageType = confluenceSpecification.getPageType(); + + if (spaceKeys.isEmpty()) { + logger.info("No spaces configured. Processing all spaces"); + spaceKeys = getAllSpaceKeys(); + } + + for (final String space : spaceKeys) { + logger.info("Processing configured space {}", space); + addSeedDocumentsForSpace(space, Optional.<String>of(pageType), activities, confluenceSpecification, lastSeedVersion, seedTime, jobMode); + } + + return ""; + } catch (final Exception e) { + handleConfluenceDownException(e, "seeding"); + return null; + } + } + + private List<Page> getPageChilds(final String pageId) throws ManifoldCFException, ServiceInterruption { + long lastStart = 0; + final long defaultSize = 25; + final List<Page> pageChilds = new ArrayList<>(); + + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("Starting from {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getPageChilds" })); + } + + try { + Boolean isLast = true; + do { + final ConfluenceResponse<Page> response = confluenceClient.getPageChilds((int) lastStart, (int) defaultSize, pageId); + + int count = 0; + for (final Page page : response.getResults()) { + pageChilds.add(page); + count++; + } + + lastStart += count; + isLast = response.isLast(); + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("New start {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getPageChilds" })); + } + } while (!isLast); + + } catch (final Exception e) { + handleConfluenceDownException(e, "seeding"); + } + return pageChilds; + + } + + private List<Restrictions> getPageReadRestrictions(final String pageId) throws ManifoldCFException, ServiceInterruption { + long lastStart = 0; + final long defaultSize = 200; + final List<Restrictions> restrictionsList = new ArrayList<Restrictions>(); + + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("Starting from {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getAllSpaceKeys" })); + } + + try { + Boolean isLast = true; + do { + final ConfluenceRestrictionsResponse<Restrictions> response = confluenceClient.getPageReadRestrictions((int) lastStart, (int) defaultSize, pageId); + + if (response.getResult() != null) { + restrictionsList.add(response.getResult()); + } + + isLast = response.isLast(); + if (!isLast) { + lastStart += defaultSize; + } + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("New start {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getAllSpaceKeys" })); + } + } while (!isLast); + + } catch (final Exception e) { + handleConfluenceDownException(e, "seeding"); + } + return restrictionsList; + } + + private List<String> getAllSpaceKeys() throws ManifoldCFException, ServiceInterruption { + final List<String> spaceKeys = new ArrayList<String>(); + long lastStart = 0; + final long defaultSize = 25; + + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("Starting from {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getAllSpaceKeys" })); + } + + try { + Boolean isLast = true; + do { + final ConfluenceResponse<Space> response = confluenceClient.getSpaces((int) lastStart, (int) defaultSize, Optional.<String>absent(), Optional.<String>absent()); + + int count = 0; + for (final Space space : response.getResults()) { + spaceKeys.add(space.getKey()); + count++; + } + + lastStart += count; + isLast = response.isLast(); + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("New start {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getAllSpaceKeys" })); + } + } while (!isLast); + } catch (final Exception e) { + handleConfluenceDownException(e, "seeding"); + } + return spaceKeys; + } + + /** + * <p> + * Add seed documents for a given optional space + * </p> + * + * @throws ServiceInterruption + * @throws ManifoldCFException + */ + private void addSeedDocumentsForSpace(final String space, final Optional<String> pageType, final ISeedingActivity activities, final ConfluenceSpecification confluenceSpec, + final String lastSeedVersion, final long seedTime, final int jobMode) throws ManifoldCFException, ServiceInterruption { + + long lastStart = 0; + final long defaultSize = 50; + + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + final String spaceDesc = "space with key " + space; + Logging.connectors.debug(new MessageFormat("Starting from {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, spaceDesc })); + } + + try { + Boolean isLast = true; + do { + final ConfluenceResponse<Page> response = confluenceClient.getSpaceRootPages((int) lastStart, (int) defaultSize, space, pageType); +// final ConfluenceResponse<Page> response = confluenceClient.getPages( +// (int) lastStart, (int) defaultSize, space, pageType); + + int count = 0; + for (final Page page : response.getResults()) { + + activities.addSeedDocument(page.getId()); + if (confluenceSpec.isProcessAttachments()) { + processSeedAttachments(page, activities); + } + count++; + } + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("Fetched and added {0} seed documents", Locale.ROOT).format(new Object[] { new Integer(count) })); + } + + lastStart += count; + isLast = response.isLast(); + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("New start {0} and size {1}", Locale.ROOT).format(new Object[] { lastStart, defaultSize })); + } + } while (!isLast); + + } catch (final Exception e) { + handleConfluenceDownException(e, "seeding"); + } + + } + + /** + * <p> + * Process seed attachments for the given page + * </p> + * + * @param page + * @param activities + */ + private void processSeedAttachments(final Page page, final ISeedingActivity activities) throws ManifoldCFException, ServiceInterruption { + long lastStart = 0; + final long defaultSize = 50; + + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("Processing page {} attachments starting from {} and size {}", Locale.ROOT).format(new Object[] { page.getId(), lastStart, defaultSize })); + } + + try { + Boolean isLast = true; + do { + final ConfluenceResponse<Attachment> response = confluenceClient.getPageAttachments(page.getId(), (int) lastStart, (int) defaultSize); + + int count = 0; + for (final Page resultPage : response.getResults()) { + activities.addSeedDocument(ConfluenceUtil.generateRepositoryDocumentIdentifier(resultPage.getId(), page.getId())); + count++; + } + + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("Fetched and added {} seed document attachments for page {}", Locale.ROOT).format(new Object[] { new Integer(count), page.getId() })); + } + + lastStart += count; + isLast = response.isLast(); + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug(new MessageFormat("New start {0} and size {1}", Locale.ROOT).format(new Object[] { lastStart, defaultSize })); + } + } while (!isLast); + + } catch (final Exception e) { + handleConfluenceDownException(e, "seeding"); + } + } + + protected static void handleConfluenceDownException(final Exception e, final String context) throws ManifoldCFException, ServiceInterruption { + final long currentTime = System.currentTimeMillis(); + + // Server doesn't appear to by up. Try for a brief time then give up. + final String message = "Server appears down during " + context + ": " + e.getMessage(); + Logging.connectors.warn(message, e); + throw new ServiceInterruption(message, e, currentTime + interruptionRetryTime, -1L, 3, true); + } + + /** + * Handle page exception : retry 3rd times with a 5 minutes interval without aborting job in case of failure + * + * @param e + * @param context + * The error context (ex: 'page processing') + * @throws ManifoldCFException + * @throws ServiceInterruption + */ + protected static void handlePageException(final Exception e, final String context) throws ManifoldCFException, ServiceInterruption { + final long currentTime = System.currentTimeMillis(); + + // Server doesn't appear to by up. Try for a brief time then give up. + final String message = "Server appears down during " + context + ": " + e.getMessage(); + Logging.connectors.warn(message, e); + throw new ServiceInterruption(message, e, currentTime + interruptionRetryTime, -1L, 3, false); + } + + /* + * Process documents + * + * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# processDocuments(java.lang.String[], java.lang.String[], + * org.apache.manifoldcf.crawler.interfaces.IProcessActivity, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification, boolean[]) + */ + @Override + public void processDocuments(final String[] documentIdentifiers, final IExistingVersions statuses, final Specification spec, final IProcessActivity activities, final int jobMode, + final boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption { + + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Process Confluence documents: Inside processDocuments"); + } + + final ConfluenceSpecification confluenceSpecification = ConfluenceSpecification.from(spec); + + final boolean activeSecurity = confluenceSpecification.isSecurityActive(); + + for (int i = 0; i < documentIdentifiers.length; i++) { + final String documentIdentifier = documentIdentifiers[i]; + String pageId = documentIdentifier; + final String version = statuses.getIndexedVersionString(documentIdentifier); + final List<String> parentRestrictions = new ArrayList<>(); + if (pageId.startsWith(CHILD_PREFIX)) { + final JSONParser parser = new JSONParser(); + try { + final JSONObject child = (JSONObject) parser.parse(new StringReader(pageId.substring(CHILD_PREFIX.length()))); + pageId = child.get("id").toString(); + final JSONArray arrParentRestrictions = (JSONArray) child.get("parentRestricions"); + arrParentRestrictions.forEach(pr -> parentRestrictions.add(pr.toString())); + parentRestrictions.sort(String::compareToIgnoreCase); + } catch (IOException | ParseException e) { + handleException(e); + } + } + + final long startTime = System.currentTimeMillis(); + long fileSize = 0L; + final String errorCode = "OK"; + final String errorDesc = StringUtils.EMPTY; + ProcessResult pResult = null; + final boolean doLog = true; + + try { + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence: Processing document identifier '" + pageId + "'"); + } + + /* Ensure Confluence client is connected */ + if (!isConnected()) { + initConfluenceClient(); + } + + if (ConfluenceUtil.isAttachment(pageId)) { + pResult = processPageAsAttachment(activeSecurity, documentIdentifier, parentRestrictions, pageId, version, activities, doLog); + } else { + pResult = processPage(activeSecurity, documentIdentifier, parentRestrictions, pageId, version, activities, doLog, Maps.<String, String>newHashMap()); + } + } catch (final IOException ioe) { + handleIOException(ioe); + } catch (final Exception e) { + handleException(e); + } + + finally { + if (doLog) { + if (pResult != null && pResult.errorCode != null && !pResult.errorCode.isEmpty()) { + activities.recordActivity(new Long(startTime), ACTIVITY_READ, pResult.fileSize, pageId, pResult.errorCode, pResult.errorDescription, null); + } else { + if (pResult != null) { + fileSize = pResult.fileSize; + } + activities.recordActivity(new Long(startTime), ACTIVITY_READ, fileSize, pageId, errorCode, errorDesc, null); + } + } + } + + } + } + + /** + * <p> + * Process the specific page + * </p> + * + * @param activeSecurity + * Security enabled/disabled + * @param documentIdentifier + * The original documentIdentifier + * @param parentRestrictions + * The list of parent restrictions + * @param pageId + * The pageId being an attachment + * @param version + * The version of the page + * @param activities + * @param doLog + * + * @throws ManifoldCFException + * @throws IOException + * @throws ServiceInterruption + */ + private ProcessResult processPage(final boolean activeSecurity, final String documentIdentifier, final List<String> parentRestrictions, final String pageId, final String version, + final IProcessActivity activities, final boolean doLog, final Map<String, String> extraProperties) throws ManifoldCFException, ServiceInterruption, IOException { + Page page = new Page(); + try { + page = confluenceClient.getPage(pageId); + } catch (final Exception e) { + handlePageException(e, "page processing"); + } + return processPageInternal(activeSecurity, parentRestrictions, page, documentIdentifier, version, activities, doLog, extraProperties); + + } + + /** + * <p> + * Process the specific attachment + * </p> + * + * @param activeSecurity + * Security enabled/disabled + * @param documentIdentifier + * The original documentIdentifier + * @param parentRestrictions + * The list of parent restrictions + * @param pageId + * The pageId being an attachment + * @param version + * The version of the page + * @param activities + * @param doLog + * @throws IOException + * @throws ServiceInterruption + */ + private ProcessResult processPageAsAttachment(final boolean activeSecurity, final String documentIdentifier, final List<String> parentRestrictions, final String pageId, final String version, + final IProcessActivity activities, final boolean doLog) throws ManifoldCFException, ServiceInterruption, IOException { + + final String[] ids = ConfluenceUtil.getAttachmentAndPageId(pageId); + Attachment attachment = new Attachment(); + try { + attachment = confluenceClient.getAttachment(ids[0]); + } catch (final Exception e) { + handlePageException(e, "attachment processing"); + } + final Map<String, String> extraProperties = Maps.newHashMap(); + extraProperties.put("attachedBy", ids[1]); + return processPageInternal(activeSecurity, parentRestrictions, attachment, documentIdentifier, version, activities, doLog, extraProperties); + + } + + /** + * <p> + * Process the specific page + * </p> + * + * @param activeSecurity + * Security enabled/disabled + * @param parentRestrictions + * The list of parent restrictions + * @param page + * The page to process + * @param manifoldDocumentIdentifier + * @param version + * The version of the page + * @param activities + * @param doLog + * + * @throws ManifoldCFException + * @throws IOException + * @throws ServiceInterruption + */ + private ProcessResult processPageInternal(final boolean activeSecurity, final List<String> parentRestrictions, final Page page, final String manifoldDocumentIdentifier, final String version, + final IProcessActivity activities, final boolean doLog, final Map<String, String> extraProperties) throws ManifoldCFException, ServiceInterruption, IOException { + + /* Remove page if it has no content */ + /* + * Page does not have content if there was an error trying to get the page + */ + if (!page.hasContent()) { + activities.deleteDocument(manifoldDocumentIdentifier); + return new ProcessResult(page.getLength(), "DELETED", ""); + } + if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { + Logging.connectors.debug("Confluence: This content exists: " + page.getId()); + } + + final RepositoryDocument rd = new RepositoryDocument(); + final Date createdDate = page.getCreatedDate(); + final Date lastModified = page.getLastModifiedDate(); + final DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM, Locale.ROOT); + + /* + * Retain page in Manifold because it has not changed from last time This is needed to keep the identifier in Manifold data, because by + * default if a document is not retained nor ingested, it will be deleted by the framework + */ + final StringBuilder versionBuilder = new StringBuilder(); + versionBuilder.append(df.format(lastModified)); + final List<String> pageRestrictions = new ArrayList<String>(); + if (activeSecurity) { + final List<Restrictions> restrictions = getPageReadRestrictions(page.getId()); + for (final Restrictions res : restrictions) { + final ReadRestrictions rr = res.getReadRestrictions(); + rr.getUsers().forEach(user -> { + pageRestrictions.add("user-" + user.getUserKey()); + }); + rr.getGroups().forEach(group -> { + pageRestrictions.add("group-" + group.getName()); + }); + } + } + // Order the page restrictions alphabetically so the version will be always the same in case the same restrictions between two crawls are + // not retrieved in the same order + pageRestrictions.sort(String::compareToIgnoreCase); + versionBuilder.append("+"); + packList(versionBuilder, pageRestrictions, '+'); + versionBuilder.append("+"); + packList(versionBuilder, parentRestrictions, '+'); + final String lastVersion = versionBuilder.toString(); + + // Get and reference page direct childs if any + if (page.getType() == PageType.PAGE) { + final List<Page> pageChilds = getPageChilds(page.getId()); + for (final Page childPage : pageChilds) { + final JSONObject child = new JSONObject(); + child.put("id", childPage.getId()); + final List<String> childParentRestrictions = new ArrayList<>(); + // MCF only manage one level of parent ACLs, so if the current page has restrictions they must replace the current parent restrictions for + // its child pages + if (activeSecurity) { + if (pageRestrictions.isEmpty()) { + childParentRestrictions.addAll(parentRestrictions); + } else { + childParentRestrictions.addAll(pageRestrictions); + } + } + childParentRestrictions.sort(String::compareToIgnoreCase); + child.put("parentRestricions", childParentRestrictions); + activities.addDocumentReference(CHILD_PREFIX + child.toJSONString()); + } + } + + if (!activities.checkDocumentNeedsReindexing(manifoldDocumentIdentifier, lastVersion)) { + return new ProcessResult(page.getLength(), "RETAINED", ""); + } + + if (!activities.checkLengthIndexable(page.getLength())) { + activities.noDocument(manifoldDocumentIdentifier, lastVersion); + final String errorCode = IProcessActivity.EXCLUDED_LENGTH; + final String errorDesc = "Excluding document because of length (" + page.getLength() + ")"; + return new ProcessResult(page.getLength(), errorCode, errorDesc); + } + + if (!activities.checkMimeTypeIndexable(page.getMediaType())) { + activities.noDocument(manifoldDocumentIdentifier, lastVersion); + final String errorCode = IProcessActivity.EXCLUDED_MIMETYPE; + final String errorDesc = "Excluding document because of mime type (" + page.getMediaType() + ")"; + return new ProcessResult(page.getLength(), errorCode, errorDesc); + } + + if (!activities.checkDateIndexable(lastModified)) { + activities.noDocument(manifoldDocumentIdentifier, lastVersion); + final String errorCode = IProcessActivity.EXCLUDED_DATE; + final String errorDesc = "Excluding document because of date (" + lastModified + ")"; + return new ProcessResult(page.getLength(), errorCode, errorDesc); + } + + if (!activities.checkURLIndexable(page.getWebUrl())) { + activities.noDocument(manifoldDocumentIdentifier, lastVersion); + final String errorCode = IProcessActivity.EXCLUDED_URL; + final String errorDesc = "Excluding document because of URL ('" + page.getWebUrl() + "')"; + return new ProcessResult(page.getLength(), errorCode, errorDesc); + } + + /* Add repository document information */ + rd.setMimeType(page.getMediaType()); + if (createdDate != null) { + rd.setCreatedDate(createdDate); + } + if (lastModified != null) { + rd.setModifiedDate(lastModified); + } + rd.setIndexingDate(new Date()); + + /* Adding Page Metadata */ + final Map<String, Object> pageMetadata = page.getMetadataAsMap(); + for (final Entry<String, Object> entry : pageMetadata.entrySet()) { + if (entry.getValue() instanceof List) { + final List<?> list = (List<?>) entry.getValue(); + rd.addField(entry.getKey(), list.toArray(new String[list.size()])); + } else if (entry.getValue() != null) { + final String key = entry.getKey(); + final String value = entry.getValue().toString(); + rd.addField(key, value); + if (key.toLowerCase().contentEquals("title")) { + rd.addField("stream_name", value); + } + } + } + rd.addField("source", "confluence"); + + /* Adding extra properties */ + for (final Entry<String, String> entry : extraProperties.entrySet()) { + rd.addField(entry.getKey(), entry.getValue()); + } + + final String documentURI = page.getWebUrl(); + + /* Set repository document ACLs */ + if (activeSecurity) { + + rd.setSecurity(RepositoryDocument.SECURITY_TYPE_SHARE, new String[] { "space-" + page.getSpace() }, new String[] { defaultAuthorityDenyToken }); + + if (parentRestrictions.size() > 0) { + rd.setSecurity(RepositoryDocument.SECURITY_TYPE_PARENT, parentRestrictions.toArray(new String[0]), new String[] { defaultAuthorityDenyToken }); + } + + if (pageRestrictions.size() > 0) { + rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, pageRestrictions.toArray(new String[0]), new String[] { defaultAuthorityDenyToken }); + } + + } + + rd.setBinary(page.getContentStream(), page.getLength()); + rd.addField("size", String.valueOf(page.getLength())); + + /* Ingest document */ + activities.ingestDocumentWithException(manifoldDocumentIdentifier, lastVersion, documentURI, rd); + + return new ProcessResult(page.getLength(), null, null); + } + + /** + * <p> + * Handles IO Exception to manage whether the exception is an interruption so that the process needs to be executed again later on + * </p> + * + * @param e + * The Exception + * @throws ManifoldCFException + * @throws ServiceInterruption + */ + private static void handleIOException(final IOException e) throws ManifoldCFException, ServiceInterruption { + if (!(e instanceof java.net.SocketTimeoutException) && e instanceof InterruptedIOException) { + throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); + } + Logging.connectors.warn("IO exception: " + e.getMessage(), e); + final long currentTime = System.currentTimeMillis(); + throw new ServiceInterruption("IO exception: " + e.getMessage(), e, currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false); + } + + /** + * <p> + * Handles general exceptions + * </p> + * + * @param e + * The Exception + * @throws ServiceInterruption + * @throws ManifoldCFException + */ + private static void handleException(final Exception e) throws ServiceInterruption, ManifoldCFException { + if (!(e instanceof ServiceInterruption)) { + Logging.connectors.warn("Exception: " + e.getMessage(), e); + throw new ManifoldCFException("Exception: " + e.getMessage(), e, ManifoldCFException.REPOSITORY_CONNECTION_ERROR); + } else { + throw (ServiceInterruption) e; + } + } + + private class ProcessResult { + private final long fileSize; + private final String errorCode; + private final String errorDescription; + + private ProcessResult(final long fileSize, final String errorCode, final String errorDescription) { + this.fileSize = fileSize; + this.errorCode = errorCode; + this.errorDescription = errorDescription; + } + } + + /** + * <p> + * Internal private class used to parse and keep the specification configuration in object format + * </p> + * + * @author Antonio David Perez Morales <[email protected]> + * + */ + private static class ConfluenceSpecification { + private List<String> spaces; + private Boolean activateSecurity = true; + private Boolean processAttachments = false; + private String pageType = null; + + public Boolean isSecurityActive() { + return this.activateSecurity; + } + + /** + * <p> + * Returns if attachments should be processed + * </p> + * + * @return a {@code Boolean} indicating if the attachments should be processed or not + */ + public Boolean isProcessAttachments() { + return this.processAttachments; + } + + /** + * <p> + * Returns the list of configured spaces or an empty list meaning that all spaces should be processed + * </p> + * + * @return a {@code List<String>} of configured spaces + */ + public List<String> getSpaces() { + return this.spaces; + } + + /** + * <p> + * Returns configured page type + * </p> + * + * @return a {@code String} of configured page type + */ + public String getPageType() { + if (this.pageType == null || this.pageType.isEmpty()) { + return "page"; + } + + return this.pageType; + } + + public static ConfluenceSpecification from(final Specification spec) { + final ConfluenceSpecification cs = new ConfluenceSpecification(); + cs.spaces = Lists.newArrayList(); + for (int i = 0, len = spec.getChildCount(); i < len; i++) { + final SpecificationNode sn = spec.getChild(i); + if (sn.getType().equals(ConfluenceConfiguration.Specification.SPACES)) { + for (int j = 0, sLen = sn.getChildCount(); j < sLen; j++) { + final SpecificationNode specNode = sn.getChild(j); + if (specNode.getType().equals(ConfluenceConfiguration.Specification.SPACE)) { + cs.spaces.add(specNode.getAttributeValue(ConfluenceConfiguration.Specification.SPACE_KEY_ATTRIBUTE)); + + } + } + + } else if (sn.getType().equals(ConfluenceConfiguration.Specification.PAGES)) { + final String s = sn.getAttributeValue(ConfluenceConfiguration.Specification.PROCESS_ATTACHMENTS_ATTRIBUTE_KEY); + cs.processAttachments = Boolean.valueOf(s); + cs.pageType = sn.getAttributeValue(ConfluenceConfiguration.Specification.PAGETYPE); + } else if (sn.getType().equals(ConfluenceConfiguration.Specification.SECURITY)) { + final String s = sn.getAttributeValue(ConfluenceConfiguration.Specification.ACTIVATE_SECURITY_ATTRIBUTE_KEY); + cs.activateSecurity = Boolean.valueOf(s); + } + } + + return cs; + + } + } + +}
Added: manifoldcf/branches/CONNECTORS-1637/connectors/confluence-v6/src/main/java/org/apache/manifoldcf/crawler/connectors/confluence/v6/Messages.java URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1637/connectors/confluence-v6/src/main/java/org/apache/manifoldcf/crawler/connectors/confluence/v6/Messages.java?rev=1874903&view=auto ============================================================================== --- manifoldcf/branches/CONNECTORS-1637/connectors/confluence-v6/src/main/java/org/apache/manifoldcf/crawler/connectors/confluence/v6/Messages.java (added) +++ manifoldcf/branches/CONNECTORS-1637/connectors/confluence-v6/src/main/java/org/apache/manifoldcf/crawler/connectors/confluence/v6/Messages.java Fri Mar 6 15:29:45 2020 @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.manifoldcf.crawler.connectors.confluence.v6; + +import java.util.Locale; +import java.util.Map; + +import org.apache.manifoldcf.core.interfaces.IHTTPOutput; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; + +/** + * <p> + * Messages class + * </p> + * <p> + * Class used to render templates along with specific values + * </p> + * <p> + * Also used to get messages for specific Locales + * </p> + * + * @author Julien Massiera & Antonio David Perez Morales + * + */ +public class Messages extends org.apache.manifoldcf.ui.i18n.Messages { + public static final String DEFAULT_BUNDLE_NAME = "org.apache.manifoldcf.crawler.connectors.confluence.v6.common"; + public static final String DEFAULT_PATH_NAME = "org.apache.manifoldcf.crawler.connectors.confluence.v6"; + + /** + * Constructor - do no instantiate + */ + protected Messages() { + } + + public static String getString(final Locale locale, final String messageKey) { + return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getAttributeString(final Locale locale, final String messageKey) { + return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getBodyString(final Locale locale, final String messageKey) { + return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getAttributeJavascriptString(final Locale locale, final String messageKey) { + return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getBodyJavascriptString(final Locale locale, final String messageKey) { + return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getString(final Locale locale, final String messageKey, final Object[] args) { + return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + public static String getAttributeString(final Locale locale, final String messageKey, final Object[] args) { + return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + public static String getBodyString(final Locale locale, final String messageKey, final Object[] args) { + return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + public static String getAttributeJavascriptString(final Locale locale, final String messageKey, final Object[] args) { + return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + public static String getBodyJavascriptString(final Locale locale, final String messageKey, final Object[] args) { + return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + // More general methods which allow bundlenames and class loaders to be specified. + + public static String getString(final String bundleName, final Locale locale, final String messageKey, final Object[] args) { + return getString(Messages.class, bundleName, locale, messageKey, args); + } + + public static String getAttributeString(final String bundleName, final Locale locale, final String messageKey, final Object[] args) { + return getAttributeString(Messages.class, bundleName, locale, messageKey, args); + } + + public static String getBodyString(final String bundleName, final Locale locale, final String messageKey, final Object[] args) { + return getBodyString(Messages.class, bundleName, locale, messageKey, args); + } + + public static String getAttributeJavascriptString(final String bundleName, final Locale locale, final String messageKey, final Object[] args) { + return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args); + } + + public static String getBodyJavascriptString(final String bundleName, final Locale locale, final String messageKey, final Object[] args) { + return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args); + } + + // Resource output + + public static void outputResource(final IHTTPOutput output, final Locale locale, final String resourceKey, final Map<String, String> substitutionParameters, final boolean mapToUpperCase) + throws ManifoldCFException { + outputResource(output, Messages.class, DEFAULT_PATH_NAME, locale, resourceKey, substitutionParameters, mapToUpperCase); + } + + public static void outputResourceWithVelocity(final IHTTPOutput output, final Locale locale, final String resourceKey, final Map<String, String> substitutionParameters, final boolean mapToUpperCase) + throws ManifoldCFException { + outputResourceWithVelocity(output, Messages.class, DEFAULT_BUNDLE_NAME, DEFAULT_PATH_NAME, locale, resourceKey, substitutionParameters, mapToUpperCase); + } + + public static void outputResourceWithVelocity(final IHTTPOutput output, final Locale locale, final String resourceKey, final Map<String, Object> contextObjects) throws ManifoldCFException { + outputResourceWithVelocity(output, Messages.class, DEFAULT_BUNDLE_NAME, DEFAULT_PATH_NAME, locale, resourceKey, contextObjects); + } + +}
