Author: ssmaeklu Date: 2008-01-21 12:26:22 +0100 (Mon, 21 Jan 2008) New Revision: 5996
Added: trunk/sitemap-generator/ trunk/sitemap-generator/src/ trunk/sitemap-generator/src/main/ trunk/sitemap-generator/src/main/java/ trunk/sitemap-generator/src/main/java/no/ trunk/sitemap-generator/src/main/java/no/sesat/ trunk/sitemap-generator/src/main/java/no/sesat/sitemap/ trunk/sitemap-generator/src/main/resources/ Removed: trunk/sitemap-generator/src/ trunk/sitemap-generator/src/main/ trunk/sitemap-generator/src/main/java/ trunk/sitemap-generator/src/main/java/no/ trunk/sitemap-generator/src/main/java/no/sesat/ trunk/sitemap-generator/src/main/java/no/sesat/sitemap/ trunk/sitemap-generator/src/main/resources/ Modified: trunk/ trunk/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java trunk/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java trunk/query-transform-config-spi/src/main/java/no/sesat/search/query/transform/NewsCaseQueryTransformerConfig.java trunk/query-transform-control-spi/pom.xml trunk/query-transform-control-spi/src/main/java/no/sesat/search/query/transform/NewsCaseQueryTransformer.java trunk/search-command-control-spi/pom.xml trunk/search-servlet-handler-spi/pom.xml trunk/sitemap-generator/src/main/java/no/sesat/sitemap/SitemapGenerator.java trunk/skinresourcefeed/src/main/java/no/sesat/commons/resourcefeed/ResourceServlet.java trunk/war/src/main/java/no/sesat/search/http/servlet/SearchServlet.java Log: Merged revisions 5931,5942,5945-5946,5949-5952,5954-5955,5964 via svnmerge from http://sesat.no/svn/sesat-kernel/branches/2.15 ........ r5931 | ssthkjer | 2007-11-26 11:23:50 +0100 (Mon, 26 Nov 2007) | 1 line search-3784 - changed label on radiobuttons TOO EARLY ........ r5942 | ssenrogn | 2007-11-28 10:31:41 +0100 (Wed, 28 Nov 2007) | 1 line Added Eclipse files to ignore list. ........ r5945 | ssanjamt | 2007-12-05 10:31:32 +0100 (Wed, 05 Dec 2007) | 1 line SEARCH-3888 Support for iso-8859-1 html output for external ad network partners ........ 
r5946 | ssanjamt | 2007-12-05 14:31:56 +0100 (Wed, 05 Dec 2007) | 1 line SEARCH-3784 Fjerne faner på nettsøk - - Vi går for radiobuttons ........ r5949 | ssmaeklu | 2007-12-13 13:55:07 +0100 (Thu, 13 Dec 2007) | 1 line Import of SitemapGenerator into SESAT. ........ r5950 | ssmaeklu | 2007-12-18 15:26:20 +0100 (Tue, 18 Dec 2007) | 1 line Added debugging code. SEARCH-3742 ........ r5951 | ssmaeklu | 2007-12-18 18:00:18 +0100 (Tue, 18 Dec 2007) | 1 line Use level WARN ........ r5952 | ssmaeklu | 2007-12-20 16:59:41 +0100 (Thu, 20 Dec 2007) | 1 line New version of newsadmin-service to support multiple aggregators ........ r5954 | ssmaeklu | 2008-01-08 13:52:27 +0100 (Tue, 08 Jan 2008) | 1 line Remove operator characters from query before sending it off to the list server. ........ r5955 | ssmaeklu | 2008-01-08 15:59:03 +0100 (Tue, 08 Jan 2008) | 1 line SEARCH-3742. Remove operator characters from query before sending it off to the fast list server. Also happens to fix SEARCH-3883 ........ r5964 | ssenrogn | 2008-01-11 11:37:52 +0100 (Fri, 11 Jan 2008) | 1 line Added number tool to the velocity context. ........ 
Property changes on: trunk ___________________________________________________________________ Name: svnmerge-integrated - /branches/2.10:1-4690,4692-4745 /branches/2.11:1-4933 /branches/2.12:1-5051,5053-5106 /branches/2.13:1-5378 /branches/2.14:1-5508 /branches/2.15:1-5923,5932-5933 /branches/2.6:1-3877 /branches/2.7:1-4160 /branches/2.8:1-4446 /branches/2.9:1-4626 /branches/MAP_SEARCHv2:1-4544 + /branches/2.10:1-4690,4692-4745 /branches/2.11:1-4933 /branches/2.12:1-5051,5053-5106 /branches/2.13:1-5378 /branches/2.14:1-5508 /branches/2.15:1-5995 /branches/2.6:1-3877 /branches/2.7:1-4160 /branches/2.8:1-4446 /branches/2.9:1-4626 /branches/MAP_SEARCHv2:1-4544 Modified: trunk/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java =================================================================== --- trunk/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java 2008-01-21 11:00:01 UTC (rev 5995) +++ trunk/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java 2008-01-21 11:26:22 UTC (rev 5996) @@ -67,7 +67,9 @@ {'\u00f7', '\u00f7'}, {'\u2010', '\u2015'} }; - + + char[] OPERATOR_CHARACTERS = {'*', '-', '+', '(', ')'}; + /** The Context an QueryParser implementation needs to work off. 
* The QueryParser is not responsible for * - holding the user's orginal inputted query string, Modified: trunk/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java =================================================================== --- trunk/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java 2008-01-21 11:00:01 UTC (rev 5995) +++ trunk/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java 2008-01-21 11:26:22 UTC (rev 5996) @@ -50,6 +50,7 @@ import org.apache.log4j.Logger; +import org.apache.commons.lang.StringUtils; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -95,6 +96,7 @@ private static final int CACHE_CAPACITY = 100; // smaller than usual as each entry can contain up to 600 values! private static final String SKIP_REGEX; + private static final String OPERATOR_REGEX; // Attributes ---------------------------------------------------- @@ -116,6 +118,18 @@ builder.setLength(builder.length() - 1); // our skip regular expression SKIP_REGEX = '(' + builder.toString() + ')'; + + final StringBuilder operatorRegexpBuilder = new StringBuilder(); + + operatorRegexpBuilder.append("["); + + for (char c : QueryParser.OPERATOR_CHARACTERS) { + operatorRegexpBuilder.append('\\').append(c); + } + + operatorRegexpBuilder.append("]"); + + OPERATOR_REGEX = operatorRegexpBuilder.toString(); } // Constructors ------------------------------------------------- @@ -138,7 +152,13 @@ httpClient = HTTPClient.instance(host, port); init(); - analysisResult = queryFast(context.getQueryString()); + + // Remove whitespace (except space itself) and operator characters. + analysisResult = queryFast(context.getQueryString() + .replaceAll(" ", "xxKEEPWSxx") // Hack to keep spaces. + .replaceAll(SKIP_REGEX, "") + .replaceAll(OPERATOR_REGEX, "") + .replaceAll("xxKEEPWSxx", " ")); // Hack to keep spaces. 
} // Public -------------------------------------------------------- @@ -172,7 +192,8 @@ } else { // HACK since DefaultOperatorClause wraps its children in parenthesis - final String hackTerm = term.replaceAll("\\(|\\)",""); + // Also remove any operator characters. (SEARCH-3883) + final String hackTerm = term.replaceAll("\\(|\\)","").replaceAll(OPERATOR_REGEX, ""); for (TokenMatch occurance : analysisResult.get(listname)) { @@ -394,9 +415,10 @@ final String expr = "\\b" + match + "\\b"; final Pattern pattern = Pattern.compile(expr, RegExpEvaluatorFactory.REG_EXP_OPTIONS); + final String qNew = query.replaceAll("\\b" + SKIP_REGEX + "+\\b", " "); final Matcher m = pattern.matcher( // remove words made solely of characters that the parser considers whitespace - query.replaceAll("\\b" + SKIP_REGEX + "+\\b", " ")); + qNew); while (m.find()) { @@ -407,6 +429,10 @@ } result.get(name).add(tknMatch); + + if (result.get(name).size() % 100 == 0) { + LOG.warn("Pattern: " + pattern.pattern() + " name: " + name + " query: " + query + " match: " + match + " query2: " + qNew); + } } } Modified: trunk/query-transform-config-spi/src/main/java/no/sesat/search/query/transform/NewsCaseQueryTransformerConfig.java =================================================================== --- trunk/query-transform-config-spi/src/main/java/no/sesat/search/query/transform/NewsCaseQueryTransformerConfig.java 2008-01-21 11:00:01 UTC (rev 5995) +++ trunk/query-transform-config-spi/src/main/java/no/sesat/search/query/transform/NewsCaseQueryTransformerConfig.java 2008-01-21 11:26:22 UTC (rev 5996) @@ -45,19 +45,27 @@ private static final String UNCLUSTERED_DELAY = "unclustered-delay"; private static final String UNCLUSTERED_DELAY_IN_MINUTES = "unclustered-delay-in-minutes"; private static final String TIME_ZONE = "time-zone"; - + private static final String AGGREGATOR_ID = "aggregator-id"; + private static final String DEFAULT_CONVERT_ELEMENT = "default-convert"; private String timeZone = "UTC"; 
private String queryType; private String queryParameter; private String typeParameter; private String defaultType; + private String aggregatorIdStr; + /* + * NO->aggregator_id=1; + * SE->aggregator_id=2; + */ + private int aggregatorId=1; private boolean unclusteredDelayFilter = false; private int unclusteredDelayInMinutes = 10; private Map<String, String[]> typeConversions; - - - /** + + + + /** * @return */ public String getQueryType() { @@ -79,8 +87,14 @@ public String getDefaultType() { return defaultType; } + + public int getAggregatorId() { + + return aggregatorId; + } + public boolean isUnclusteredDelayFilter() { return unclusteredDelayFilter; } @@ -99,7 +113,13 @@ @Override public NewsCaseQueryTransformerConfig readQueryTransformer(final Element element) { - queryType = element.getAttribute(QUERY_TYPE); + + aggregatorIdStr = element.getAttribute(AGGREGATOR_ID); + if (aggregatorIdStr != null && aggregatorIdStr.length() > 0) { + aggregatorId = Integer.parseInt(aggregatorIdStr); + } + + queryType = element.getAttribute(QUERY_TYPE); if (element.getAttribute(QUERY_PARAMETER) != null && element.getAttribute(QUERY_PARAMETER).length() > 0) { queryParameter = element.getAttribute(QUERY_PARAMETER); } @@ -136,4 +156,7 @@ return this; } + + + } Modified: trunk/query-transform-control-spi/pom.xml =================================================================== --- trunk/query-transform-control-spi/pom.xml 2008-01-21 11:00:01 UTC (rev 5995) +++ trunk/query-transform-control-spi/pom.xml 2008-01-21 11:26:22 UTC (rev 5996) @@ -85,8 +85,8 @@ </dependency> <dependency> - <groupId>schibstedsok.newsadmin</groupId> - <artifactId>ha-services</artifactId> + <groupId>schibstedsok</groupId> + <artifactId>newsadmin-services</artifactId> </dependency> <!-- Testing --> Modified: trunk/query-transform-control-spi/src/main/java/no/sesat/search/query/transform/NewsCaseQueryTransformer.java =================================================================== --- 
trunk/query-transform-control-spi/src/main/java/no/sesat/search/query/transform/NewsCaseQueryTransformer.java 2008-01-21 11:00:01 UTC (rev 5995) +++ trunk/query-transform-control-spi/src/main/java/no/sesat/search/query/transform/NewsCaseQueryTransformer.java 2008-01-21 11:26:22 UTC (rev 5996) @@ -64,7 +64,7 @@ } LOG.debug("Original query is: '" + queryString + "'"); if (queryString != null && queryString.length() > 0) { - String transformedQuery = dataAccess.getQuery(queryString, config.getQueryType()); + String transformedQuery = dataAccess.getQuery(queryString, config.getQueryType(),config.getAggregatorId()); if (transformedQuery == null) { transformedQuery = defaultTransform(queryString); } @@ -163,12 +163,12 @@ * @param queryType * @return */ - public String getQuery(String newsCaseName, String queryType) { + public String getQuery(String newsCaseName, String queryType, int aggrId) { try { LOG.debug("Looking up query for: " + newsCaseName); final NewsCaseFacadeInterface newsCaseFacade = lookupDataService(); if (newsCaseFacade != null) { - String newsQuery = newsCaseFacade.searchForQuery(newsCaseName, queryType); + String newsQuery = newsCaseFacade.searchForQuery(newsCaseName, queryType, aggrId); if (newsQuery != null) { return newsQuery; } Modified: trunk/search-command-control-spi/pom.xml =================================================================== --- trunk/search-command-control-spi/pom.xml 2008-01-21 11:00:01 UTC (rev 5995) +++ trunk/search-command-control-spi/pom.xml 2008-01-21 11:26:22 UTC (rev 5996) @@ -100,8 +100,8 @@ </dependency> <dependency> - <groupId>schibstedsok.newsadmin</groupId> - <artifactId>ha-services</artifactId> + <groupId>schibstedsok</groupId> + <artifactId>newsadmin-services</artifactId> </dependency> <!-- Testing --> Modified: trunk/search-servlet-handler-spi/pom.xml =================================================================== --- trunk/search-servlet-handler-spi/pom.xml 2008-01-21 11:00:01 UTC (rev 5995) +++ 
trunk/search-servlet-handler-spi/pom.xml 2008-01-21 11:26:22 UTC (rev 5996) @@ -85,8 +85,8 @@ </dependency> <dependency> - <groupId>schibstedsok.newsadmin</groupId> - <artifactId>ha-services</artifactId> + <groupId>schibstedsok</groupId> + <artifactId>newsadmin-services</artifactId> </dependency> <!-- Testing --> Copied: trunk/sitemap-generator (from rev 5964, branches/2.15/sitemap-generator) Copied: trunk/sitemap-generator/src (from rev 5964, branches/2.15/sitemap-generator/src) Copied: trunk/sitemap-generator/src/main (from rev 5964, branches/2.15/sitemap-generator/src/main) Copied: trunk/sitemap-generator/src/main/java (from rev 5964, branches/2.15/sitemap-generator/src/main/java) Copied: trunk/sitemap-generator/src/main/java/no (from rev 5964, branches/2.15/sitemap-generator/src/main/java/no) Copied: trunk/sitemap-generator/src/main/java/no/sesat (from rev 5964, branches/2.15/sitemap-generator/src/main/java/no/sesat) Copied: trunk/sitemap-generator/src/main/java/no/sesat/sitemap (from rev 5964, branches/2.15/sitemap-generator/src/main/java/no/sesat/sitemap) Modified: trunk/sitemap-generator/src/main/java/no/sesat/sitemap/SitemapGenerator.java =================================================================== Copied: trunk/sitemap-generator/src/main/resources (from rev 5964, branches/2.15/sitemap-generator/src/main/resources) Modified: trunk/skinresourcefeed/src/main/java/no/sesat/commons/resourcefeed/ResourceServlet.java =================================================================== --- trunk/skinresourcefeed/src/main/java/no/sesat/commons/resourcefeed/ResourceServlet.java 2008-01-21 11:00:01 UTC (rev 5995) +++ trunk/skinresourcefeed/src/main/java/no/sesat/commons/resourcefeed/ResourceServlet.java 2008-01-21 11:26:22 UTC (rev 5996) @@ -324,11 +324,14 @@ // Remove path, site name and version suffix. 
final String jarName = path .substring(path.lastIndexOf('/') + 1) - .replaceAll("-(\\d+\\.?)+(-SNAPSHOT)?(-.*)?\\.jar$", "") + .replaceAll("-(\\d+\\.?)+(-SNAPSHOT).*\\.jar", "") .replaceAll("^([\\p{Alnum}]+\\.?)+-", ""); + - LOG.debug("Checking against " + jarName); - + if (LOG.isDebugEnabled()) { + LOG.debug("Checking against " + jarName); + } + if (jarName.equals(baseName)) { LOG.warn("Loading jarfile " + path); return servletConfig.getServletContext().getResource(path).openConnection().getInputStream(); Modified: trunk/war/src/main/java/no/sesat/search/http/servlet/SearchServlet.java =================================================================== --- trunk/war/src/main/java/no/sesat/search/http/servlet/SearchServlet.java 2008-01-21 11:00:01 UTC (rev 5995) +++ trunk/war/src/main/java/no/sesat/search/http/servlet/SearchServlet.java 2008-01-21 11:26:22 UTC (rev 5996) @@ -327,6 +327,9 @@ final String charset = "utf-8"; response.setCharacterEncoding(charset); response.setContentType("text/xml; charset=" + charset); + } else if (request.getParameter("encoding") != null && request.getParameter("encoding").equals("iso-8859-1")){ + response.setContentType("text/html; charset=iso-8859-1"); // for external javascript document.write(), where server uses iso encoding + response.setCharacterEncoding("iso-8859-1"); } else { final String charset = "utf-8"; response.setContentType("text/html; charset=" + charset); _______________________________________________ Kernel-commits mailing list [email protected] http://sesat.no/mailman/listinfo/kernel-commits
