[ https://issues.apache.org/jira/browse/SDAP-66?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16472890#comment-16472890 ]
ASF GitHub Bot commented on SDAP-66:
------------------------------------
lewismc closed pull request #14: SDAP-66 Build harvester for EONET EventsEB
URL: https://github.com/apache/incubator-sdap-mudrod/pull/14
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one
opened from a fork) once it has been merged, the diff is reproduced
below for the sake of provenance:
diff --git a/core/src/main/java/org/apache/sdap/mudrod/driver/ESDriver.java
b/core/src/main/java/org/apache/sdap/mudrod/driver/ESDriver.java
index 6a751f2..6d675fb 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/driver/ESDriver.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/driver/ESDriver.java
@@ -128,7 +128,7 @@ public void afterBulk(long executionId, BulkRequest
request, Throwable failure)
public void destroyBulkProcessor() {
try {
- getBulkProcessor().awaitClose(20, TimeUnit.MINUTES);
+ getBulkProcessor().awaitClose(10, TimeUnit.MINUTES);
setBulkProcessor(null);
refreshIndex();
} catch (InterruptedException e) {
diff --git a/core/src/main/java/org/apache/sdap/mudrod/main/MudrodEngine.java
b/core/src/main/java/org/apache/sdap/mudrod/main/MudrodEngine.java
index 6784f04..80b0856 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/main/MudrodEngine.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/main/MudrodEngine.java
@@ -30,10 +30,6 @@
import org.apache.sdap.mudrod.driver.ESDriver;
import org.apache.sdap.mudrod.driver.SparkDriver;
import org.apache.sdap.mudrod.integration.LinkageIntegration;
-import org.jdom2.Document;
-import org.jdom2.Element;
-import org.jdom2.JDOMException;
-import org.jdom2.input.SAXBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,7 +43,6 @@
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
-import java.util.List;
import java.util.Properties;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@@ -183,7 +178,8 @@ private String decompressSVMWithSGDModel(String
archiveName) throws IOException
FileUtils.copyURLToFile(scmArchive, archiveFile);
// Decompress archive
- int BUFFER_SIZE = 512000;
+ int bufferSize = 512000;
+ @SuppressWarnings("resource")
ZipInputStream zipIn = new ZipInputStream(new
FileInputStream(archiveFile));
ZipEntry entry;
while ((entry = zipIn.getNextEntry()) != null) {
@@ -200,10 +196,10 @@ private String decompressSVMWithSGDModel(String
archiveName) throws IOException
LOG.error("Unable to create directory '{}', during extraction of
archive contents.", f.getParentFile().getAbsolutePath());
}
int count;
- byte data[] = new byte[BUFFER_SIZE];
+ byte data[] = new byte[bufferSize];
FileOutputStream fos = new FileOutputStream(new File(tempDir,
entry.getName()), false);
- try (BufferedOutputStream dest = new BufferedOutputStream(fos,
BUFFER_SIZE)) {
- while ((count = zipIn.read(data, 0, BUFFER_SIZE)) != -1) {
+ try (BufferedOutputStream dest = new BufferedOutputStream(fos,
bufferSize)) {
+ while ((count = zipIn.read(data, 0, bufferSize)) != -1) {
dest.write(data, 0, count);
}
}
diff --git a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java
b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java
index a4fe686..ce0183a 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java
@@ -43,8 +43,8 @@
*
*/
private static final long serialVersionUID = 1L;
- DecimalFormat NDForm = new DecimalFormat("#.##");
- final Integer MAX_CHAR = 700;
+ DecimalFormat ndForm = new DecimalFormat("#.##");
+ static final Integer MAX_CHAR = 700;
public Searcher(Properties props, ESDriver es, SparkDriver spark) {
super(props, es, spark);
@@ -92,52 +92,58 @@ public Double getPop(Double pop) {
*/
@SuppressWarnings("unchecked")
public List<SResult> searchByQuery(String index, String type, String query,
String queryOperator, String rankOption) {
- boolean exists =
es.getClient().admin().indices().prepareExists(index).execute().actionGet().isExists();
+ boolean exists = es.getClient()
+ .admin()
+ .indices()
+ .prepareExists(index)
+ .execute()
+ .actionGet()
+ .isExists();
if (!exists) {
return new ArrayList<>();
}
SortOrder order = null;
- String sortFiled = "";
+ String sortField;
switch (rankOption) {
case "Rank-AllTimePopularity":
- sortFiled = "Dataset-AllTimePopularity";
+ sortField = "Dataset-AllTimePopularity";
order = SortOrder.DESC;
break;
case "Rank-MonthlyPopularity":
- sortFiled = "Dataset-MonthlyPopularity";
+ sortField = "Dataset-MonthlyPopularity";
order = SortOrder.DESC;
break;
case "Rank-UserPopularity":
- sortFiled = "Dataset-UserPopularity";
+ sortField = "Dataset-UserPopularity";
order = SortOrder.DESC;
break;
case "Rank-LongName-Full":
- sortFiled = "Dataset-LongName.raw";
+ sortField = "Dataset-LongName";
order = SortOrder.ASC;
break;
case "Rank-ShortName-Full":
- sortFiled = "Dataset-ShortName.raw";
+ sortField = "Dataset-ShortName";
order = SortOrder.ASC;
break;
case "Rank-GridSpatialResolution":
- sortFiled = "Dataset-GridSpatialResolution";
+ sortField = "Dataset-GridSpatialResolution";
order = SortOrder.DESC;
break;
case "Rank-SatelliteSpatialResolution":
- sortFiled = "Dataset-SatelliteSpatialResolution";
+ sortField = "Dataset-SatelliteSpatialResolution";
order = SortOrder.DESC;
break;
case "Rank-StartTimeLong-Long":
- sortFiled = "DatasetCoverage-StartTimeLong-Long";
+ sortField = "DatasetCoverage-StartTimeLong-Long";
order = SortOrder.ASC;
break;
case "Rank-StopTimeLong-Long":
- sortFiled = "DatasetCoverage-StopTimeLong-Long";
+ sortField = "DatasetCoverage-StopTimeLong-Long";
order = SortOrder.DESC;
break;
default:
- sortFiled = "Dataset-ShortName.raw";
+ sortField = "Dataset-ShortName";
order = SortOrder.ASC;
break;
}
@@ -146,12 +152,18 @@ public Double getPop(Double pop) {
BoolQueryBuilder qb = dp.createSemQuery(query, 1.0, queryOperator);
List<SResult> resultList = new ArrayList<>();
- SearchRequestBuilder builder =
es.getClient().prepareSearch(index).setTypes(type).setQuery(qb).addSort(sortFiled,
order).setSize(500).setTrackScores(true);
+ SearchRequestBuilder builder = es.getClient()
+ .prepareSearch(index)
+ .setTypes(type)
+ .setQuery(qb)
+ .addSort(sortField, order)
+ .setSize(500)
+ .setTrackScores(true);
SearchResponse response = builder.execute().actionGet();
for (SearchHit hit : response.getHits().getHits()) {
Map<String, Object> result = hit.getSource();
- Double relevance = Double.valueOf(NDForm.format(hit.getScore()));
+ Double relevance = Double.valueOf(ndForm.format(hit.getScore()));
String shortName = (String) result.get("Dataset-ShortName");
String longName = (String) result.get("Dataset-LongName");
diff --git a/core/src/main/java/org/apache/sdap/mudrod/tools/EONETIngester.java
b/core/src/main/java/org/apache/sdap/mudrod/tools/EONETIngester.java
new file mode 100644
index 0000000..f3d36b3
--- /dev/null
+++ b/core/src/main/java/org/apache/sdap/mudrod/tools/EONETIngester.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sdap.mudrod.tools;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+
+import java.io.IOException;
+import java.util.Properties;
+import java.util.concurrent.ExecutionException;
+
+import javax.net.ssl.SSLHandshakeException;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.util.EntityUtils;
+import org.apache.sdap.mudrod.discoveryengine.MudrodAbstract;
+import org.apache.sdap.mudrod.driver.ESDriver;
+import org.apache.sdap.mudrod.driver.SparkDriver;
+import org.apache.sdap.mudrod.main.MudrodConstants;
+import org.apache.sdap.mudrod.main.MudrodEngine;
+import org.elasticsearch.action.bulk.BulkProcessor;
+import org.elasticsearch.action.index.IndexRequest;
+import org.elasticsearch.action.update.UpdateRequest;
+import org.elasticsearch.action.update.UpdateResponse;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.get.GetResult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+
+/**
+ * Entry point providing ingestion logic of <a
href="https://eonet.sci.gsfc.nasa.gov/">
+ * Earth Observatory Natural Event Tracker (EONET)</a> data into
+ * the SDAP search server.
+ * @param <E>
+ */
+public class EONETIngester extends MudrodAbstract {
+
+ private static final long serialVersionUID = 1L;
+
+ private static final Logger LOG =
LoggerFactory.getLogger(EONETIngester.class);
+
+ private static final String[] EVENTS_URLS = {
+ "https://eonet.sci.gsfc.nasa.gov/api/v2.1/events?status=closed",
+ "https://eonet.sci.gsfc.nasa.gov/api/v2.1/events?status=open"};
+
+ public EONETIngester(Properties props, ESDriver es, SparkDriver spark) {
+ super(props, es, spark);
+ initMudrod();
+ }
+
+ /**
+ * @param args there are no arguments for this service...
+ * It ingests the entire EONET database. In the future this will
+ * most likely change to provide finer grained controls over
+ * what kind of EONET data we wish to ingest e.g. recent events from the
+ * last day only, etc.
+ */
+ public static void main(String[] args) {
+ MudrodEngine mudrodEngine = checkConfigInit(new MudrodEngine());
+ EONETIngester eonetIngester = new EONETIngester(
+ mudrodEngine.getConfig(), mudrodEngine.getESDriver(), null);
+ eonetIngester.acquireAllEvents(mudrodEngine);
+ }
+
+ private static MudrodEngine checkConfigInit(MudrodEngine mudrodEngine) {
+ if (mudrodEngine.getConfig().isEmpty()) {
+ mudrodEngine.loadConfig();
+ mudrodEngine.setESDriver(mudrodEngine.startESDriver());
+ }
+ return mudrodEngine;
+
+ }
+
+ public String acquireAllEvents(MudrodEngine mEngine) {
+ ESDriver esDriver = mEngine.getESDriver();
+ if (mEngine.getConfig().isEmpty()) {
+ mEngine.loadConfig();
+ esDriver = mEngine.startESDriver();
+ }
+ String result = null;
+ try {
+ result = executeBulkIndexRequest(mEngine, esDriver,
executeEonetGetOperations());
+ } catch (SSLHandshakeException e) {
+ e.printStackTrace();
+ }
+ if (result != null) {
+ return result;
+ } else {
+ return "";
+ }
+ }
+
+ private String executeBulkIndexRequest(MudrodEngine mEngine, ESDriver
esDriver, JsonArray jsonEventsArray) {
+ esDriver.createBulkProcessor();
+ BulkProcessor bp = esDriver.getBulkProcessor();
+ GetResult result = null;
+ String index =
mEngine.getConfig().getProperty(MudrodConstants.ES_INDEX_NAME);
+ String eventType = "eonet_event";
+ //for each event
+ for (JsonElement jsonElement : jsonEventsArray) {
+ UpdateRequest updateRequest = null;
+ JsonObject event = jsonElement.getAsJsonObject();
+ String eventId = event.get("id").toString();
+ try {
+ IndexRequest indexRequest = new IndexRequest(
+ index, eventType, eventId).source(executeEventMapping(event));
+ updateRequest =
+ new UpdateRequest(index, eventType,
eventId).upsert(indexRequest);
+ updateRequest.doc(indexRequest);
+ bp.add(updateRequest);
+ } catch (NumberFormatException e) {
+ LOG.error("Error whilst processing numbers", e);
+ }
+ UpdateResponse updateResponse = null;
+ try {
+ updateResponse = esDriver.getClient().update(updateRequest).get();
+ } catch (InterruptedException | ExecutionException e) {
+ e.printStackTrace();
+ }
+ if (updateResponse != null) {
+ result = updateResponse.getGetResult();
+ }
+ }
+ esDriver.destroyBulkProcessor();
+ //return result.getSource().toString();
+ return "";
+ }
+
+ private XContentBuilder executeEventMapping(JsonObject event) {
+ //for an individual event
+ XContentBuilder eventMapping = null;
+ if (null != event.get("closed")) {
+ try {
+ eventMapping = jsonBuilder()
+ .startObject()
+ .field("id", event.get("id"))
+ .field("title", event.get("title"))
+ .field("description", event.get("description"))
+ .field("link", event.get("link"))
+ .field("closed", event.get("closed"))
+ .field("categories", event.get("categories").getAsJsonArray())
+ .field("sources", event.get("sources").getAsJsonArray())
+ .field("geometries", event.get("geometries").getAsJsonArray())
+ .endObject();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ } else {
+ try {
+ eventMapping = jsonBuilder()
+ .startObject()
+ .field("id", event.get("id"))
+ .field("title", event.get("title"))
+ .field("description", event.get("description"))
+ .field("link", event.get("link"))
+ .field("categories", event.get("categories").getAsJsonArray())
+ .field("sources", event.get("sources").getAsJsonArray())
+ .field("geometries", event.get("geometries").getAsJsonArray())
+ .endObject();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ return eventMapping;
+ }
+
+ private JsonArray executeEonetGetOperations() throws SSLHandshakeException {
+ HttpClient client = HttpClientBuilder.create().build();
+ HttpResponse response = null;
+ JsonArray eventArray = new JsonArray();
+ for (String string : EVENTS_URLS) {
+ HttpGet request = new HttpGet(string);
+ // add request header
+ request.addHeader("User-Agent", "Apache SDAP MUDROD EONETIngester");
+ LOG.info("Executing: {}", request.toString());
+ try {
+ response = client.execute(request);
+ HttpEntity entity = response.getEntity();
+ JsonArray partialEventsArray =
extractEventsArrayFromJsonResponse(EntityUtils.toString(entity, "UTF-8"));
+ for (JsonElement jsonElement : partialEventsArray) {
+ eventArray.add(jsonElement);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ return eventArray;
+ }
+
+ private JsonArray extractEventsArrayFromJsonResponse(String string) {
+ return ((JsonObject) new
JsonParser().parse(string)).getAsJsonArray("events");
+ }
+
+}
diff --git a/core/src/main/java/org/apache/sdap/mudrod/tools/InstallCert.java
b/core/src/main/java/org/apache/sdap/mudrod/tools/InstallCert.java
new file mode 100644
index 0000000..304454b
--- /dev/null
+++ b/core/src/main/java/org/apache/sdap/mudrod/tools/InstallCert.java
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Sun Microsystems nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.apache.sdap.mudrod.tools;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.security.KeyStore;
+import java.security.MessageDigest;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.SSLException;
+import javax.net.ssl.SSLSocket;
+import javax.net.ssl.SSLSocketFactory;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.TrustManagerFactory;
+import javax.net.ssl.X509TrustManager;
+
+public class InstallCert {
+
+ public static void main(String[] args) throws Exception {
+ String host;
+ int port;
+ char[] passphrase;
+ if ((args.length == 1) || (args.length == 2)) {
+ String[] c = args[0].split(":");
+ host = c[0];
+ port = (c.length == 1) ? 443 : Integer.parseInt(c[1]);
+ String p = (args.length == 1) ? "changeit" : args[1];
+ passphrase = p.toCharArray();
+ } else {
+ System.out.println("Usage: java InstallCert <host>[:port]
[passphrase]");
+ return;
+ }
+
+ File file = new File("jssecacerts");
+ if (file.isFile() == false) {
+ char SEP = File.separatorChar;
+ File dir = new File(System.getProperty("java.home") + SEP
+ + "lib" + SEP + "security");
+ file = new File(dir, "jssecacerts");
+ if (file.isFile() == false) {
+ file = new File(dir, "cacerts");
+ }
+ }
+ System.out.println("Loading KeyStore " + file + "...");
+ InputStream in = new FileInputStream(file);
+ KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
+ ks.load(in, passphrase);
+ in.close();
+
+ SSLContext context = SSLContext.getInstance("TLS");
+ TrustManagerFactory tmf =
+
TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
+ tmf.init(ks);
+ X509TrustManager defaultTrustManager =
(X509TrustManager)tmf.getTrustManagers()[0];
+ SavingTrustManager tm = new SavingTrustManager(defaultTrustManager);
+ context.init(null, new TrustManager[] {tm}, null);
+ SSLSocketFactory factory = context.getSocketFactory();
+
+ System.out.println("Opening connection to " + host + ":" + port + "...");
+ SSLSocket socket = (SSLSocket)factory.createSocket(host, port);
+ socket.setSoTimeout(10000);
+ try {
+ System.out.println("Starting SSL handshake...");
+ socket.startHandshake();
+ socket.close();
+ System.out.println();
+ System.out.println("No errors, certificate is already trusted");
+ } catch (SSLException e) {
+ System.out.println();
+ e.printStackTrace(System.out);
+ }
+
+ X509Certificate[] chain = tm.chain;
+ if (chain == null) {
+ System.out.println("Could not obtain server certificate chain");
+ return;
+ }
+
+ BufferedReader reader =
+ new BufferedReader(new InputStreamReader(System.in));
+
+ System.out.println();
+ System.out.println("Server sent " + chain.length + " certificate(s):");
+ System.out.println();
+ MessageDigest sha1 = MessageDigest.getInstance("SHA1");
+ MessageDigest md5 = MessageDigest.getInstance("MD5");
+ for (int i = 0; i < chain.length; i++) {
+ X509Certificate cert = chain[i];
+ System.out.println
+ (" " + (i + 1) + " Subject " + cert.getSubjectDN());
+ System.out.println(" Issuer " + cert.getIssuerDN());
+ sha1.update(cert.getEncoded());
+ System.out.println(" sha1 " + toHexString(sha1.digest()));
+ md5.update(cert.getEncoded());
+ System.out.println(" md5 " + toHexString(md5.digest()));
+ System.out.println();
+ }
+
+ System.out.println("Enter certificate to add to trusted keystore or 'q' to
quit: [1]");
+ String line = reader.readLine().trim();
+ int k;
+ try {
+ k = (line.length() == 0) ? 0 : Integer.parseInt(line) - 1;
+ } catch (NumberFormatException e) {
+ System.out.println("KeyStore not changed");
+ return;
+ }
+
+ X509Certificate cert = chain[k];
+ String alias = host + "-" + (k + 1);
+ ks.setCertificateEntry(alias, cert);
+
+ OutputStream out = new FileOutputStream("jssecacerts");
+ ks.store(out, passphrase);
+ out.close();
+
+ System.out.println();
+ System.out.println(cert);
+ System.out.println();
+ System.out.println
+ ("Added certificate to keystore 'jssecacerts' using alias '"
+ + alias + "'");
+ }
+
+ private static final char[] HEXDIGITS = "0123456789abcdef".toCharArray();
+
+ private static String toHexString(byte[] bytes) {
+ StringBuilder sb = new StringBuilder(bytes.length * 3);
+ for (int b : bytes) {
+ b &= 0xff;
+ sb.append(HEXDIGITS[b >> 4]);
+ sb.append(HEXDIGITS[b & 15]);
+ sb.append(' ');
+ }
+ return sb.toString();
+ }
+
+ private static class SavingTrustManager implements X509TrustManager {
+
+ private final X509TrustManager tm;
+ private X509Certificate[] chain;
+
+ SavingTrustManager(X509TrustManager tm) {
+ this.tm = tm;
+ }
+
+ public X509Certificate[] getAcceptedIssuers() {
+ throw new UnsupportedOperationException();
+ }
+
+ public void checkClientTrusted(X509Certificate[] chain, String authType)
+ throws CertificateException {
+ throw new UnsupportedOperationException();
+ }
+
+ public void checkServerTrusted(X509Certificate[] chain, String authType)
+ throws CertificateException {
+ this.chain = chain;
+ tm.checkServerTrusted(chain, authType);
+ }
+ }
+
+}
\ No newline at end of file
diff --git
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ImportLogFile.java
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ImportLogFile.java
index 306ce2e..c7622d6 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ImportLogFile.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ImportLogFile.java
@@ -263,7 +263,14 @@ public void parseSingleLineFTP(String log, String index,
String type) {
IndexRequest ir;
try {
ir = new IndexRequest(index, type)
- .source(jsonBuilder().startObject().field("LogType",
MudrodConstants.FTP_LOG).field("IP", ip).field("Time", date).field("Request",
request).field("Bytes", Long.parseLong(bytes)).endObject());
+ .source(jsonBuilder()
+ .startObject()
+ .field("LogType", MudrodConstants.FTP_LOG)
+ .field("IP", ip)
+ .field("Time", date)
+ .field("Request", request)
+ .field("Bytes", Long.parseLong(bytes))
+ .endObject());
es.getBulkProcessor().add(ir);
} catch (NumberFormatException e) {
LOG.error("Error whilst processing numbers", e);
@@ -307,8 +314,7 @@ public void parseSingleLineHTTP(String log, String index,
String type) {
if (!crawlerDe.checkKnownCrawler(agent)) {
boolean tag = false;
String[] mimeTypes =
props.getProperty(MudrodConstants.BLACK_LIST_REQUEST).split(",");
- for(String str:mimeTypes)
- {
+ for(String str:mimeTypes) {
if (request.contains(str.trim())) {
tag = true;
break;
@@ -325,9 +331,18 @@ public void parseSingleLineHTTP(String log, String index,
String type) {
private void executeBulkRequest(IndexRequest ir, String index, String type,
Matcher matcher, Date date, String bytes) {
IndexRequest newIr = ir;
try {
- newIr = new IndexRequest(index, type).source(
- jsonBuilder().startObject().field("LogType",
MudrodConstants.HTTP_LOG).field("IP", matcher.group(1)).field("Time",
date).field("Request", matcher.group(5)).field("Response", matcher.group(6))
- .field("Bytes", Integer.parseInt(bytes)).field("Referer",
matcher.group(8)).field("Browser", matcher.group(9)).endObject());
+ newIr = new IndexRequest(index, type)
+ .source(jsonBuilder()
+ .startObject()
+ .field("LogType", MudrodConstants.HTTP_LOG)
+ .field("IP", matcher.group(1))
+ .field("Time", date)
+ .field("Request", matcher.group(5))
+ .field("Response", matcher.group(6))
+ .field("Bytes", Integer.parseInt(bytes))
+ .field("Referer", matcher.group(8))
+ .field("Browser", matcher.group(9))
+ .endObject());
es.getBulkProcessor().add(newIr);
} catch (NumberFormatException e) {
diff --git a/core/src/main/resources/config.properties
b/core/src/main/resources/config.properties
index 4c8991e..6e2bd54 100644
--- a/core/src/main/resources/config.properties
+++ b/core/src/main/resources/config.properties
@@ -11,7 +11,7 @@
# limitations under the License.
# Database configuration
-mudrod.cluster.name=MudrodES
+mudrod.cluster.name = MudrodES
mudrod.es.transport.tcp.port = 9300
mudrod.es.unicast.hosts = 127.0.0.1
mudrod.es.http.port = 9200
diff --git a/core/src/main/resources/elastic_mappings.json
b/core/src/main/resources/elastic_mappings.json
index 685f49e..ddb1952 100644
--- a/core/src/main/resources/elastic_mappings.json
+++ b/core/src/main/resources/elastic_mappings.json
@@ -1,68 +1,116 @@
{
- "_default_": {
- "properties": {
- "keywords": {
- "type": "text",
- "analyzer": "csv",
- "fielddata": true
- },
- "views": {
- "type": "string",
- "analyzer": "csv"
- },
- "downloads": {
- "type": "string",
- "analyzer": "csv"
- },
- "RequestUrl": {
- "type": "string",
- "include_in_all": false,
- "index": "no"
- },
- "IP": {
- "type": "keyword",
- "index": "not_analyzed"
- },
- "Browser": {
- "type": "string",
- "include_in_all": false,
- "index": "no"
- },
- "SessionURL": {
- "type": "string",
- "include_in_all": false,
- "index": "no"
- },
- "Referer": {
- "type": "string",
- "index": "not_analyzed"
- },
- "SessionID": {
- "type": "string",
- "index": "not_analyzed"
- },
- "Response": {
- "type": "string",
- "include_in_all": false,
- "index": "no"
- },
- "Request": {
- "type": "string",
- "include_in_all": false,
- "index": "no"
- },
- "Coordinates": {
- "type": "geo_point",
- "include_in_all": false,
- "index": "no"
- },
- "LogType": {
- "type": "string",
- "index": "not_analyzed"
- },
- "Dataset-Metadata": {
- "type": "completion"
- }
+ "mappings": {
+ "doc": {
+ "dynamic_templates": [
+ {
+ "geo_point_mapping": {
+ "match_mapping_type": "nested",
+ "match": "Point",
+ "mapping": {
+ "type": "geo_point"
+ }
+ },
+ "geo_shape_mapping": {
+ "match_mapping_type": "nested",
+ "match": "Polygon",
+ "mapping": {
+ "type": "geo_shape"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "eonet_event": {
+ "properties": {
+ "sources": {
+ "type": "nested",
+ "fields": {
+ "id": {
+ "ignore_above": 256,
+ "type": "keyword"
+ },
+ "url": {
+ "ignore_above": 256,
+ "type": "keyword"
+ }
+ }
+ },
+ "geometries": {
+ "type": "nested",
+ "fields": {
+ "date": {
+ "ignore_above": 256,
+ "type": "date"
+ },
+ "type": {
+ "ignore_above": 256,
+ "type": "keyword"
+ },
+ "coordinates": {
+ "ignore_above": 256,
+ "dynamic": true
+ }
+ }
+ },
+ "link": {
+ "type": "text",
+ "fields": {
+ "keyword": {
+ "ignore_above": 256,
+ "type": "keyword"
+ }
+ }
+ },
+ "closed": {
+ "type": "text",
+ "fields": {
+ "keyword": {
+ "ignore_above": 256,
+ "type": "keyword"
+ }
+ }
+ },
+ "description": {
+ "type": "text",
+ "fields": {
+ "keyword": {
+ "ignore_above": 256,
+ "type": "keyword"
+ }
+ }
+ },
+ "categories": {
+ "type": "nested",
+ "fields": {
+ "id": {
+ "ignore_above": 256,
+ "type": "integer"
+ },
+ "title": {
+ "ignore_above": 256,
+ "type": "keyword"
+ }
+ }
+ },
+ "id": {
+ "type": "text",
+ "fields": {
+ "keyword": {
+ "ignore_above": 256,
+ "type": "keyword"
+ }
+ }
+ },
+ "title": {
+ "type": "completion",
+ "fields": {
+ "keyword": {
+ "ignore_above": 256,
+ "type": "keyword"
+ }
+ }
+ }
+ }
}
- }
}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 6d1414d..11312bf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -499,7 +499,7 @@
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
- <version>2.5</version>
+ <version>2.8.4</version>
<scope>compile</scope>
</dependency>
<dependency>
diff --git
a/service/src/main/java/org/apache/sdap/mudrod/services/DefaultExceptionMapper.java
b/service/src/main/java/org/apache/sdap/mudrod/services/DefaultExceptionMapper.java
index d8e20fa..e07b300 100644
---
a/service/src/main/java/org/apache/sdap/mudrod/services/DefaultExceptionMapper.java
+++
b/service/src/main/java/org/apache/sdap/mudrod/services/DefaultExceptionMapper.java
@@ -38,7 +38,10 @@ public Response toResponse(Throwable e) {
UUID errorId = UUID.randomUUID();
LOG.error("Internal server error " + errorId.toString(), e);
- String errorString = "An error occurred while processing your request.
Please contact the system administrator and provide the following error log ID
" + errorId.toString();
+ String errorString =
+ "An error occurred while processing your request. "
+ + "Please contact the system administrator and provide "
+ + "the following error log ID " + errorId.toString();
return Response.serverError().entity(new
Gson().toJson(errorString)).build();
}
diff --git
a/service/src/main/java/org/apache/sdap/mudrod/services/MudrodContextListener.java
b/service/src/main/java/org/apache/sdap/mudrod/services/MudrodContextListener.java
index 72130d3..7d89508 100644
---
a/service/src/main/java/org/apache/sdap/mudrod/services/MudrodContextListener.java
+++
b/service/src/main/java/org/apache/sdap/mudrod/services/MudrodContextListener.java
@@ -21,10 +21,9 @@
import org.apache.sdap.mudrod.driver.ESDriver;
import org.apache.sdap.mudrod.driver.SparkDriver;
import org.apache.sdap.mudrod.main.MudrodEngine;
-import org.apache.sdap.mudrod.ontology.Ontology;
-import org.apache.sdap.mudrod.ontology.OntologyFactory;
import org.apache.sdap.mudrod.ssearch.Ranker;
import org.apache.sdap.mudrod.ssearch.Searcher;
+import org.apache.sdap.mudrod.tools.EONETIngester;
import java.util.Properties;
@@ -60,13 +59,16 @@ public void contextInitialized(ServletContextEvent arg0) {
Properties props = me.loadConfig();
me.setESDriver(new ESDriver(props));
me.setSparkDriver(new SparkDriver(props));
+ ESDriver es = me.getESDriver();
ServletContext ctx = arg0.getServletContext();
- Searcher searcher = new Searcher(props, me.getESDriver(), null);
- Ranker ranker = new Ranker(props, me.getESDriver(), me.getSparkDriver());
+ Searcher searcher = new Searcher(props, es, null);
+ Ranker ranker = new Ranker(props, es, me.getSparkDriver());
+ EONETIngester eonetIngester = new EONETIngester(props, es, null);
ctx.setAttribute("MudrodInstance", me);
ctx.setAttribute("MudrodSearcher", searcher);
ctx.setAttribute("MudrodRanker", ranker);
+ ctx.setAttribute("MudrodEONETIngester", eonetIngester);
}
}
diff --git
a/service/src/main/java/org/apache/sdap/mudrod/services/eonet/EONETIngestionResource.java
b/service/src/main/java/org/apache/sdap/mudrod/services/eonet/EONETIngestionResource.java
new file mode 100644
index 0000000..6b410f4
--- /dev/null
+++
b/service/src/main/java/org/apache/sdap/mudrod/services/eonet/EONETIngestionResource.java
@@ -0,0 +1,57 @@
+/**
+ *
+ */
+package org.apache.sdap.mudrod.services.eonet;
+
+import javax.servlet.ServletContext;
+import javax.ws.rs.Consumes;
+import javax.ws.rs.GET;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import org.apache.sdap.mudrod.main.MudrodEngine;
+import org.apache.sdap.mudrod.tools.EONETIngester;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.Gson;
+
+/**
+ * An endpoint to execute ingestion of <a
href="https://eonet.sci.gsfc.nasa.gov/">
+ * Earth Observatory Natural Event Tracker (EONET)</a> data into
+ * the MUDROD search server.
+ */
+@Path("/eonet")
+public class EONETIngestionResource {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(EONETIngestionResource.class);
+ private EONETIngester eonetIngester;
+ private MudrodEngine mEngine;
+
+ public EONETIngestionResource(@Context ServletContext sc) {
+ this.mEngine = (MudrodEngine) sc.getAttribute("MudrodInstance");
+ this.eonetIngester = (EONETIngester)
sc.getAttribute("MudrodEONETIngester");
+ }
+
+ @GET
+ @Path("/status")
+ @Produces("text/html")
+ public Response status() {
+ return Response.ok("<h1>This is MUDROD EONET Ingestion Resource: running
correctly...</h1>").build();
+ }
+
+ @GET
+ @Path("/ingestAllEvents")
+ @Produces(MediaType.APPLICATION_JSON)
+ @Consumes("text/plain")
+ public Response executeEONETIngestion() {
+ String result = eonetIngester.acquireAllEvents(mEngine);
+ String json = new Gson().toJson(result);
+ LOG.info("Response received: {}", json);
+ return Response.ok(json, MediaType.APPLICATION_JSON).build();
+ }
+
+}
diff --git
a/service/src/main/java/org/apache/sdap/mudrod/services/eonet/package-info.java
b/service/src/main/java/org/apache/sdap/mudrod/services/eonet/package-info.java
new file mode 100644
index 0000000..519430b
--- /dev/null
+++
b/service/src/main/java/org/apache/sdap/mudrod/services/eonet/package-info.java
@@ -0,0 +1,14 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sdap.mudrod.services.eonet;
\ No newline at end of file
diff --git a/service/src/main/webapp/WEB-INF/web.xml b/service/src/main/webapp/WEB-INF/web.xml
index e62e668..2e939f6 100644
--- a/service/src/main/webapp/WEB-INF/web.xml
+++ b/service/src/main/webapp/WEB-INF/web.xml
@@ -26,12 +26,13 @@
<param-name>jaxrs.serviceClasses</param-name>
<param-value>
org.apache.sdap.mudrod.services.autocomplete.AutoCompleteResource,
- org.apache.sdap.mudrod.services.search.SearchDatasetDetailResource,
+ org.apache.sdap.mudrod.services.eonet.EONETIngestionResource,
+ org.apache.sdap.mudrod.services.ontology.OntologyResource,
org.apache.sdap.mudrod.services.recommendation.HybridRecomDatasetsResource,
+ org.apache.sdap.mudrod.services.search.SearchDatasetDetailResource,
org.apache.sdap.mudrod.services.search.SearchMetadataResource,
org.apache.sdap.mudrod.services.search.SearchVocabResource,
org.apache.sdap.mudrod.services.search.SessionDetailResource,
- org.apache.sdap.mudrod.services.ontology.OntologyResource,
</param-value>
</init-param>
<init-param>
diff --git a/web/pom.xml b/web/pom.xml
index 3b637b0..5ad9782 100644
--- a/web/pom.xml
+++ b/web/pom.xml
@@ -17,9 +17,12 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>org.apache.sdap.mudrod</groupId>
- <artifactId>mudrod-web</artifactId>
- <version>0.0.1-SNAPSHOT</version>
+ <parent>
+ <groupId>org.apache.sdap.mudrod</groupId>
+ <artifactId>mudrod-parent</artifactId>
+ <version>0.0.1-SNAPSHOT</version>
+ <relativePath>../</relativePath>
+ </parent>
<build>
<resources>
<resource>
@@ -28,14 +31,12 @@
</resource>
</resources>
</build>
- <packaging>jar</packaging>
+ <artifactId>mudrod-web</artifactId>
+ <packaging>jar</packaging>
<name>Mudrod :: Web</name>
+ <description>Mudrod Web UI application.</description>
<url>https://github.com/apache/incubator-sdap-mudrod.git</url>
- <description>MUDROD: Mining and Utilizing Dataset Relevancy from
- Oceanographic Datasets to Improve Data Discovery and Access.
- MUDROD is a semantic discovery and search project.
- </description>
<licenses>
<license>
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> Build harvester for EONET EventsEB
> ----------------------------------
>
> Key: SDAP-66
> URL: https://issues.apache.org/jira/browse/SDAP-66
> Project: Apache Science Data Analytics Platform
> Issue Type: Task
> Reporter: Lewis John McGibbney
> Assignee: Lewis John McGibbney
> Priority: Major
>
> [EONET|https://eonet.sci.gsfc.nasa.gov] provides a rich knowledge resource
> and a repository of metadata about natural events. It is accessible via web
> services so we could augment MUDROD search results with metadata and
> recommendations about natural events. Specifically, we could for example
> correlate recommendation results for a user query with [previous (recent)
> severe
> storms|https://eonet.sci.gsfc.nasa.gov/api/v2.1/categories/10?status=closed].
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)