This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch 1169 in repository https://gitbox.apache.org/repos/asf/incubator-stormcrawler.git
commit ed587b60a8f5611df5a1b9f01db84e9140cb0752 Author: rz <[email protected]> AuthorDate: Fri Mar 29 21:57:56 2024 +0100 #1169 - Enable Apache RAT and fix missing licenses Signed-off-by: rz <[email protected]> --- .github/FUNDING.yml | 12 ----- .github/workflows/maven.yml | 15 ++++++ THIRD-PARTY.txt | 4 +- .../stormcrawler/util/CharsetIdentification.java | 16 ++++++ .../parse/filter/CollectionTaggerTest.java | 16 ++++++ .../META-INF/archetype-post-generate.groovy | 16 ++++++ .../BulkItemResponseToFailedFlag.java | 16 ++++++ .../elasticsearch/bolt/DeletionBolt.java | 16 ++++++ .../META-INF/archetype-post-generate.groovy | 16 ++++++ .../opensearch/BulkItemResponseToFailedFlag.java | 16 ++++++ external/solr/cores/docs/conf/schema.xml | 18 +++++++ external/solr/cores/docs/conf/solrconfig.xml | 18 +++++++ external/solr/cores/metrics/conf/schema.xml | 18 +++++++ external/solr/cores/metrics/conf/solrconfig.xml | 18 +++++++ external/solr/cores/solr.xml | 18 +++++++ external/solr/cores/status/conf/schema.xml | 18 +++++++ external/solr/cores/status/conf/solrconfig.xml | 18 +++++++ pom.xml | 59 ++++++++++++++++++++++ 18 files changed, 314 insertions(+), 14 deletions(-) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index ae6af18c..00000000 --- a/.github/FUNDING.yml +++ /dev/null @@ -1,12 +0,0 @@ -# These are supported funding model platforms - -github: [DigitalPebble] -patreon: # Replace with a single Patreon username -open_collective: # Replace with a single Open Collective username -ko_fi: # Replace with a single Ko-fi username -tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel -community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry -liberapay: # Replace with a single Liberapay username -issuehunt: # Replace with a single IssueHunt username -otechie: # Replace with a single Otechie username -custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index c3045d9d..2aae6785 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -6,6 +6,21 @@ name: Java CI with Maven on: [push, pull_request] jobs: + rat: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/cache@v3 + with: + path: ~/.m2/repository + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v3 + with: + distribution: adopt + java-version: 17 + - name: Build with Maven + run: mvn -B --no-transfer-progress -Prat -DskipTests verify + build: runs-on: ${{ matrix.os }} continue-on-error: ${{ matrix.experimental }} diff --git a/THIRD-PARTY.txt b/THIRD-PARTY.txt index 98cd4b9c..bfe6d04c 100644 --- a/THIRD-PARTY.txt +++ b/THIRD-PARTY.txt @@ -346,9 +346,9 @@ List of third-party dependencies grouped by their license type. * sniffer (org.elasticsearch.client:elasticsearch-rest-client-sniffer:7.17.7 - https://github.com/elastic/elasticsearch) * sniffer (org.opensearch.client:opensearch-rest-client-sniffer:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) * SparseBitSet (com.zaxxer:SparseBitSet:1.2 - https://github.com/brettwooldridge/SparseBitSet) - * storm-autocreds (org.apache.storm:storm-autocreds:2.6.1 - https://storm.apache.org/external/storm-autocreds) + * storm-autocreds (org.apache.storm:storm-autocreds:2.6.1 - https://storm.apache.org/storm-autocreds) * Storm Client (org.apache.storm:storm-client:2.6.1 - https://storm.apache.org/storm-client) - * storm-hdfs (org.apache.storm:storm-hdfs:2.6.1 - https://storm.apache.org/external/storm-hdfs) + * storm-hdfs (org.apache.storm:storm-hdfs:2.6.1 - https://storm.apache.org/storm-hdfs) * swagger-annotations-jakarta (io.swagger.core.v3:swagger-annotations-jakarta:2.2.17 - https://github.com/swagger-api/swagger-core/modules/swagger-annotations-jakarta) * TagSoup (org.ccil.cowan.tagsoup:tagsoup:1.2.1 - http://home.ccil.org/~cowan/XML/tagsoup/) * T-Digest (com.tdunning:t-digest:3.2 - https://github.com/tdunning/t-digest) diff --git a/core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java b/core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java index 14d59d52..b9a767a9 100644 --- a/core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java +++ b/core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.stormcrawler.util; import com.ibm.icu.text.CharsetDetector; diff --git a/core/src/test/java/org/apache/stormcrawler/parse/filter/CollectionTaggerTest.java b/core/src/test/java/org/apache/stormcrawler/parse/filter/CollectionTaggerTest.java index fcb12ae5..fec98fd0 100644 --- a/core/src/test/java/org/apache/stormcrawler/parse/filter/CollectionTaggerTest.java +++ b/core/src/test/java/org/apache/stormcrawler/parse/filter/CollectionTaggerTest.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.stormcrawler.parse.filter; import com.fasterxml.jackson.databind.node.NullNode; diff --git a/external/elasticsearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy b/external/elasticsearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy index 499d2294..b1ba5c9f 100644 --- a/external/elasticsearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy +++ b/external/elasticsearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ def file = new File(request.getOutputDirectory(), request.getArtifactId() + "/ES_IndexInit.sh") file.setExecutable(true, false) diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/BulkItemResponseToFailedFlag.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/BulkItemResponseToFailedFlag.java index d9492215..1b535481 100644 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/BulkItemResponseToFailedFlag.java +++ b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/BulkItemResponseToFailedFlag.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.stormcrawler.elasticsearch; import java.io.IOException; diff --git a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/DeletionBolt.java b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/DeletionBolt.java index cbdfa9f9..ed37fa16 100644 --- a/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/DeletionBolt.java +++ b/external/elasticsearch/src/main/java/org/apache/stormcrawler/elasticsearch/bolt/DeletionBolt.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.stormcrawler.elasticsearch.bolt; import java.io.IOException; diff --git a/external/opensearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy b/external/opensearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy index 274f599e..bbdb5497 100644 --- a/external/opensearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy +++ b/external/opensearch/archetype/src/main/resources/META-INF/archetype-post-generate.groovy @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ def file1 = new File(request.getOutputDirectory(), request.getArtifactId() + "/dashboards/importDashboards.sh") file1.setExecutable(true, false) diff --git a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java index a978ffe9..cfceefb0 100644 --- a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java +++ b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.stormcrawler.opensearch; import java.io.IOException; diff --git a/external/solr/cores/docs/conf/schema.xml b/external/solr/cores/docs/conf/schema.xml index 9d5fde2f..dbc2bc9a 100755 --- a/external/solr/cores/docs/conf/schema.xml +++ b/external/solr/cores/docs/conf/schema.xml @@ -1,4 +1,22 @@ <?xml version="1.0" encoding="UTF-8" ?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> <schema version="1.5" name="simplest-solr"> <fieldType name="string" class="solr.StrField"/> diff --git a/external/solr/cores/docs/conf/solrconfig.xml b/external/solr/cores/docs/conf/solrconfig.xml index 025742f8..b366eff7 100755 --- a/external/solr/cores/docs/conf/solrconfig.xml +++ b/external/solr/cores/docs/conf/solrconfig.xml @@ -1,4 +1,22 @@ <?xml version="1.0" encoding="UTF-8" ?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> <config> <luceneMatchVersion>7.5.0</luceneMatchVersion> <requestDispatcher handleSelect="false"> diff --git a/external/solr/cores/metrics/conf/schema.xml b/external/solr/cores/metrics/conf/schema.xml index 77b1feb1..b9d95a46 100755 --- a/external/solr/cores/metrics/conf/schema.xml +++ b/external/solr/cores/metrics/conf/schema.xml @@ -1,4 +1,22 @@ <?xml version="1.0" encoding="UTF-8" ?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> <schema version="1.5" name="simplest-solr"> <fieldType name="string" class="solr.StrField"/> <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> diff --git a/external/solr/cores/metrics/conf/solrconfig.xml b/external/solr/cores/metrics/conf/solrconfig.xml index fabd9a88..f703df44 100755 --- a/external/solr/cores/metrics/conf/solrconfig.xml +++ b/external/solr/cores/metrics/conf/solrconfig.xml @@ -1,4 +1,22 @@ <?xml version="1.0" encoding="UTF-8" ?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> <config> <luceneMatchVersion>7.5.0</luceneMatchVersion> <requestDispatcher handleSelect="false"> diff --git a/external/solr/cores/solr.xml b/external/solr/cores/solr.xml index 191e51f5..9ec41dcf 100644 --- a/external/solr/cores/solr.xml +++ b/external/solr/cores/solr.xml @@ -1,2 +1,20 @@ <?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> <solr></solr> diff --git a/external/solr/cores/status/conf/schema.xml b/external/solr/cores/status/conf/schema.xml index 93763c03..ccdd2377 100755 --- a/external/solr/cores/status/conf/schema.xml +++ b/external/solr/cores/status/conf/schema.xml @@ -1,4 +1,22 @@ <?xml version="1.0" encoding="UTF-8" ?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> <schema version="1.5" name="simplest-solr"> <fieldType name="string" class="solr.StrField"/> <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> diff --git a/external/solr/cores/status/conf/solrconfig.xml b/external/solr/cores/status/conf/solrconfig.xml index 131d4497..ec916f90 100755 --- a/external/solr/cores/status/conf/solrconfig.xml +++ b/external/solr/cores/status/conf/solrconfig.xml @@ -1,4 +1,22 @@ <?xml version="1.0" encoding="UTF-8" ?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> <config> <luceneMatchVersion>7.5.0</luceneMatchVersion> <requestDispatcher handleSelect="false"> diff --git a/pom.xml b/pom.xml index f79f0578..f39cdc9a 100644 --- a/pom.xml +++ b/pom.xml @@ -367,6 +367,65 @@ under the License. </plugins> </build> + <profiles> + <!-- Run Apache Rat license checks in a separate profile, because during local builds it doesn't skip files + that are not checked into Git --> + <profile> + <id>rat</id> + <build> + <plugins> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <version>0.15</version> + <executions> + <execution> + <phase>verify</phase> + <goals> + <goal>check</goal> + </goals> + </execution> + </executions> + <configuration> + <outputDirectory>${project.basedir}/rat</outputDirectory> + <excludes> + <exclude>**/*.yml</exclude> + <exclude>**/*.yaml</exclude> + <exclude>**/*.ndjson</exclude> + <exclude>**/*.sh</exclude> + <exclude>**/*.mapping</exclude> + <exclude>**/*.flux</exclude> + <exclude>**/*.script</exclude> + <exclude>**/*.properties</exclude> + <exclude>**/*.txt</exclude> + <exclude>**/README.md</exclude> + <exclude>**/target/**</exclude> + <exclude>**/test/resources/**/*</exclude> <!-- test data for "old" ITs. --> + <exclude>LICENSE</exclude> + <exclude>NOTICE</exclude> + <exclude>DISCLAIMER</exclude> + <exclude>CONTRIBUTING.md</exclude> + <exclude>THIRD-PARTY.properties</exclude> + <exclude>THIRD-PARTY.txt</exclude> + <exclude>.mvn/**</exclude> + <exclude>.github/*.md</exclude> + <exclude>.github/workflows/**</exclude> + <exclude>.gitattributes</exclude> + <exclude>.asf.yaml</exclude> + <exclude>**/dependency-reduced-pom.xml</exclude> + <exclude>.editorconfig</exclude> + <exclude>**/.settings/**/*</exclude> <!-- Eclipse --> + <exclude>**/.classpath</exclude> <!-- Eclipse --> + <exclude>**/.project</exclude> <!-- Eclipse --> + <exclude>**/.idea</exclude> <!-- IntelliJ --> + </excludes> + </configuration> + </plugin> + </plugins> + </build> + </profile> + </profiles> + <dependencyManagement> <dependencies> <dependency>
