This is an automated email from the ASF dual-hosted git repository. gcruz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/allura.git
commit 760dede19609f818410b6f6c9a37b754337f3ac9 Author: Dave Brondsema <[email protected]> AuthorDate: Wed Aug 14 09:57:25 2024 -0400 clean up unused solr fields --- Allura/allura/lib/solr.py | 7 ++++ solr_config/allura/conf/schema.xml | 69 -------------------------------------- 2 files changed, 7 insertions(+), 69 deletions(-) diff --git a/Allura/allura/lib/solr.py b/Allura/allura/lib/solr.py index 49c76afb3..7635b36d6 100644 --- a/Allura/allura/lib/solr.py +++ b/Allura/allura/lib/solr.py @@ -154,6 +154,13 @@ class MockSOLR: for o in objects: o['text'] = ''.join(o['text']) json.dumps(o) # ensure no errors (since pysolr 3.9+ uses json API to solr) + for k in o.keys(): + if k.endswith(('_i', '_s', '_l', '_t', '_b', '_f', '_d', '_dt', '_ws')): + continue + elif k in ('id', 'text', 'title'): + continue + else: + raise ValueError(f'Unexpected solr field {k!r}, probably not in schema.xml') self.db[o['id']] = o def commit(self): diff --git a/solr_config/allura/conf/schema.xml b/solr_config/allura/conf/schema.xml index afca49639..fcb1ced3b 100644 --- a/solr_config/allura/conf/schema.xml +++ b/solr_config/allura/conf/schema.xml @@ -111,72 +111,7 @@ <field name="text" type="text_general" indexed="true" stored="true" multiValued="true"/> <!-- END of required field values --> - <field name="description" type="text_general" indexed="true" stored="true" /> - <field name="group_id" type="tint" indexed="true" stored="true"/> - <field name="group_ranking" type="tint" indexed="true" stored="true"/> - <field name="has_file" type="tint" indexed="true" stored="true"/> - <field name="help_wanted" type="boolean" indexed="true" stored="true" /> - <field name="latest_file_date" type="date" indexed="true" stored="true" /> - <field name="license" type="string" indexed="true" stored="true" /> - <field name="license_other" type="string" indexed="true" stored="true" /> - <field name="name" type="text_general" indexed="true" stored="true"/> - <field name="num_developers" type="tint" indexed="true" stored="true"/> - <field name="num_downloads" type="tint" indexed="true" stored="true"/> - <field name="num_downloads_week" type="tint" indexed="true" stored="true"/> - <field name="num_services" type="tint" indexed="true" stored="true"/> - <field name="percentile" type="float" indexed="true" stored="true"/> - <field name="project_type" type="tint" indexed="true" stored="true" /> - <field name="project_doc_id" type="string" indexed="true" stored="true" /> - <field name="registration_date" type="date" indexed="true" stored="true" /> - <field name="screenshot_url" type="string" indexed="true" stored="true" /> - <field name="trove" type="text_general" indexed="true" stored="true" /> - <field name="unix_group_name" type="string" indexed="true" stored="true" /> - <field name="source" type="string" indexed="true" stored="true" /> - <field name="rating" type="float" indexed="true" stored="true" /> - <field name="review_count" type="tint" indexed="true" stored="true" /> - - <!-- Common metadata fields, named specifically to match up with - SolrCell metadata when parsing rich documents such as Word, PDF. - Some fields are multiValued only because Tika currently may return - multiple values for them. Some metadata is parsed from the documents, - but there are some which come from the client context: - "content_type": From the HTTP headers of incoming stream - "resourcename": From SolrCell request param resource.name - --> <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> - <field name="subject" type="text_general" indexed="true" stored="true"/> - <field name="comments" type="text_general" indexed="true" stored="true"/> - <field name="author" type="text_general" indexed="true" stored="true"/> - <field name="keywords" type="text_general" indexed="true" stored="true"/> - <field name="category" type="text_general" indexed="true" stored="true"/> - <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> - <field name="last_modified" type="date" indexed="true" stored="true"/> - <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> - - <!-- Main body of document extracted by SolrCell. - NOTE: This field is not indexed by default, since it is also copied to "text" - using copyField below. This is to save space. Use this field for returning and - highlighting document content. Use the "text" field to search the content. --> - - - <!-- catchall field, containing all other searchable text fields (implemented - via copyField further on in this schema --> - - <!-- catchall text field that indexes tokens both normally and in reverse for efficient - leading wildcard queries. --> - - <!-- non-tokenized version of manufacturer to make it easier to sort or group - results by manufacturer. copied from "manu" via copyField --> - <!-- - Some fields such as popularity and manu_exact could be modified to - leverage doc values: - <field name="popularity" type="int" indexed="true" stored="true" docValues="true" default="0" /> - <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" default="" /> - - Although it would make indexing slightly slower and the index bigger, it - would also make the index faster to load, more memory-efficient and more - NRT-friendly. - --> <!-- Dynamic field definitions allow using convention over configuration for fields via the specification of patterns to match field names. @@ -184,10 +119,6 @@ RESTRICTION: the glob-like pattern in the name attribute must have a "*" only at the start or the end. --> - <!-- Type used to index the lat and lon components for the "location" FieldType --> - - <!-- some trie-coded dynamic fields for faster range queries --> - <!-- uncomment the following to ignore any fields that don't already match an existing field name or dynamic field, rather than reporting them as an error. alternately, change the type="ignored" to some other type e.g. "text" if you want
