Diogo Simões has proposed merging 
lp:~diogo-simoes89/zorba/data-cleaning-thesaurus into 
lp:zorba/data-cleaning-module.

Requested reviews:
  Zorba Coders (zorba-coders)

For more details, see:
https://code.launchpad.net/~diogo-simoes89/zorba/data-cleaning-thesaurus/+merge/100683

This revision includes a new normalization function: capitalize($string as 
xs:string) as xs:string.

It also includes the thesaurus-based module, with the check-related ( $s1 as 
xs:string, $s2 as xs:string, $uri as xs:string, $type as xs:string ) and the 
related-terms ( $s1 as xs:string, $uri as xs:string, $type as xs:string ) 
functions.
-- 
https://code.launchpad.net/~diogo-simoes89/zorba/data-cleaning-thesaurus/+merge/100683
Your team Zorba Coders is requested to review the proposed merge of 
lp:~diogo-simoes89/zorba/data-cleaning-thesaurus into 
lp:zorba/data-cleaning-module.
=== modified file 'src/com/zorba-xquery/www/modules/data-cleaning/normalization.xq'
--- src/com/zorba-xquery/www/modules/data-cleaning/normalization.xq	2011-11-08 21:16:29 +0000
+++ src/com/zorba-xquery/www/modules/data-cleaning/normalization.xq	2012-04-03 20:16:21 +0000
@@ -31,12 +31,34 @@
 module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization";
 
 import module namespace http = "http://www.zorba-xquery.com/modules/http-client";
+import module namespace ft = "http://www.zorba-xquery.com/modules/full-text"; (: provides ft:is-stop-word, used by capitalize :)
 
 declare namespace ann = "http://www.zorba-xquery.com/annotations";
 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
 declare option ver:module-version "2.0";
 
 (:~
+: Capitalizes a string word by word, splitting it on single space characters.
+: The first word, and every later word that ft:is-stop-word does not flag, gets an
+: upper-case first letter and a lower-case remainder; flagged words are lower-cased.
+:
+: @param $string The string to be capitalized.
+: @return The string resulting from the conversion.
+: @example test/Queries/data-cleaning/normalization/capitalize.xq
+:)
+declare function normalization:capitalize ($string as xs:string) as xs:string{
+  let $words := tokenize ($string, " ")
+  let $head := $words[1]
+  let $first := concat(upper-case(substring($head, 1, 1)), substring(lower-case($head), 2))
+  let $rest :=
+    for $w in subsequence($words, 2)
+    return
+      if (ft:is-stop-word($w))
+      then lower-case($w)
+      else concat(upper-case(substring($w, 1, 1)), substring(lower-case($w), 2))
+  return string-join(($first, $rest), " ")
+};
+
+(:~
  : Converts a given string representation of a date value into a date representation valid according 
  : to the corresponding XML Schema type.
  :

=== added file 'src/com/zorba-xquery/www/modules/data-cleaning/thesaurus-based.xq'
--- src/com/zorba-xquery/www/modules/data-cleaning/thesaurus-based.xq	1970-01-01 00:00:00 +0000
+++ src/com/zorba-xquery/www/modules/data-cleaning/thesaurus-based.xq	2012-04-03 20:16:21 +0000
@@ -0,0 +1,74 @@
+(:
+ : Copyright 2006-2009 The FLWOR Foundation.
+ :
+ : Licensed under the Apache License, Version 2.0 (the "License");
+ : you may not use this file except in compliance with the License.
+ : You may obtain a copy of the License at
+ :
+ : http://www.apache.org/licenses/LICENSE-2.0
+ :
+ : Unless required by applicable law or agreed to in writing, software
+ : distributed under the License is distributed on an "AS IS" BASIS,
+ : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ : See the License for the specific language governing permissions and
+ : limitations under the License.
+ :)
+
+(:~
+ : This library module provides thesaurus functions for checking semantic relations between strings
+ : and for retrieving the terms related to a given string.
+ :
+ : These functions are particularly useful in tasks related to the creation of semantic mappings.
+ : Both functions delegate to ft:thesaurus-lookup from the full-text module.
+ :
+ : @author Bruno Martins and Diogo Simões
+ :)
+
+module namespace thesaurus = "http://www.zorba-xquery.com/modules/data-cleaning/thesaurus";
+
+import module namespace ft = "http://www.zorba-xquery.com/modules/full-text";
+
+(:~
+ : Checks if two strings have a relationship defined in a given thesaurus.
+ :
+ : The check looks up $s2 with ft:thesaurus-lookup (full-text module), always
+ : using the language "en", and tests whether $s1 is among the returned terms.
+ :
+ : @param $s1 The first string, searched for among the terms returned for $s2.
+ : @param $s2 The second string, used as the thesaurus lookup phrase.
+ : @param $uri The URI of the thesaurus to be considered.
+ : @param $type An identifier for the type of relationship.
+ :
+ : @return true if the first string has the provided relationship with the second string defined in the thesaurus and false otherwise.
+ : @example test/Queries/data-cleaning/thesaurus-based/check-related.xq
+ :)
+declare function thesaurus:check-related ( $s1 as xs:string, $s2 as xs:string, $uri as xs:string, $type as xs:string ) as xs:boolean {
+  let $language := xs:language("en")
+  let $related-to-s2 := ft:thesaurus-lookup($uri, $s2, $language, $type)
+  return
+    (: general comparison: true as soon as any returned term equals $s1 :)
+    $s1 = $related-to-s2
+};
+
+(:~
+ : Returns a sequence with the strings that have a relationship,
+ : defined in a given thesaurus, with the string provided as input.
+ : The lookup is delegated to ft:thesaurus-lookup (full-text module), always
+ : using the language "en"; its result is returned unchanged.
+ :
+ : @param $s1 The string with the query term.
+ : @param $uri The URI of the thesaurus to be considered.
+ : @param $type An identifier for the type of relationship.
+ :
+ : @return A sequence with the strings that have the provided relationship, defined in the thesaurus, with the query term.
+ : @example test/Queries/data-cleaning/thesaurus-based/related-terms.xq
+ :)
+declare function thesaurus:related-terms ( $s1 as xs:string, $uri as xs:string, $type as xs:string ) as xs:string* {
+  ft:thesaurus-lookup(
+    $uri,
+    $s1,
+    xs:language("en"),
+    $type )
+};
+
+

=== added file 'test/ExpQueryResults/data-cleaning/normalization/capitalize.xml.res'
--- test/ExpQueryResults/data-cleaning/normalization/capitalize.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/data-cleaning/normalization/capitalize.xml.res	2012-04-03 20:16:21 +0000
@@ -0,0 +1,1 @@
+The Lord of the Rings

=== added directory 'test/ExpQueryResults/data-cleaning/thesaurus-based'
=== added file 'test/ExpQueryResults/data-cleaning/thesaurus-based/check-related.xml.res'
--- test/ExpQueryResults/data-cleaning/thesaurus-based/check-related.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/data-cleaning/thesaurus-based/check-related.xml.res	2012-04-03 20:16:21 +0000
@@ -0,0 +1,1 @@
+true

=== added file 'test/ExpQueryResults/data-cleaning/thesaurus-based/related-terms.xml.res'
--- test/ExpQueryResults/data-cleaning/thesaurus-based/related-terms.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/data-cleaning/thesaurus-based/related-terms.xml.res	2012-04-03 20:16:21 +0000
@@ -0,0 +1,1 @@
+chromatic color chromatic colour spectral color spectral colour clothing article of clothing vesture wear wearable habiliment organization organisation sky dye dyestuff amobarbital lycaenid lycaenid butterfly discolor discolour colour color coloring colouring covering consumer goods social group atmosphere coloring material colouring material barbiturate truth serum truth drug butterfly change

=== added file 'test/Queries/data-cleaning/normalization/capitalize.xq'
--- test/Queries/data-cleaning/normalization/capitalize.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/data-cleaning/normalization/capitalize.xq	2012-04-03 20:16:21 +0000
@@ -0,0 +1,3 @@
+import module namespace normalization = "http://www.zorba-xquery.com/modules/data-cleaning/normalization";
+
+normalization:capitalize ("the lord of the rings") (: expected result: The Lord of the Rings :)

=== added directory 'test/Queries/data-cleaning/thesaurus-based'
=== added file 'test/Queries/data-cleaning/thesaurus-based/check-related.xq'
--- test/Queries/data-cleaning/thesaurus-based/check-related.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/data-cleaning/thesaurus-based/check-related.xq	2012-04-03 20:16:21 +0000
@@ -0,0 +1,25 @@
+import module namespace thesaurus = "http://www.zorba-xquery.com/modules/data-cleaning/thesaurus";
+
+thesaurus:check-related ( "animal", "dog", "http://wordnet.princeton.edu", "BT" ) (: expected result: true :)
+
+(: Example configuration (taken from zorba testsuite):
+ 
+Args: 
+--thesaurus 
+http://wordnet.princeton.edu:=$RBKT_BINARY_DIR/thesauri/wordnet-en.zth
+ 
+ 
+---------------------------------------------------------------------------------------
+Args:  --thesaurus  http://wordnet.princeton.edu:=$RBKT_BINARY_DIR/thesauri/wordnet-en.zth 
+ 
+---------------------------------------------------------------------------------------
+ 
+ 
+ 
+Expected output:
+ 
+true
+ 
+ 
+:)
+

=== added file 'test/Queries/data-cleaning/thesaurus-based/related-terms.xq'
--- test/Queries/data-cleaning/thesaurus-based/related-terms.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/data-cleaning/thesaurus-based/related-terms.xq	2012-04-03 20:16:21 +0000
@@ -0,0 +1,24 @@
+import module namespace thesaurus = "http://www.zorba-xquery.com/modules/data-cleaning/thesaurus";
+
+thesaurus:related-terms( "blue", "http://wordnet.princeton.edu", "BT" ) (: returns a sequence of strings, not a boolean :)
+
+(: Example configuration (taken from zorba testsuite):
+ 
+Args: 
+--thesaurus 
+http://wordnet.princeton.edu:=$RBKT_BINARY_DIR/thesauri/wordnet-en.zth
+ 
+ 
+---------------------------------------------------------------------------------------
+Args:  --thesaurus  http://wordnet.princeton.edu:=$RBKT_BINARY_DIR/thesauri/wordnet-en.zth 
+ 
+---------------------------------------------------------------------------------------
+ 
+ 
+ 
+Expected output:
+ 
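+the sequence of related terms stored in test/ExpQueryResults/data-cleaning/thesaurus-based/related-terms.xml.res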
+ 
+ 
+:)

-- 
Mailing list: https://launchpad.net/~zorba-coders
Post to     : zorba-coders@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zorba-coders
More help   : https://help.launchpad.net/ListHelp

Reply via email to