Changeset: 4ff1a5e59c20 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ff1a5e59c20
Modified Files:
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Optimize the function for checking the subset-superset (i.e., isSubset())
between CSs.
diffs (138 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1665,14 +1665,14 @@ oid putaCStoHash(CSBats *csBats, oid* ke
/* Return 1 if sorted arr2[] is a subset of sorted arr1[]
* arr1 has m members, arr2 has n members
* */
-
static int isSubset(oid* arr1, oid* arr2, int m, int n)
{
int i = 0, j = 0;
- if(m < n)
- return 0;
-
+ // m > n
+ //
+ if (arr2[n-1] > arr1[m-1]) return 0;
+
while( i < n && j < m )
{
if( arr1[j] < arr2[i] )
@@ -1692,80 +1692,6 @@ static int isSubset(oid* arr1, oid* arr2
return 1;
}
-/*
-static int isSubset(oid* arr1, oid* arr2, int m, int n)
-{
- int i = 0, j = 0;
-
- if(m < n)
- return 0;
-
- while( i < n && j < m )
- {
- if( arr1[j] == arr2[i] )
- {
- j++;
- i++;
- }
- else if ( arr1[j] < arr2[i] )
- {
- break;
- //Check whether arr2[] is a subset of sorted arr1[]
- }
- else if( arr1[j] > arr2[i] ){
- i++;
- //Check whether arr2[] is a subset of sorted arr1[]
- }
- }
-
- if (arr1[j] < arr2[i]){
- j++;
- while( i < n && j < m )
- {
- if( arr1[j] < arr2[i] )
- j++;
- else if( arr1[j] == arr2[i] )
- {
- j++;
- i++;
- }
- else if( arr1[j] > arr2[i] )
- return 0;
- }
-
- if (i < n)
- return 0;
- else
- return 1;
-
- }
-
-
- if (arr1[j] > arr2[i]){
- i++;
-
- while( i < n && j < m )
- {
- if( arr1[j] > arr2[i] )
- i++;
- else if( arr1[j] == arr2[i] )
- {
- j++;
- i++;
- }
- else if( arr1[j] < arr2[i] )
- return 0;
- }
-
- if (j < m)
- return 0;
- else
- return -1;
- }
-
- return 0;
-}
-*/
/*
* Using TF-IDF for calculating the similarity score
@@ -1932,18 +1858,24 @@ void getMaximumFreqCSs(CSset *freqCSset,
for (i = 0; i < numFreqCS; i++){
if (freqCSset->items[i].parentFreqIdx != -1) continue;
for (j = (i+1); j < numFreqCS; j++){
- if (isSubset(freqCSset->items[i].lstProp,
freqCSset->items[j].lstProp,
-
freqCSset->items[i].numProp,freqCSset->items[j].numProp) == 1) {
- /* CSj is a subset of CSi */
- freqCSset->items[j].parentFreqIdx = i;
+ if (freqCSset->items[j].numProp >
freqCSset->items[i].numProp){
+ if (isSubset(freqCSset->items[j].lstProp,
freqCSset->items[i].lstProp,
+
freqCSset->items[j].numProp,freqCSset->items[i].numProp) == 1) {
+ /* CSj is a superset of CSi */
+ freqCSset->items[i].parentFreqIdx = j;
+ break;
+ }
}
- else if (isSubset(freqCSset->items[j].lstProp,
freqCSset->items[i].lstProp,
-
freqCSset->items[j].numProp,freqCSset->items[i].numProp) == 1) {
- /* CSj is a subset of CSi */
- freqCSset->items[i].parentFreqIdx = j;
- break;
+ else if (freqCSset->items[j].numProp <
freqCSset->items[i].numProp){
+ if (isSubset(freqCSset->items[i].lstProp,
freqCSset->items[j].lstProp,
+
freqCSset->items[i].numProp,freqCSset->items[j].numProp) == 1) {
+ /* CSj is a subset of CSi */
+ freqCSset->items[j].parentFreqIdx = i;
+ }
+
}
-
+
+ //Do not need to consider the case that the numProps
are the same
}
/* By the end, if this CS is not a subset of any other CS */
if (freqCSset->items[i].parentFreqIdx == -1){
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list