Hello. I am trying to implement full-text search over file content using an embedded Oak, version 1.16.0. The NodeStore is kept in an RDB (MS SQL Server 2008 R2).
Tried to create index like it said in Oak documentation to index all properties. /oak:index/assetType - jcr:primaryType = "oak:QueryIndexDefinition" - type = "lucene" - compatVersion = 2 - async = "async" + indexRules - jcr:primaryType = "nt:unstructured" + nt:base + properties - jcr:primaryType = "nt:unstructured" + allProps - name = ".*" - isRegexp = true - nodeScopeIndex = true //--------------------------------------------------- Create index. Tried different combinations of node types. Nothing work. public static void createIndex(Repository repository) { Session session = null; try { session = repository.login(); Node root = session.getRootNode(); Node index = root.getNode("oak:index"); Node lucineIndex = index.addNode("assetType","oak:QueryIndexDefinition"); lucineIndex.setProperty("compatVersion", "2"); lucineIndex.setProperty("type", "lucene"); lucineIndex.setProperty("async", "async"); Node rules = lucineIndex.addNode("indexRules", "nt:unstructured"); Node base = rules.addNode("nt:base"); Node properties = base.addNode("properties", "nt:unstructured"); Node allProps = properties.addNode("allProps"); allProps.setProperty("jcr:content", ".*"); allProps.setProperty("isRegexp", true); allProps.setProperty("nodeScopeIndex", true); session.save(); } catch (LoginException e) { e.printStackTrace(); } catch (RepositoryException e) { e.printStackTrace(); } finally { session.logout(); } } //--------------------------------------------------- Add some file public static void saveFileIfNotExist(byte[] rawFile, String fileName, String folderName, String mimeType, Repository repository) { Session session = null; try { session = repository.login(new SimpleCredentials("admin", "admin".toCharArray())); Node root = session.getRootNode(); Binary binary = session.getValueFactory().createBinary(new ByteArrayInputStream(rawFile)); if(!root.hasNode(folderName)) { System.out.println("NO FOLDER"); Node folder = root.addNode(folderName, "nt:folder"); Node file = 
folder.addNode(fileName, "nt:file"); Node content = file.addNode("jcr:content", "nt:resource"); content.setProperty("jcr:mimeType", mimeType); content.setProperty("jcr:data", binary); } else { System.out.println("FOLDER EXIST"); } session.save(); } catch (RepositoryException e) { e.printStackTrace(); } finally { session.logout(); } } //--------------------------------------------------- File content: An implementation of the Value interface must override the inherited method Object.equals(Object) so that, given Value instances V1 and V2, V1.equals(V2) will return true if. //--------------------------------------------------- Try to search file content DocumentNodeStore rdb = new DocumentNodeStore(new RDBDocumentNodeStoreBuilder().setRDBConnection(dataSource)); Repository repo = new Jcr(new Oak(rdb)).with(new OpenSecurityProvider()).createRepository(); createIndex(repo); byte[] rawFile = readBytes("D:\\file.txt"); saveFileIfNotExist(rawFile, "txt_folder", "text_file", "text/plain", repo); Session session = null; try { session = repo.login(); Node root = session.getRootNode(); Node index = root.getNode("oak:index"); QueryManager queryManager = session.getWorkspace().getQueryManager();session.getWorkspace().getQueryManager(); Query query = queryManager.createQuery("SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn)", Query.JCR_SQL2); QueryResult result = query.execute(); RowIterator ri = result.getRows(); while (ri.hasNext()) { Row row = ri.nextRow(); System.out.println("Row: " + row.toString()); } } catch (RepositoryException e) { e.printStackTrace(); } finally { session.logout(); ((RepositoryImpl) repo).shutdown(); rdb.dispose(); } //--------------------------------------------------- But nothing returns, and warns in log: 2019-10-03 11:54:46,054 [main] DEBUG QueryEngineImpl - Parsing JCR-SQL2 statement: SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn) 2019-10-03 11:54:46,054 [main] DEBUG 
QueryEngineImpl - Parsing JCR-SQL2 statement: SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn) 2019-10-03 11:54:46,070 [main] DEBUG QueryImpl - cost using filter Filter(query=SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn) fullText="must", path=*) 2019-10-03 11:54:46,070 [main] DEBUG QueryImpl - cost using filter Filter(query=SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn) fullText="must", path=*) 2019-10-03 11:54:46,073 [main] DEBUG QueryImpl - cost for UUIDDiffIndex is Infinity 2019-10-03 11:54:46,073 [main] DEBUG QueryImpl - cost for UUIDDiffIndex is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - cost for reference is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - cost for reference is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - cost for property is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - cost for property is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - cost for nodeType is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - cost for nodeType is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - cost for traverse is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - cost for traverse is Infinity 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - no proper index was found for filter Filter(query=SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn) fullText="must", path=*) 2019-10-03 11:54:46,076 [main] DEBUG QueryImpl - no proper index was found for filter Filter(query=SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn) fullText="must", path=*) 2019-10-03 11:54:46,077 [main] WARN QueryImpl - Traversal query (query without index): SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn); consider creating an index 2019-10-03 11:54:46,077 [main] WARN QueryImpl - Traversal query (query 
without index): SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn); consider creating an index
2019-10-03 11:54:46,078 [main] DEBUG QueryRecorder - count: 1 query: SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'x') option(traversal warn)
2019-10-03 11:54:46,078 [main] DEBUG QueryRecorder - count: 1 query: SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'x') option(traversal warn)
2019-10-03 11:54:46,079 [main] DEBUG QueryManagerImpl - Executed query [SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn)] in [35] ms
2019-10-03 11:54:46,081 [main] DEBUG QueryImpl - query execute SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn)
2019-10-03 11:54:46,081 [main] DEBUG QueryImpl - query execute SELECT * FROM [nt:resource] AS s WHERE CONTAINS(s.*, 'must') option(traversal warn)
2019-10-03 11:54:46,082 [main] DEBUG QueryImpl - query plan [nt:resource] as [s] /* traverse "*" where contains([s].[*], 'must') */
2019-10-03 11:54:46,082 [main] DEBUG QueryImpl - query plan [nt:resource] as [s] /* traverse "*" where contains([s].[*], 'must') */

So, how do I define a proper index and write a correct query to search in file content? And how can I search in PDF documents?