Hi, could you please re-ask this on the Lucene users list ([email protected])?
The dev list is for discussing the development of Lucene's source code. Thanks. Mike McCandless http://blog.mikemccandless.com On Fri, Aug 9, 2013 at 8:37 AM, Lingviston <[email protected]> wrote: > Hi, I'm trying to use Lucene in my Android project. To start with I've > created a small demo app. It works with .txt files but I need to work with > .pdf. So analyzing my code I understand that it will have some issues with > .pdfs due to memory management. However the question I want to ask here is > not related to memory but to hit highlighting. It works now but using of > `Highlighter` class with pdfs is not what I want. So to implement my own > highlighting I need to know some kind of coordinates of found words in the > text. How can I get them? I'm using lucene 4.4.0 while all of the examples > like here > <http://stackoverflow.com/questions/1311199/finding-the-position-of-search-hits-from-lucene> > are for much older versions. Here is my code: > > public class MainActivity extends Activity { > > //----------------------------------------------------------------------------------------------------- > // > // Constants > // > > //----------------------------------------------------------------------------------------------------- > public static final String FIELD_PATH = "path"; > public static final String FIELD_CONTENTS = "contents"; > > > //----------------------------------------------------------------------------------------------------- > // > // Fields > // > > //----------------------------------------------------------------------------------------------------- > private EditText mEditText; > private TextView mTextView; > > > //----------------------------------------------------------------------------------------------------- > // > // Methods > // > > //----------------------------------------------------------------------------------------------------- > @Override > protected void onCreate(Bundle savedInstanceState) { > 
super.onCreate(savedInstanceState); > setContentView(R.layout.activity_main); > findViews(); > initViews(); > createIndex(); > } > > private void findViews() { > mEditText = (EditText) findViewById(R.id.activity_main_edittext); > mTextView = (TextView) findViewById(R.id.activity_main_textview); > } > > private void initViews() { > mEditText.setOnEditorActionListener(mEditorActionListener); > } > > private void performSearch(String searchString) { > try { > Directory directory = > NIOFSDirectory.open(getExternalFilesDir(null)); > DirectoryReader ireader = > DirectoryReader.open(directory); > IndexSearcher isearcher = new IndexSearcher(ireader); > > Analyzer analyzer = new > StandardAnalyzer(Version.LUCENE_44); > QueryParser queryParser = new > AnalyzingQueryParser(Version.LUCENE_44, > FIELD_CONTENTS, analyzer); > Query query = queryParser.parse(searchString); > TopDocs topDocs = isearcher.search(query, null, 1000); > ScoreDoc[] docs = topDocs.scoreDocs; > > StringBuilder result = new StringBuilder(); > StringBuilder debugInfo = new StringBuilder(); > debugInfo.append("Number of hits: "); > debugInfo.append(docs.length); > debugInfo.append("\n"); > > // Iterate through the results: > for (int i = 0; i < docs.length; i++) { > Document hitDoc = isearcher.doc(docs[i].doc); > > String path = hitDoc.get(FIELD_PATH); > debugInfo.append("Path: "); > debugInfo.append(path); > debugInfo.append("\n"); > > > result.append("-------------------------------------------------------"); > result.append("File: "); > result.append(path); > > result.append("-------------------------------------------------------"); > result.append("<br>"); > > String content = hitDoc.get(FIELD_CONTENTS); > QueryScorer scorer = new QueryScorer(query); > Highlighter highlighter = new Highlighter(new > SimpleHTMLFormatter("", > ""), scorer); > highlighter.setTextFragmenter(new > SimpleSpanFragmenter(scorer, > Integer.MAX_VALUE)); > String highlighted = > highlighter.getBestFragment(analyzer, > 
FIELD_CONTENTS, content); > > result.append("-------------------------------------------------------"); > result.append("Contents: "); > > result.append("-------------------------------------------------------"); > result.append("<br>"); > result.append(highlighted); > result.append("<br><br><br>"); > } > > //not working > /*PostingsHighlighter highlighter = new > PostingsHighlighter(); > String highlights[] = > highlighter.highlight(FIELD_CONTENTS, query, > isearcher, topDocs);*/ > mTextView.setText(Html.fromHtml(result.toString())); > Log.d(getClass().getSimpleName(), > debugInfo.toString()); > } catch (Exception e) { > e.printStackTrace(); > Log.e(getClass().getSimpleName(), e.getMessage()); > } > > } > > private void createIndex() { > try { > //Create directory for index. > Directory indexDirectory = new > NIOFSDirectory(getExternalFilesDir(null)); > > Analyzer analyzer = new > StandardAnalyzer(Version.LUCENE_44); > > IndexWriterConfig config = new > IndexWriterConfig(Version.LUCENE_44, > analyzer); > config.setOpenMode(OpenMode.CREATE); > > IndexWriter indexWriter = new > IndexWriter(indexDirectory, config); > > //Loop through files in specified directory and > adding them to index. 
> File dir = new > File(Environment.getExternalStorageDirectory() + > "/lucene"); > File[] files = dir.listFiles(); > for (File file : files) { > Document document = new Document(); > > { > FieldType fieldType = new > FieldType(TextField.TYPE_STORED); > > fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); > > String path = file.getCanonicalPath(); > document.add(new Field(FIELD_PATH, > path, fieldType)); > } > > { > FieldType fieldType = new > FieldType(TextField.TYPE_STORED); > fieldType.setIndexed(true); > > fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); > fieldType.setStored(true); > fieldType.setStoreTermVectors(true); > fieldType.setTokenized(true); > > fieldType.setStoreTermVectorOffsets(true); > String content = readFully(new > FileReader(file)); //we can't store > Reader objects but we need to be able to access the content for highlighting > document.add(new > Field(FIELD_CONTENTS, content, fieldType)); > } > > indexWriter.addDocument(document); > } > indexWriter.close(); > } catch (Exception e) { > e.printStackTrace(); > } > } > > public static String readFully(Reader reader) throws IOException { > char[] arr = new char[8*1024]; // 8K at a time > StringBuffer buf = new StringBuffer(); > int numChars; > > while ((numChars = reader.read(arr, 0, arr.length)) > 0) { > buf.append(arr, 0, numChars); > } > > return buf.toString(); > } > > @Override > public boolean onCreateOptionsMenu(Menu menu) { > getMenuInflater().inflate(R.menu.main, menu); > return true; > } > > > //----------------------------------------------------------------------------------------------------- > // > // Listeners > // > > //----------------------------------------------------------------------------------------------------- > private OnEditorActionListener mEditorActionListener = new > OnEditorActionListener() { > @Override > public boolean onEditorAction(TextView v, int actionId, > KeyEvent event) { > if (actionId == 
EditorInfo.IME_ACTION_SEARCH) { > performSearch(v.getText().toString()); > return true; > } > return false; > } > }; > } > > So how can I get hit coordinates and maybe you have any other advices what > I'm doing wrong? > > > > -- > View this message in context: > http://lucene.472066.n3.nabble.com/How-to-get-hits-coordinates-in-Lucene-4-4-0-tp4083508.html > Sent from the Lucene - Java Developer mailing list archive at Nabble.com. > > --------------------------------------------------------------------- > To unsubscribe, e-mail: [email protected] > For additional commands, e-mail: [email protected] > --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
