Hace un tiempo alguien en la lista ofreció, propuso o linkeó (no recuerdo) una aplicación en Python: loook.py. A mí me funciona perfectamente en Debian y Windows. No tengo el link, pero sí la aplicación. La adjunto, aunque creo que la lista no acepta adjuntos. Si a alguien le sirve y no le llega el adjunto, que me avise y la reenvío. Saludos, Gonzalo
El 1 de junio de 2010 22:13, Alexandro Colorado <[email protected]>escribió: > Aqui dice que esta soportado: > http://beagle-project.org/Supported_Filetypes > > 2010/6/1 Alexandro Colorado <[email protected]> > > > se supone que beagle hacia eso... Pero no estoy seguro si habra algo mas. > > > > > > 2010/6/1 Roman Gelbort <[email protected]> > > > > ¿Alguien conoce alguna manera de hacer búsquedas de archivos basándose > >> en alguna palabra del contenido? > >> > >> Por ejemplo: quiero encontrar los archivos .odt que contengan la palabra > >> "Perez". > >> > >> Desde Ubuntu puedo buscar archivos de texto plano de esta forma, pero no > >> archivos .odt > >> > >> -- > >> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > >> Prof. Román H. Gelbort > >> Hagamos Cultura y Software Libres entre todos > >> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > >> > >> --------------------------------------------------------------------- > >> To unsubscribe, e-mail: [email protected] > >> For additional commands, e-mail: [email protected] > >> > >> > > > > > > -- > > Alexandro Colorado > > OpenOffice.org Español > > http://es.openoffice.org > > > > > > > -- > Alexandro Colorado > OpenOffice.org Español > http://es.openoffice.org >
#!/usr/bin/python # # loook.py 0.6.5 (2006-07-16) - Search in OpenOffice.org files # # Homepage: http://www.danielnaber.de/loook/ # # Copyright (C) 2003, 2006 Daniel Naber, Lutz Haseloff # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # TODO: # -use better defaults for the file/dir dialogs: # askdirectory(initialdir="..."... # -support KOffice and Abiword? # -show title (use HListbox?) 
"""loook.py - search for words inside OpenOffice.org / OpenDocument files.

A small Tk GUI: pick a directory, type search terms, and every matching
document below that directory is listed; a double click opens the document.

The module runs unchanged on Python 2.6+ and on Python 3.
"""

import codecs
import locale
import os
import re
import string
import subprocess
import sys
import time
import zipfile

try:  # Python 2 module name
    import ConfigParser
except ImportError:  # Python 3
    import configparser as ConfigParser

try:  # Python 2 module names
    from Tkinter import *
    import tkFont
    import tkMessageBox
    import tkFileDialog
except ImportError:  # Python 3
    from tkinter import *
    import tkinter.font as tkFont
    import tkinter.messagebox as tkMessageBox
    import tkinter.filedialog as tkFileDialog

try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3

# Compiled once; they are applied to every document that is scanned.
_COMMENT_RE = re.compile("<!--.*?-->", re.DOTALL)
_TAG_RE = re.compile("<[^>]*>", re.DOTALL)
_TITLE_RE = re.compile("<dc:title>(.*?)</dc:title>", re.DOTALL | re.IGNORECASE)
_WHITESPACE_RE = re.compile(r"\s+")

# Minimum number of seconds between two GUI refreshes while searching.
_UPDATE_INTERVAL = 0.05


def _report(message, stream=None):
    """Write a diagnostic line to *stream* (default: stdout).

    A console that cannot encode the message must not abort a search, so the
    text is written with backslash escapes when the plain write fails.
    """
    if stream is None:
        stream = sys.stdout
    if stream is None:  # e.g. pythonw.exe has no console at all
        return
    try:
        stream.write(message + "\n")
    except UnicodeError:
        safe = message.encode("ascii", "backslashreplace").decode("ascii")
        stream.write(safe + "\n")


def _locale_encoding():
    """Return the encoding of the user's default locale (may be None)."""
    return locale.getdefaultlocale()[1]


def _to_text(value, encoding_source):
    """Return *value* as text, decoding byte strings (Python 2) if needed.

    *encoding_source* is a callable returning the encoding name.  It is only
    called when decoding is really required, and UTF-8 is used when it
    returns nothing usable.  Undecodable bytes are replaced instead of
    raising, so a strange command line cannot prevent the program start.
    """
    if isinstance(value, _text_type):
        return value
    try:
        encoding = encoding_source() or "utf-8"
    except (ValueError, AttributeError):
        encoding = "utf-8"
    try:
        return value.decode(encoding, "replace")
    except LookupError:  # unknown encoding name
        return value.decode("utf-8", "replace")


def _native_str(value):
    """Return *value* as the interpreter's native ``str`` for process calls.

    On Python 2 text is encoded with the file system encoding (UTF-8 as a
    fallback); on Python 3 the value is returned unchanged.
    """
    if str is bytes and isinstance(value, _text_type):  # Python 2 only
        try:
            return value.encode(sys.getfilesystemencoding() or "utf-8")
        except UnicodeError:
            return value.encode("utf-8")
    return value


class Application(object):
    """The loook main window: search form, result list and status bar."""

    CONFIGFILE = ".loook.cfg"

    # File name suffixes (lower case, without dot) that are searched.
    OOO_SUFFIXES = frozenset([
        'sxw', 'stw',   # OOo 1.x swriter
        'sxc', 'stc',   # OOo 1.x scalc
        'sxi', 'sti',   # OOo 1.x simpress
        'sxg',          # OOo 1.x master document
        'sxm',          # OOo 1.x formula
        'sxd', 'std',   # OOo 1.x sdraw
        'odt', 'ott',   # OOo 2.x swriter
        'odp', 'otp',   # OOo 2.x simpress
        'odf',          # OOo 2.x formula
        'odg', 'otg',   # OOo 2.x sdraw
        'ods', 'ots',   # OOo 2.x scalc
    ])

    def __init__(self, master=None):
        """Load configuration or use sensible default values.

        The configuration file lives in the directory named by USERPROFILE
        or, failing that, HOME.  Without either, settings are not saved.
        """
        self.master = master
        self.stopped = 0
        self.searching = False
        self.mode = "AND"
        # Full paths of the files shown in the list box, in the same order.
        self.match_paths = []
        self.ooo_path_str = None
        self.search_path_str = None
        self.config = ConfigParser.ConfigParser()
        config_path = os.getenv('USERPROFILE') or os.getenv('HOME')
        if config_path:
            self.configfile = os.path.join(config_path, self.CONFIGFILE)
            self._loadConfig()
        else:
            _report("Cannot find home directory, settings will not be saved.",
                    sys.stderr)
            self.configfile = None
        self.createWidgets()
        return

    def _loadConfig(self):
        """Read the saved viewer and search path, if a usable file exists."""
        try:
            # saveConfig() writes UTF-8, so the file is read the same way.
            with codecs.open(self.configfile, "r", "utf-8") as handle:
                # read_file() is the Python 3 name of readfp().
                reader = (getattr(self.config, "read_file", None)
                          or self.config.readfp)
                reader(handle)
            # raw=True: a '%' in a path must not be taken as interpolation.
            self.ooo_path_str = self.config.get(
                "General", "ooo_path", raw=True)
            self.search_path_str = self.config.get(
                "General", "search_path", raw=True)
        except (IOError, OSError):
            pass  # nothing has been saved yet
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
            pass  # incomplete file: keep the defaults for what is missing
        except (ConfigParser.Error, UnicodeError) as err:
            _report("Warning: ignoring unreadable configuration file '%s': %s"
                    % (self.configfile, err), sys.stderr)
        return

    def createWidgets(self):
        """Build and show the GUI elements."""
        # On Windows documents are opened via their file association, so the
        # viewer row is not shown there.
        show_viewer = os.name != 'nt'
        if show_viewer:
            Label(self.master, text="Viewer:").grid(row=0, sticky=E)
        Label(self.master, text="Search path:").grid(row=1, sticky=E)
        Label(self.master, text="Search terms:").grid(row=2, sticky=E)
        Label(self.master, text="Mode:").grid(row=3, sticky=E)
        Label(self.master, text="Matches:").grid(row=4, sticky=N+E)

        self.ooo_path = Entry(self.master)
        if self.ooo_path_str:
            self.ooo_path.insert(END, self.ooo_path_str)
        else:
            self.ooo_path.insert(END, "soffice")
        self.ooo_path_b = Button(self.master)
        self.ooo_path_b.bind("<Button-1>", self.selectOOoPath)
        self.ooo_path_b["text"] = ">"

        # Precedence: command line argument, saved setting, current directory.
        self.search_path = Entry(self.master)
        if len(sys.argv) >= 2:
            self.search_path.insert(
                END, _to_text(sys.argv[1], sys.getfilesystemencoding))
        elif self.search_path_str:
            self.search_path.insert(END, self.search_path_str)
        else:
            self.search_path.insert(
                END, _to_text(os.getcwd(), sys.getfilesystemencoding))
        self.search_path_b = Button(self.master)
        self.search_path_b.bind("<Button-1>", self.selectSearchPath)
        self.search_path_b["text"] = ">"

        self.search_query = Entry(self.master)
        self.search_query.bind('<Return>', self.startSearch)
        if len(sys.argv) >= 2:
            terms = ' '.join(sys.argv[2:])
            self.search_query.insert(END, _to_text(terms, _locale_encoding))
        self.search_query.focus()

        self.mode_button = Button(self.master)
        self.mode_button.bind("<Button-1>", self.popupMode)
        self.mode_button["text"] = "AND"
        self.mode_menu = Menu(self.master, tearoff=0)
        self.mode_menu.add_command(label="AND", command=self.setModeAND)
        self.mode_menu.add_command(label="OR", command=self.setModeOR)
        self.mode_menu.add_command(label="Phrase", command=self.setModePhrase)

        pad = 1
        if show_viewer:
            self.ooo_path.grid(columnspan=2, row=0, column=1,
                               sticky=E+W, pady=pad, padx=pad)
            self.ooo_path_b.grid(row=0, column=3,
                                 sticky=E+W, pady=pad, padx=pad)
        self.search_path.grid(columnspan=2, row=1, column=1,
                              sticky=E+W, pady=pad, padx=pad)
        self.search_path_b.grid(row=1, column=3,
                                sticky=E+W, pady=pad, padx=pad)
        self.search_query.grid(columnspan=3, row=2, column=1,
                               sticky=E+W, pady=pad, padx=pad)
        self.mode_button.grid(columnspan=3, row=3, column=1,
                              sticky=W, pady=pad, padx=pad)
        self.scrollbar = Scrollbar(self.master)
        self.scrollbar.grid(row=4, column=3, sticky=N+S, pady=pad, padx=pad)
        self.listbox = Listbox(self.master, yscrollcommand=self.scrollbar.set)
        self.listbox.bind('<Double-Button-1>', self.showDoc)
        self.listbox.grid(columnspan=2, row=4, column=1,
                          sticky=E+W+S+N, pady=pad, padx=pad)
        self.scrollbar.config(command=self.listbox.yview)

        f = Frame(self.master)
        self.search = Button(f)
        self.search["text"] = "Search"
        self.search["command"] = self.startSearch
        self.search.pack(side=LEFT)
        self.quit_button = Button(f)
        self.quit_button["text"] = "Quit"
        self.quit_button["command"] = self.doQuit
        self.quit_button.pack(side=RIGHT)
        self.stop_button = Button(f)
        self.stop_button["text"] = "Stop"
        self.stop_button["command"] = self.stop
        self.stop_button["state"] = DISABLED
        self.stop_button.pack(side=RIGHT)
        f.grid(row=5, columnspan=2, column=2, sticky=E, pady=pad, padx=pad)
        self.status = Label(self.master, text="", bd=1, relief=SUNKEN,
                            anchor=W)
        self.status.config(text="Ready.")
        self.status.grid(row=6, columnspan=4, column=0,
                         sticky=E+W, pady=pad, padx=pad)
        return

    def doQuit(self):
        """Save configuration, then quit."""
        self.saveConfig()
        self.master.quit()
        return

    def saveConfig(self):
        """Save path settings in configuration file in the user's HOME.

        A file that cannot be written only produces a warning, so that
        quitting still works (e.g. with a read-only home directory).
        """
        if not self.configfile:
            return
        if not self.config.has_section("General"):
            self.config.add_section("General")
        try:
            with codecs.open(self.configfile, "w", "utf-8") as handle:
                handle.write("[General]\n")
                handle.write("ooo_path=%s\n" % self.ooo_path.get())
                handle.write("search_path=%s\n" % self.search_path.get())
        except (IOError, OSError) as err:
            _report("Warning: settings could not be saved to '%s': %s"
                    % (self.configfile, err), sys.stderr)
        return

    def selectOOoPath(self, event):
        """Let the user pick the viewer executable."""
        chosen = tkFileDialog.askopenfilename()
        if chosen:  # empty when the dialog was cancelled: keep the old value
            self.ooo_path.delete(0, END)
            self.ooo_path.insert(END, chosen)
        return

    def selectSearchPath(self, event):
        """Let the user pick the directory to search in."""
        chosen = tkFileDialog.askdirectory()
        if chosen:  # empty when the dialog was cancelled: keep the old value
            self.search_path.delete(0, END)
            self.search_path.insert(END, chosen)
        return

    def setMode(self, mode):
        """Select the search mode: "AND", "OR" or "Phrase"."""
        self.mode = mode
        self.mode_button["text"] = mode
        return

    def setModeAND(self):
        self.setMode("AND")
        return

    def setModeOR(self):
        self.setMode("OR")
        return

    def setModePhrase(self):
        self.setMode("Phrase")
        return

    def popupMode(self, event):
        """Show the search mode menu at the mouse position."""
        try:
            self.mode_menu.tk_popup(event.x_root, event.y_root, 0)
        finally:
            # make sure to release the grab (Tk 8.0a1 only)
            self.mode_menu.grab_release()
        return

    def stop(self):
        """Ask the running search to stop at the next file."""
        self.stopped = 1
        return

    def showDoc(self, event):
        """Open the selected match in the viewer.

        On Windows the file is opened via its file association; elsewhere
        the program from the "Viewer" field is started with the file name as
        its only argument.  Failures are reported as warnings on stdout.
        """
        selection = event.widget.curselection()
        if not selection:  # double click on an empty list
            return
        try:
            index = int(selection[0])  # old Tk versions return strings
        except ValueError:
            return
        if 0 <= index < len(self.match_paths):
            filename = self.match_paths[index]
        else:
            filename = "%s%s" % (self.search_path.get(),
                                 event.widget.get(index))
        filename = os.path.normpath(filename)

        if os.name == 'nt':
            try:
                os.startfile(filename)
            except OSError:
                _report("Warning: File could not be opened. - %s" % filename)
            return

        prg = self.ooo_path.get()
        if not prg:
            tkMessageBox.showwarning('Error', 'Set viewer first.')
            return
        self.status.config(text="Starting viewer...")
        # An argument list (no shell) keeps quotes, spaces and other special
        # characters in the file name from being interpreted.
        try:
            subprocess.Popen([_native_str(prg), _native_str(filename)])
        except (OSError, ValueError) as err:
            # don't show a dialog, just warn like for other problems:
            _report("Warning: Viewer '%s' could not be started: %s"
                    % (prg, err))
        return

    def removeXMLMarkup(self, s, replace_with_space):
        """Return *s* without XML comments and tags.

        Tags are replaced by a space if *replace_with_space* is true, else
        they are removed without replacement.  Comments are always removed.
        """
        s = _COMMENT_RE.sub('', s)
        repl = ''
        if replace_with_space:
            repl = ' '
        return _TAG_RE.sub(repl, s)

    def match(self, query, docstring):
        """Return 1 if the lower-cased text *docstring* matches *query*.

        The mode is taken from the mode button: "Phrase" looks for the whole
        query as one substring, "AND" requires every whitespace separated
        word, "OR" at least one of them.  Any other mode never matches.
        """
        mode = self.mode_button["text"]
        if mode == "Phrase":
            # match only documents that contain the phrase:
            return int(query.lower() in docstring)
        words = [w.lower() for w in _WHITESPACE_RE.split(query.strip())]
        if mode == "AND":
            # match only documents that contain all words:
            return int(all(w in docstring for w in words))
        if mode == "OR":
            # match documents that contain at least one word:
            return int(any(w in docstring for w in words))
        _report("Error: unknown search mode '%s'" % mode)
        return 0

    def processFile(self, filename, query):
        """Search one file.

        Returns ``(filename, title)`` if the file is a supported document
        whose content or meta data matches *query*, else None.  Unreadable
        files only produce a warning on stdout.
        """
        # TODO: support KWord files (maindoc.xml / documentinfo.xml); this
        # will need its own viewer.
        if self.getSuffix(filename) not in self.OOO_SUFFIXES:
            return None
        try:
            archive = zipfile.ZipFile(filename, "r")
            try:
                raw_content = archive.read("content.xml")
                raw_meta = archive.read("meta.xml")
            except KeyError as err:
                _report("Warning: %s not found in '%s'" % (err, filename))
                return None
            finally:
                archive.close()
        except zipfile.BadZipfile as err:
            _report("Warning: Supposed ZIP file '%s' could not be opened: %s"
                    % (filename, err))
            return None
        except (IOError, OSError) as err:
            _report("Warning: File '%s' could not be opened: %s"
                    % (filename, err))
            return None

        # TODO: are all OOo files utf-8?
        meta_xml = raw_meta.decode('utf-8', 'replace')
        # The title has to be looked up while the tags are still there.
        title = ""
        title_match = _TITLE_RE.search(meta_xml)
        if title_match:
            title = title_match.group(1)
        # TODO: is replace_with_space=0 correct?
        content = self.removeXMLMarkup(raw_content.decode('utf-8', 'replace'),
                                       replace_with_space=0)
        docinfo = self.removeXMLMarkup(meta_xml, replace_with_space=0)
        self.ooo_count = self.ooo_count + 1
        if self.match(query, "%s %s" % (content.lower(), docinfo.lower())):
            return (filename, title)
        return None

    def startSearch(self, event=None):
        """Run a search with the current form values and show the result."""
        if self.searching:
            # The GUI stays responsive during a search, so "Search" or Return
            # can be triggered again; don't start a nested search.
            return
        self.searching = True
        self.stopped = 0
        self.last_update = 0
        self.match_count = 0
        self.ooo_count = 0
        self.match_paths = []
        self.listbox.delete(0, END)
        self.stop_button["state"] = NORMAL
        self.quit_button["state"] = DISABLED
        try:
            search_root = self.search_path.get()
            if not os.path.exists(search_root):
                tkMessageBox.showwarning(
                    'Error', 'Path "%s" doesn\'t exist' % search_root)
            else:
                self.recursiveSearch(search_root)
                count_str = "in %d files" % self.ooo_count
                if self.stopped:
                    self.status.config(
                        text="%d matches so far %s (search stopped)"
                        % (self.match_count, count_str))
                    self.stopped = 0
                else:
                    self.status.config(
                        text="%d matches %s" % (self.match_count, count_str))
        finally:
            # Restore the buttons even if the search failed unexpectedly.
            self.searching = False
            self.stop_button["state"] = DISABLED
            self.quit_button["state"] = NORMAL
        return

    def getSuffix(self, filename):
        """Return the lower-cased text after the last dot of the file name.

        Only the last path component is looked at, so dots in directory
        names don't count.  Returns None if the name contains no dot.
        """
        name = os.path.basename(filename)
        if "." not in name:
            return None
        return name.rsplit(".", 1)[1].lower()

    def recursiveSearch(self, directory):
        """Search *directory* and its sub-directories, filling the list box.

        Symbolic links to directories are not followed.  The search ends
        early once stop() has been called.
        """
        len_limit = 15  # avoid resizing window
        dir_part = os.path.split(directory)[1]
        if len(dir_part) > len_limit:
            dir_part = "%s..." % dir_part[0:len_limit]
        self.status.config(text="Searching in %s" % dir_part)
        try:
            dir_content = os.listdir(directory)
            dir_content.sort(key=lambda name: name.lower())
        except OSError as err:
            _report("Warning: %s: %s" % (directory, err))
            return
        except UnicodeError:
            _report("Warning: Unicode problem with directory name...")
            return

        search_root = self.search_path.get()
        for name in dir_content:
            if self.stopped:
                return
            try:
                filename = os.path.join(directory, name)
            except UnicodeError:
                # Python 2: a name that is not valid in the file system
                # encoding cannot be combined with a text directory name.
                _report("Warning: Unicode problem with a file name in '%s'"
                        % directory)
                continue
            if os.path.isfile(filename):
                match = self.processFile(filename, self.search_query.get())
                # Keep the GUI alive, but don't redraw for every single file.
                if time.time() - self.last_update > _UPDATE_INTERVAL:
                    self.master.update()
                    self.last_update = time.time()
                if match:
                    # TODO: show the title (match[1]) in the list, too.
                    # Display the path relative to the search path; only the
                    # leading part may be cut off.
                    if filename.startswith(search_root):
                        display_filename = filename[len(search_root):]
                    else:
                        display_filename = filename
                    self.listbox.insert('end', "%s" % display_filename)
                    self.match_paths.append(filename)
                    self.match_count = self.match_count + 1
            elif os.path.isdir(filename) and not os.path.islink(filename):
                self.recursiveSearch(filename)
        return


def main():
    """Handle the command line, then build and run the main window."""
    if len(sys.argv) >= 2 and sys.argv[1] in ('--help', '-h'):
        _report("Usage: loook.py [-h|--help] [directory] [search term]...")
        sys.exit(1)
    root = Tk()
    root.minsize(380, 200)
    version = "0.6.5"
    root.title("loook.py %s - OpenOffice.org File Finder" % version)
    # Only the entry column and the result list grow with the window.
    root.columnconfigure(1, weight=1)
    root.rowconfigure(4, weight=1)
    root.rowconfigure(5, weight=0)
    app = Application(root)
    root.protocol("WM_DELETE_WINDOW", app.doQuit)
    root.mainloop()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
