Paul wrote:

> Suppose I have some text that has a lot of quoted speech in it, but it's
> supplied using standard straight single (') and double (") quotes.
> 
> Is there some pre-processing tool that will try to convert them to
> proper curly quotes suitable for LaTeX (``) and ('')?
> 
> I know it can't be done perfectly and will need manual tweaking, but
> there must be something to do most of the work.
> 
> All I can think of is to use some word processor that has a "clever
> quotes" function (e.g. MS Word) and then use something like wvLatex to
> export that to LaTeX format. Or do a search and replace.
> 
> Is there a command-line tool that does this using some heuristics to
> cover most areas that could be problematic?

sed, perl, python or any other scripting language would do it for you.
Here's something in python:

$ cat trial.txt
The newspaper reported "he said 'The quick brown
fox jumped over
the lazy dog' 500 times in a row and then dropped down dead".

$ python quotes.py trial.txt
The newspaper reported ``he said `The quick brown
fox jumped over
the lazy dog' 500 times in a row and then dropped down dead''.

Regards,
Angus

#! /usr/bin/env python

import sys

def usage(prog_name):
    """Return the one-line usage message for the script named *prog_name*.

    The returned string already ends with a newline.
    """
    template = "Usage: %s 'input text file'\n"
    return template % prog_name


def warning(message):
    """Write *message*, terminated by a newline, to standard error."""
    stream = sys.stderr
    line = message + '\n'
    stream.write(line)


def error(message):
    """Write *message* plus a newline to standard error, then exit.

    Terminates the process by raising SystemExit with status 1, so this
    function never returns normally.
    """
    line = message + '\n'
    sys.stderr.write(line)
    raise SystemExit(1)


def manipulate(filename):
    """Return the text of *filename* with straight quotes made LaTeX quotes.

    Each straight double quote alternately becomes an opening `` and a
    closing ''. Each straight single quote alternately becomes an opening
    ` and a closing '. The open/closed state is carried across line
    breaks, so a quotation may span several lines.

    A single quote that sits directly between two alphanumeric characters
    (as in "can't") is treated as an apostrophe: it is emitted unchanged
    and does not affect the open/closed state. Other apostrophes (leading
    or trailing, e.g. "'tis" or "dogs'") cannot be told apart from quote
    marks here and are still treated as quotes, so the result may need
    manual touch-up.

    Returns the converted text as a single string, or None (after writing
    a warning to standard error) if the file could not be read.
    """
    # Keep the try body limited to the I/O so that genuine programming
    # errors in the conversion below are not misreported as read failures.
    try:
        with open(filename, 'r') as handle:
            text = handle.read()
    except (IOError, OSError, UnicodeError):
        warning('Unable to read %s' % filename)
        return None

    doubleq = '"'
    singleq = "'"

    double_latex_lq = '``'
    double_latex_rq = "''"
    single_latex_lq = '`'
    single_latex_rq = "'"

    inside_double = False
    inside_single = False

    output = []
    last = len(text) - 1
    for i, c in enumerate(text):
        if c == doubleq:
            if inside_double:
                output.append(double_latex_rq)
            else:
                output.append(double_latex_lq)
            inside_double = not inside_double
        elif c == singleq:
            # A quote character wedged between two letters/digits is an
            # apostrophe, not the start or end of quoted speech.
            is_apostrophe = (0 < i < last
                             and text[i - 1].isalnum()
                             and text[i + 1].isalnum())
            if is_apostrophe:
                output.append(singleq)
            else:
                if inside_single:
                    output.append(single_latex_rq)
                else:
                    output.append(single_latex_lq)
                inside_single = not inside_single
        else:
            output.append(c)

    return ''.join(output)


def main(argv):
    """Convert the quotes in the file named on the command line.

    *argv* is the full argument vector: the program name followed by
    exactly one input file name. With any other number of arguments the
    usage message is written to standard error and the process exits with
    status 1.

    The converted text is written to standard output exactly as produced,
    with nothing appended. If the file could not be read, nothing is
    written to standard output.
    """
    if len(argv) != 2:
        error(usage(argv[0]))

    input_file = argv[1]
    manipulated_text = manipulate(input_file)

    if manipulated_text is not None:
        # sys.stdout.write works on both Python 2 and Python 3, unlike the
        # Python 2-only "print text," statement, and adds no extra newline.
        sys.stdout.write(manipulated_text)


# Run the converter only when this file is executed as a script, passing
# the full command-line argument vector (program name included) to main().
if __name__ == "__main__":
    main(sys.argv)


Reply via email to