Re: [patch] Get rid of InsetLaTeXAccent - finally

Georg Baum Sun, 28 Jan 2007 13:35:14 -0800

Am Mittwoch, 24. Januar 2007 10:16 schrieb José Matos:
> On Wednesday 24 January 2007 8:45:01 am Georg Baum wrote:
> >
> > I won't have time to finish this before the weekend unfortunately.
> 
>   No problem.
> 
> > What do you say about the font change of spaces? This can lead to
> > different output, so it would be a file format change, but I am not
> > sure whether we can convert that in lyx2lyx easily.
> 
>   We should try to fix it; this is a file format change.
> 
>   I will try to look into possible solutions.


Can you please do that? I have resolved the speed problems and committed 
the part that does not touch the font stuff and InsetLatexAccent. It is 
useful as is, but will not catch all accents on spaces.
The attached patch is the remaining part; it can go in as soon as you have
the space conversion in lyx2lyx ready.


Georg

Index: src/insets/insetlatexaccent.C
===================================================================
--- src/insets/insetlatexaccent.C	(Revision 16920)
+++ src/insets/insetlatexaccent.C	(Arbeitskopie)
@@ -1,626 +0,0 @@
-/**
- * \file insetlatexaccent.C
- * This file is part of LyX, the document processor.
- * Licence details can be found in the file COPYING.
- *
- * \author Lars Gullik Bjønnes
- *
- * Full author contact details are available in file CREDITS.
- */
-
-#include <config.h>
-
-#include "insetlatexaccent.h"
-
-#include "debug.h"
-#include "language.h"
-#include "LColor.h"
-#include "lyxlex.h"
-#include "lyxrc.h"
-#include "metricsinfo.h"
-
-#include "frontends/FontMetrics.h"
-#include "frontends/Painter.h"
-
-#include "support/lstrings.h"
-
-
-namespace lyx {
-
-using support::contains;
-using support::trim;
-
-using std::endl;
-using std::string;
-using std::auto_ptr;
-using std::ostream;
-
-
-/* LatexAccent. Proper handling of accented characters */
-/* This part is done by Ivan Schreter, [EMAIL PROTECTED] */
-/* Later modified by Lars G. Bjønnes, [EMAIL PROTECTED] */
-
-InsetLatexAccent::InsetLatexAccent()
-	: candisp(false)
-{}
-
-
-InsetLatexAccent::InsetLatexAccent(string const & str)
-	: contents(str)
-{
-	checkContents();
-}
-
-
-auto_ptr<InsetBase> InsetLatexAccent::doClone() const
-{
-	return auto_ptr<InsetBase>(new InsetLatexAccent(contents));
-}
-
-
-void InsetLatexAccent::checkContents()
-	// check, if we know the modifier and can display it ok on screen
-{
-	candisp = false;
-
-	if (contents.empty() || contents.length() < 2) {
-		lyxerr[Debug::KEY] << "Cannot decode: " << contents << endl;
-		return;
-	}
-
-	contents = trim(contents);
-	if (contents[0] != '\\') { // demand that first char is a '\\'
-		lyxerr[Debug::KEY] << "Cannot decode: " << contents << endl;
-		return;
-	}
-
-	lyxerr[Debug::KEY] << "Decode: " << contents << endl;
-
-	remdot = false;
-	plusasc = false;
-	plusdesc = false;
-
-	switch (contents[1]) { // second char should be one of these
-	case '\'':  // acute
-		modtype = ACUTE;    // acute
-		plusasc = true;    // at the top of character
-		break;
-	case '`':   // grave
-		modtype = GRAVE;    // grave
-		plusasc = true;    // at the top
-		break;
-	case '=':   // macron
-		modtype = MACRON;    // macron
-		plusasc = true;    // at the top
-		break;
-	case '~':   // tilde
-		modtype = TILDE;    // tilde
-		plusasc = true;    // at the top
-		break;
-	case 'b':   // underbar
-		modtype = UNDERBAR;    // underbar
-		plusdesc = true;   // at the bottom
-		break;
-	case 'c':   // cedilla
-		modtype = CEDILLA;    // cedilla
-		plusdesc = true;   // at the bottom
-		break;
-	case 'd':   // underdot
-		modtype = UNDERDOT;    // underdot
-		plusdesc = true;   // at the bottom
-		break;
-	case 'r':   // circle
-		modtype = CIRCLE;    // circle
-		plusasc = true;    // at the top
-		break;
-	case 't':   // tie
-		modtype = TIE;    // tie
-		plusasc = true;    // at the top
-		break;
-	case 'u':   // breve
-		modtype = BREVE;    // breve
-		plusasc = true;    // at the top
-		break;
-	case 'v':   // caron
-		modtype = CARON;   // caron
-		plusasc = true;    // at the top
-		break;
-	case 'q':   // special caron
-		modtype = SPECIAL_CARON;   // special caron
-		plusasc = true;    // at the top
-		break;
-	case 'H':   // hungarian umlaut
-		modtype = HUNGARIAN_UMLAUT;   // hungarian umlaut
-		plusasc = true;    // at the top
-		break;
-	case '"':   // umlaut
-		modtype = UMLAUT;   // umlaut
-		plusasc = true;    // at the top
-		break;
-	case '.':   // dot
-		modtype = DOT;   // dot
-		plusasc = true;    // at the top
-		break;
-	case '^':   // circumflex
-		modtype = CIRCUMFLEX;   // circumflex
-		plusasc = true;    // at the top
-		break;
-	case 'k':   // ogonek
-		modtype = OGONEK;  // ogonek
-		plusdesc = true;
-		break;
-	case 'i': // dot-less-i
-		modtype = DOT_LESS_I;  // dot-less-i
-		plusasc = true; // at the top (not really needed)
-		remdot = true;
-		break;
-	case 'j': // dot-less-j
-		modtype = DOT_LESS_J; // dot-less-j
-		plusasc = true; // at the top (not really needed)
-		remdot = true;
-		break;
-	case 'l': // lslash
-		modtype = lSLASH;
-		plusasc = true; // at the top (not really needed)
-		break;
-	case 'L': // lslash
-		modtype = LSLASH;
-		plusasc = true; // at the top (not really needed)
-		break;
-	default:
-		lyxerr[Debug::KEY] << "Default" << endl;
-		// unknown accent (or something else)
-		return;
-	}
-
-	// we demand that third char is a '{' (Lgb)
-	if (contents[2] != '{') return;
-
-	// special clause for \i{}, \j{} \l{} and \L{}
-	if ((modtype == DOT_LESS_I || modtype == DOT_LESS_J
-	     || modtype == lSLASH || modtype == LSLASH)
-	    && contents[3] == '}') {
-		switch (modtype) {
-		case DOT_LESS_I: ic = 'i'; break;
-		case DOT_LESS_J: ic = 'j'; break;
-		case lSLASH:     ic = 'l'; break;
-		case LSLASH:     ic = 'L'; break;
-		default:
-			// if this happens something is really wrong
-			lyxerr << "InsetLaTexAccent: weird error." << endl;
-			break;
-		}
-		//ic = (modtype == DOT_LESS_J ? 'j' : 'i');
-		lyxerr[Debug::KEY] << "Contents: [" << contents << ']'
-				   << ", ic: " << ic
-				   << ", top: " << plusasc
-				   << ", bot: " << plusdesc
-				   << ", dot: " << remdot
-				   << ", mod: " << modtype << endl;
-		// Special case for space
-	} else if (contents[3] == '}') {
-		ic = ' ';
-	} else {
-		int i = 3;
-
-		// now get the char
-		ic = contents[3]; // i will always be 3 here
-
-		// ic should now be a alfa-char or '\\'
-		if (ic == '\\') {
-			ic = contents[++i]; // will only allow \<foo>{\i} and \<foo>{\j}
-			if (ic == 'i' || ic == 'j')
-				remdot = true;
-			else
-				return;
-		} else if ((ic == 'i'|| ic == 'j') && contents[4] == '}') {
-			// Do a rewrite: \<foo>{i} --> \<foo>{\i}
-			string temp = contents;
-			temp.erase(3, string::npos);
-			temp += '\\';
-			temp += char(ic);
-			for (string::size_type j = 4;
-			    j < contents.length(); ++j)
-				temp+= contents[j];
-			contents= temp;
-			++i;
-			remdot = true;
-		}
-
-		// demand a '}' at the end
-		if (contents[++i] != '}' && contents[++i]) return;
-
-		// fine, the char is properly decoded now (hopefully)
-		lyxerr[Debug::KEY] << "Contents: [" << contents << ']'
-				   << ", ic: " << ic
-				   << ", top: " << plusasc
-				   << ", bot: " << plusdesc
-				   << ", dot: " << remdot
-				   << ", mod: " << modtype << endl;
-	}
-	candisp = true;
-}
-
-
-bool InsetLatexAccent::metrics(MetricsInfo & mi, Dimension & dim) const
-{
-	LyXFont & font = mi.base.font;
-	frontend::FontMetrics const & fm =	theFontMetrics(font);
-
-	// This function is a bit too simplistic and is just a
-	// "try to make a fit for all accents" approach, to
-	// make it better we need to know what kind of accent is
-	// used and add to max based on that.
-	if (candisp) {
-		if (ic == ' ')
-			dim.asc = fm.ascent('a');
-		else
-			dim.asc = fm.ascent(ic);
-		if (plusasc)
-			dim.asc += (fm.maxAscent() + 3) / 3;
-
-		if (ic == ' ')
-			dim.des = fm.descent('a');
-		else
-			dim.des = fm.descent(ic);
-		if (plusdesc)
-			dim.des += 3;
-
-		dim.wid = fm.width(ic);
-	} else {
-		dim.asc = fm.maxAscent() + 4;
-		dim.des = fm.maxDescent() + 4;
-		docstring dcon(contents.begin(), contents.end());
-		dim.wid = fm.width(dcon) + 4;
-	}
-	bool const changed = dim_ != dim;
-	dim_ = dim;
-	return changed;
-}
-
-
-bool InsetLatexAccent::displayISO8859_9(PainterInfo & pi, int x, int y) const
-{
-	unsigned char tmpic = ic;
-
-	switch (modtype) {
-
-	case CEDILLA: {
-		if (ic == 'c') tmpic = '\xe7';
-		if (ic == 'C') tmpic = '\xc7';
-		if (ic == 's') tmpic = '\xfe';
-		if (ic == 'S') tmpic = '\xde';
-		break;
-	}
-
-	case BREVE: {
-		if (ic == 'g') tmpic = '\xf0';
-		if (ic == 'G') tmpic = '\xd0';
-		break;
-	}
-
-	case UMLAUT: {
-		if (ic == 'o') tmpic = '\xf6';
-		if (ic == 'O') tmpic = '\xd6';
-		if (ic == 'u') tmpic = '\xfc';
-		if (ic == 'U') tmpic = '\xdc';
-		break;
-	}
-
-	case DOT:
-		if (ic == 'I') tmpic = '\xdd';
-		break;
-
-	case DOT_LESS_I:
-		tmpic = '\xfd';
-		break;
-
-	default:
-		return false;
-	}
-
-	if (tmpic == ic)
-		return false;
-
-	pi.pain.text(x, y, char(tmpic), pi.base.font);
-	return true;
-}
-
-
-void InsetLatexAccent::drawAccent(PainterInfo const & pi, int x, int y,
-	char_type accent) const
-{
-	LyXFont const & font = pi.base.font;
-	frontend::FontMetrics const & fm =	theFontMetrics(font);
-
-	x -= fm.center(accent);
-	y -= fm.ascent(ic);
-	y -= fm.descent(accent);
-	y -= fm.height(accent) / 2;
-	pi.pain.text(x, y, accent, font);
-}
-
-
-void InsetLatexAccent::draw(PainterInfo & pi, int x, int baseline) const
-{
-	if (lyxrc.font_norm_type == LyXRC::ISO_8859_9)
-		if (displayISO8859_9(pi, x, baseline))
-			return;
-
-	// All the manually drawn accents in this function could use an
-	// overhaul. Different ways of drawing (what metrics to use)
-	// should also be considered.
-
-	LyXFont font = pi.base.font;
-	if (lyxrc.font_norm_type == LyXRC::ISO_10646_1)
-		font.setLanguage(english_language);
-
-	frontend::FontMetrics const & fm =	theFontMetrics(font);
-
-	if (candisp) {
-		int x2 = int(x + (fm.rbearing(ic) - fm.lbearing(ic)) / 2);
-		int hg;
-		int y;
-		if (plusasc) {
-			// mark at the top
-			hg = fm.maxDescent();
-			y = baseline - dim_.asc;
-			if (font.shape() == LyXFont::ITALIC_SHAPE)
-				x2 += int(0.8 * hg); // italic
-		} else {
-			// at the bottom
-			hg = dim_.des;
-			y = baseline;
-		}
-
-		double hg35 = hg * 0.6;
-
-		// display with proper accent mark
-		// first the letter
-		pi.pain.text(x, baseline, ic, font);
-
-		if (remdot) {
-			int tmpvar = baseline - fm.ascent('i');
-			int tmpx = 0;
-			if (font.shape() == LyXFont::ITALIC_SHAPE)
-				tmpx += int(0.8 * hg); // italic
-			lyxerr[Debug::KEY] << "Removing dot." << endl;
-			// remove the dot first
-			pi.pain.fillRectangle(x + tmpx, tmpvar, dim_.wid,
-					   fm.ascent('i') -
-					   fm.ascent('x') - 1,
-					   backgroundColor());
-			// the five lines below is a simple hack to
-			// make the display of accent 'i' and 'j'
-			// better. It makes the accent be written
-			// closer to the top of the dot-less 'i' or 'j'.
-			char tmpic = ic; // store the ic when we
-			ic = 'x';        // calculates the ascent of
-#ifdef WITH_WARNINGS
-#warning metrics?
-#endif
-			int asc = ascent(); // the dot-less version (here: 'x')
-			ic = tmpic;      // set the orig ic back
-			y = baseline - asc; // update to new y coord.
-		}
-
-		// now the rest - draw within (x, y, x + wid, y + hg)
-		switch (modtype) {
-		case ACUTE:
-			//drawAccent(pi, x2, baseline, '\xB4');
-			drawAccent(pi, x2, baseline, 0xB4);
-			break;
-
-		case GRAVE:
-			//drawAccent(pi, x2, baseline, '\x60');
-			drawAccent(pi, x2, baseline, 0x60);
-			break;
-
-		case MACRON:
-			//drawAccent(pi, x2, baseline, '\xAF');
-			drawAccent(pi, x2, baseline, 0xAF);
-			break;
-
-		case TILDE:
-			drawAccent(pi, x2, baseline, '~');
-			break;
-
-		case UNDERBAR: {
-			char_type const underbar = 0x5F; //('\x5F');
-			pi.pain.text(x2 - fm.center(underbar),
-				     baseline, underbar, font);
-			break;
-		}
-
-		case CEDILLA: {
-			char_type const cedilla = 0xB8; //('\xB8');
-			pi.pain.text(x2  - fm.center(cedilla),
-				     baseline, cedilla, font);
-			break;
-		}
-
-		case UNDERDOT:
-			pi.pain.text(x2  - fm.center('.'),
-				  int(baseline + 1.5 * fm.height('.')),
-				  '.', font);
-			break;
-
-		case DOT:
-			drawAccent(pi, x2, baseline, '.');
-			break;
-
-		case CIRCLE:
-			//drawAccent(pi, x2, baseline, '\xB0');
-			drawAccent(pi, x2, baseline, 0xB0);
-			break;
-
-		case TIE:
-			pi.pain.arc(int(x2 + hg35), y + hg / 2, 2 * hg, hg, 0, 360 * 32,
-				    LColor::foreground);
-			break;
-
-		case BREVE:
-			pi.pain.arc(int(x2 - hg / 2), y, hg, hg, 0, -360*32,
-				    LColor::foreground);
-			break;
-
-		case CARON: {
-			int xp[3], yp[3];
-			xp[0] = int(x2 - hg35);    yp[0] = int(y + hg35);
-			xp[1] = int(x2);           yp[1] = int(y + hg);
-			xp[2] = int(x2 + hg35);    yp[2] = int(y + hg35);
-			pi.pain.lines(xp, yp, 3, LColor::foreground);
-			break;
-		}
-
-		case SPECIAL_CARON: {
-			switch (ic) {
-				case 'L': dim_.wid = int(4.0 * dim_.wid / 5.0); break;
-				case 't': y -= int(hg35 / 2.0); break;
-			}
-			int xp[3], yp[3];
-			xp[0] = int(x + dim_.wid);
-			yp[0] = int(y + hg35 + hg);
-
-			xp[1] = int(x + dim_.wid + (hg35 / 2.0));
-			yp[1] = int(y + hg + (hg35 / 2.0));
-
-			xp[2] = int(x + dim_.wid + (hg35 / 2.0));
-			yp[2] = y + int(hg);
-
-			pi.pain.lines(xp, yp, 3, LColor::foreground);
-			break;
-		}
-
-		case HUNGARIAN_UMLAUT:
-			drawAccent(pi, x2, baseline, 0x02DD);
-			break;
-
-		case UMLAUT:
-			drawAccent(pi, x2, baseline, '"');
-			break;
-
-		case CIRCUMFLEX:
-			drawAccent(pi, x2, baseline, '\x5E');
-			break;
-
-		case OGONEK: {
-			// this does probably not look like an ogonek, so
-			// it should certainly be refined
-			int xp[4], yp[4];
-
-			xp[0] = x2;
-			yp[0] = y;
-
-			xp[1] = x2;
-			yp[1] = y + int(hg35);
-
-			xp[2] = int(x2 - hg35);
-			yp[2] = y + hg / 2;
-
-			xp[3] = x2 + hg / 4;
-			yp[3] = y + int(hg);
-
-			pi.pain.lines(xp, yp, 4, LColor::foreground);
-			break;
-		}
-
-		case lSLASH:
-		case LSLASH: {
-			int xp[2], yp[2];
-
-			xp[0] = x;
-			yp[0] = y + int(3 * hg);
-
-			xp[1] = int(x + dim_.wid * 0.75);
-			yp[1] = y + int(hg);
-
-			pi.pain.lines(xp, yp, 2, LColor::foreground);
-			break;
-		}
-
-		case DOT_LESS_I: // dotless-i
-		case DOT_LESS_J: // dotless-j
-			// nothing to do for these
-			break;
-		}
-
-	} else {
-		pi.pain.fillRectangle(x + 1,
-				      baseline - dim_.asc + 1, dim_.wid - 2,
-				      dim_.asc + dim_.des - 2,
-				      backgroundColor());
-		pi.pain.rectangle(x + 1, baseline - dim_.asc + 1,
-				  dim_.wid - 2, dim_.asc + dim_.des - 2,
-				  LColor::foreground);
-		docstring dcon(contents.begin(), contents.end());
-		pi.pain.text(x + 2, baseline, dcon, font);
-	}
-}
-
-
-void InsetLatexAccent::write(Buffer const &, ostream & os) const
-{
-	os << "\\i " << contents << "\n";
-}
-
-
-void InsetLatexAccent::read(Buffer const &, LyXLex & lex)
-{
-	lex.eatLine();
-	contents = lex.getString();
-	checkContents();
-}
-
-
-int InsetLatexAccent::latex(Buffer const &, odocstream & os,
-			    OutputParams const &) const
-{
-	os << from_ascii(contents);
-	return 0;
-}
-
-
-int InsetLatexAccent::plaintext(Buffer const &, odocstream & os,
-			    OutputParams const &) const
-{
-	os << from_ascii(contents);
-	return 0;
-}
-
-
-int InsetLatexAccent::docbook(Buffer const &, odocstream & os,
-			      OutputParams const &) const
-{
-        // FIXME UNICODE
-        os << from_ascii(contents);
-	return 0;
-}
-
-
-int InsetLatexAccent::textString(Buffer const & buf, odocstream & os,
-		       OutputParams const & op) const
-{
-	return plaintext(buf, os, op);
-}
-
-
-bool InsetLatexAccent::directWrite() const
-{
-	return true;
-}
-
-
-InsetBase::Code InsetLatexAccent::lyxCode() const
-{
-	return InsetBase::ACCENT_CODE;
-}
-
-
-ostream & operator<<(ostream & o, InsetLatexAccent::ACCENT_TYPES at)
-{
-	return o << int(at);
-}
-
-
-} // namespace lyx
Index: src/insets/insetlatexaccent.h
===================================================================
--- src/insets/insetlatexaccent.h	(Revision 16920)
+++ src/insets/insetlatexaccent.h	(Arbeitskopie)
@@ -1,152 +0,0 @@
-// -*- C++ -*-
-/**
- * \file insetlatexaccent.h
- * This file is part of LyX, the document processor.
- * Licence details can be found in the file COPYING.
- *
- * \author Lars Gullik Bjønnes
- *
- * Full author contact details are available in file CREDITS.
- */
-
-#ifndef INSET_LATEX_ACCENT_H
-#define INSET_LATEX_ACCENT_H
-
-#include "inset.h"
-#include "support/types.h"
-
-
-namespace lyx {
-
-class Dimension;
-
-
-/** Insertion of accents
-
-  Proper handling of accented characters.
-  This is class is supposed to handle all LaTeX accents, it
-  is also possible that the class will change a bit so that
-  it also can handle other special characters (e.g. Hstroke)
-  Initiated by Ivan Schreter, later modified by Lgb.
-  */
-class InsetLatexAccent : public InsetOld {
-public:
-	///
-	InsetLatexAccent();
-	///
-	explicit InsetLatexAccent(std::string const & str);
-	///
-	bool metrics(MetricsInfo &, Dimension &) const;
-	///
-	void draw(PainterInfo & pi, int x, int y) const;
-	///
-	bool displayISO8859_9(PainterInfo & pi, int x, int y) const;
-	///
-	void write(Buffer const &, std::ostream &) const;
-	///
-	void read(Buffer const &, LyXLex & lex);
-	///
-	int latex(Buffer const &, odocstream &,
-		  OutputParams const &) const;
-	///
-	int plaintext(Buffer const &, odocstream &,
-		  OutputParams const &) const;
-	///
-	int docbook(Buffer const &, odocstream &,
-		    OutputParams const &) const;
-	/// the string that is passed to the TOC
-	virtual int textString(Buffer const &, odocstream &,
-		OutputParams const &) const;
-	///
-	bool directWrite() const;
-	///
-	InsetBase::Code lyxCode()const;
-	///
-	inline bool canDisplay();
-	// should this inset be handled like a normal charater
-	bool isChar() const { return true; }
-
-	/// is this equivalent to a letter?
-	virtual bool isLetter() const { return candisp; }
-
-	/// all the accent types
-	enum ACCENT_TYPES{
-		///
-		ACUTE, // 0
-		///
-		GRAVE,
-		///
-		MACRON,
-		///
-		TILDE,
-		///
-		UNDERBAR,
-		///
-		CEDILLA, // 5
-		///
-		UNDERDOT,
-		///
-		CIRCLE,
-		///
-		TIE,
-		///
-		BREVE,
-		///
-		CARON, // 10
-		///
-		SPECIAL_CARON,
-		///
-		HUNGARIAN_UMLAUT,
-		///
-		UMLAUT,
-		///
-		DOT,
-		///
-		CIRCUMFLEX, // 15
-		///
-		OGONEK,
-		///
-		DOT_LESS_I,
-		///
-		DOT_LESS_J, // 18
-		///
-		lSLASH,
-		///
-		LSLASH
-	};
-private:
-	friend std::ostream & operator<<(std::ostream &, ACCENT_TYPES);
-
-	virtual std::auto_ptr<InsetBase> doClone() const;
-
-	/// Check if we know the modifier and can display it ok on screen.
-	void checkContents();
-	///
-	void drawAccent(PainterInfo const & pi, int x, int y, char_type accent) const;
-	///
-	std::string contents;
-	/// can display as proper char
-	bool  candisp;
-	/// modifier type
-	ACCENT_TYPES  modtype;
-
-	/// remove dot from 'i' and 'j' or transform l, L into lslash, LSLaSH
-	bool  remdot;
-	/// add something to ascent - accent at the top
-	bool  plusasc;
-	/// add something to descent - underlined char
-	bool  plusdesc;
-	/// international char
-	mutable char  ic;
-};
-
-
-bool InsetLatexAccent::canDisplay()
-{
-	return candisp;
-}
-
-
-} // namespace lyx
-
-#endif
Index: src/insets/Makefile.am
===================================================================
--- src/insets/Makefile.am	(Revision 16920)
+++ src/insets/Makefile.am	(Arbeitskopie)
@@ -77,8 +77,6 @@ libinsets_la_SOURCES = \
 	insetindex.h \
 	insetlabel.C \
 	insetlabel.h \
-	insetlatexaccent.C \
-	insetlatexaccent.h \
 	insetline.C \
 	insetline.h \
 	insetmarginal.h \
Index: src/buffer.C
===================================================================
--- src/buffer.C	(Revision 16920)
+++ src/buffer.C	(Arbeitskopie)
@@ -141,7 +141,7 @@ using std::string;
 
 namespace {
 
-int const LYX_FORMAT = 256;
+int const LYX_FORMAT = 257;
 
 } // namespace anon
 
Index: src/trans_mgr.C
===================================================================
--- src/trans_mgr.C	(Revision 16920)
+++ src/trans_mgr.C	(Arbeitskopie)
@@ -22,8 +22,6 @@
 #include "lyxtext.h"
 #include "trans.h"
 
-#include "insets/insetlatexaccent.h"
-
 #include "support/lstrings.h"
 
 
@@ -287,14 +285,7 @@ void TransManager::insert(string const &
 	if (chset_.getName() != lyxrc.font_norm ||
 	    !enc.first) {
 		// Could not find an encoding
-		InsetLatexAccent ins(str);
-		if (ins.canDisplay()) {
-			cap::replaceSelection(cur);
-			cur.insert(new InsetLatexAccent(ins));
-			cur.posRight();
-		} else {
-			insertVerbatim(str, text, cur);
-		}
+		insertVerbatim(str, text, cur);
 		return;
 	}
 	string const tmp(1, static_cast<char>(enc.second));
Index: src/paragraph.C
===================================================================
--- src/paragraph.C	(Revision 16920)
+++ src/paragraph.C	(Arbeitskopie)
@@ -1041,16 +1041,6 @@ bool Paragraph::simpleTeXOnePar(Buffer c
 
 		LyXFont const last_font = running_font;
 
-		// Spaces at end of font change are simulated to be
-		// outside font change, i.e. we write "\textXX{text} "
-		// rather than "\textXX{text }". (Asger)
-		if (open_font && c == ' ' && i <= size() - 2) {
-			LyXFont const & next_font = getFont(bparams, i + 1, outerfont);
-			if (next_font != running_font && next_font != font) {
-				font = next_font;
-			}
-		}
-
 		// We end font definition before blanks
 		if (open_font &&
 		    (font != running_font ||
@@ -1064,7 +1054,17 @@ bool Paragraph::simpleTeXOnePar(Buffer c
 			open_font = false;
 		}
 
-		// Blanks are printed before start of fontswitch
+		// Do we need to change font?
+		if ((font != running_font ||
+		     font.language() != running_font.language()) &&
+			i != body_pos - 1)
+		{
+			column += font.latexWriteStartChanges(
+					os, basefont, last_font, bparams);
+			running_font = font;
+			open_font = true;
+		}
+
 		if (c == ' ') {
 			// Do not print the separation of the optional argument
 			if (i != body_pos - 1) {
@@ -1083,17 +1083,6 @@ bool Paragraph::simpleTeXOnePar(Buffer c
 			}
 		}
 
-		// Do we need to change font?
-		if ((font != running_font ||
-		     font.language() != running_font.language()) &&
-			i != body_pos - 1)
-		{
-			column += font.latexWriteStartChanges(
-					os, basefont, last_font, bparams);
-			running_font = font;
-			open_font = true;
-		}
-
 		Change::Type changeType = pimpl_->lookupChange(i).type;
 
 		column += Changes::latexMarkChange(os, runningChangeType,
Index: src/text.C
===================================================================
--- src/text.C	(Revision 16920)
+++ src/text.C	(Arbeitskopie)
@@ -58,7 +58,6 @@
 #include "insets/insettext.h"
 #include "insets/insetbibitem.h"
 #include "insets/insethfill.h"
-#include "insets/insetlatexaccent.h"
 #include "insets/insetline.h"
 #include "insets/insetnewline.h"
 #include "insets/insetpagebreak.h"
@@ -240,10 +239,6 @@ void readParToken(Buffer const & buf, Pa
 			par.insertInset(par.size(), inset.release(),
 					font, change);
 		}
-	} else if (token == "\\i") {
-		auto_ptr<InsetBase> inset(new InsetLatexAccent);
-		inset->read(buf, lex);
-		par.insertInset(par.size(), inset.release(), font, change);
 	} else if (token == "\\backslash") {
 		par.insertChar(par.size(), '\\', font, change);
 	} else if (token == "\\newline") {
Index: lib/lyx2lyx/LyX.py
===================================================================
--- lib/lyx2lyx/LyX.py	(Revision 16920)
+++ lib/lyx2lyx/LyX.py	(Arbeitskopie)
@@ -73,7 +73,7 @@ format_relation = [("0_06",    [200], ge
                    ("1_2",     [220], generate_minor_versions("1.2" , 4)),
                    ("1_3",     [221], generate_minor_versions("1.3" , 7)),
                    ("1_4", range(222,246), generate_minor_versions("1.4" , 3)),
-                   ("1_5", range(246,257), generate_minor_versions("1.5" , 0))]
+                   ("1_5", range(246,258), generate_minor_versions("1.5" , 0))]
 
 
 def formats_list():
Index: lib/lyx2lyx/lyx_1_5.py
===================================================================
--- lib/lyx2lyx/lyx_1_5.py	(Revision 16920)
+++ lib/lyx2lyx/lyx_1_5.py	(Arbeitskopie)
@@ -20,7 +20,9 @@
 """ Convert files to the file format generated by lyx 1.5"""
 
 import re
-from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value
+import unicodedata
+
+from parser_tools import find_re, find_token, find_token_exact, find_tokens, find_end_of, get_value
 from LyX import get_encoding
 
 
@@ -720,6 +722,251 @@ def revert_encodings(document):
     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 
 
+# Accents of InsetLaTeXAccent
+accent_map = {
+    "`" : u'\u0300', # grave
+    "'" : u'\u0301', # acute
+    "^" : u'\u0302', # circumflex
+    "~" : u'\u0303', # tilde
+    "=" : u'\u0304', # macron
+    "u" : u'\u0306', # breve
+    "." : u'\u0307', # dot above
+    "\"": u'\u0308', # diaresis
+    "r" : u'\u030a', # ring above
+    "H" : u'\u030b', # double acute
+    "v" : u'\u030c', # caron
+    "b" : u'\u0320', # minus sign below
+    "d" : u'\u0323', # dot below
+    "c" : u'\u0327', # cedilla
+    "k" : u'\u0328', # ogonek
+    "t" : u'\u0361'  # tie. This is special: It spans two characters, but
+                     # only one is given as argument, so we don't need to
+                     # treat it differently.
+}
+
+
+# special accents of InsetLaTeXAccent without argument
+special_accent_map = {
+    'i' : u'\u0131', # dotless i
+    'j' : u'\u0237', # dotless j
+    'l' : u'\u0142', # l with stroke
+    'L' : u'\u0141'  # L with stroke
+}
+
+
+# special accent arguments of InsetLaTeXAccent
+accented_map = {
+    '\\i' : u'\u0131', # dotless i
+    '\\j' : u'\u0237'  # dotless j
+}
+
+
+def _convert_accent(accent, accented_char):
+    type = accent
+    char = accented_char
+    if char == '':
+        if type in special_accent_map:
+            return special_accent_map[type]
+        # a missing char is treated as space by LyX
+        char = ' '
+    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
+        # Special caron, only used with t, d, l and L.
+        # It is not in the map because we convert it to the same unicode
+        # character as the normal caron: \q{} is only defined if babel with
+        # the czech or slovak language is used, and the normal caron
+        # produces the correct output if the T1 font encoding is used.
+        # For the same reason we never convert to \q{} in the other direction.
+        type = 'v'
+    elif char in accented_map:
+        char = accented_map[char]
+    elif (len(char) > 1):
+        # We can only convert accents on a single char
+        return ''
+    a = accent_map.get(type)
+    if a:
+        return unicodedata.normalize("NFKC", "%s%s" % (char, a))
+    return ''
+
+
+def convert_ertbackslash(body, i, ert, default_layout):
+    r""" -------------------------------------------------------------------------------------------
+    Convert backslashes and '\n' into valid ERT code, append the converted
+    text to body[i] and return the (maybe incremented) line index i"""
+
+    for c in ert:
+        if c == '\\':
+            body[i] = body[i] + '\\backslash '
+            i = i + 1
+            body.insert(i, '')
+        elif c == '\n':
+            body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
+            i = i + 4
+        else:
+            body[i] = body[i] + c
+    return i
+
+
+def convert_accent(document):
+    # The following forms are supported by LyX:
+    # '\i \"{a}' (standard form, as written by LyX)
+    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
+    # '\i \"{ }' (also accepted if the accented char is a space)
+    # '\i \" a'  (also accepted)
+    # '\i \"'    (also accepted)
+    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
+    re_contents = re.compile(r'^([^\s{]+)(.*)$')
+    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
+    i = 0
+    while 1:
+        i = find_re(document.body, re_wholeinset, i)
+        if i == -1:
+            return
+        match = re_wholeinset.match(document.body[i])
+        prefix = match.group(1)
+        contents = match.group(3).strip()
+        match = re_contents.match(contents)
+        if match:
+            # Strip first char (always \)
+            accent = match.group(1)[1:]
+            accented_contents = match.group(2).strip()
+            match = re_accentedcontents.match(accented_contents)
+            accented_char = match.group(1)
+            converted = _convert_accent(accent, accented_char)
+            if converted == '':
+                # Normalize contents
+                contents = '%s{%s}' % (accent, accented_char),
+            else:
+                document.body[i] = '%s%s' % (prefix, converted)
+                i += 1
+                continue
+        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
+        document.body[i] = prefix
+        document.body[i+1:i+1] = ['\\begin_inset ERT',
+                                  'status collapsed',
+                                  '',
+                                  '\\begin_layout %s' % document.default_layout,
+                                  '',
+                                  '',
+                                  '']
+        i = convert_ertbackslash(document.body, i + 7,
+                                 '\\%s' % contents,
+                                 document.default_layout)
+        document.body[i+1:i+1] = ['\\end_layout',
+                                  '',
+                                  '\\end_inset']
+        i += 3
+
+
+def revert_accent(document):
+    inverse_accent_map = {}
+    for k in accent_map:
+        inverse_accent_map[accent_map[k]] = k
+    inverse_special_accent_map = {}
+    for k in special_accent_map:
+        inverse_special_accent_map[special_accent_map[k]] = k
+    inverse_accented_map = {}
+    for k in accented_map:
+        inverse_accented_map[accented_map[k]] = k
+
+    # Since LyX may insert a line break within a word we must combine all
+    # words before unicode normalization.
+    # We do this only if the next line starts with an accent, otherwise we
+    # would create things like '\begin_inset ERTstatus'.
+    numberoflines = len(document.body)
+    for i in range(numberoflines-1):
+        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
+            continue
+        if (document.body[i+1][0] in inverse_accent_map):
+            # the last character of this line and the first of the next line
+            # probably form a base character followed by a combining accent.
+            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
+                document.body[i] += document.body[i+1][0]
+                document.body[i+1] = document.body[i+1][1:]
+
+    # Normalize to "Normal form KD" (NFKD, also known as compatibility decomposition).
+    # This is needed to catch all accented characters.
+    for i in range(numberoflines):
+        # Unfortunately we have a mixture of unicode strings and plain strings,
+        # because we never use u'xxx' for string literals, but 'xxx'.
+        # Therefore we may have to try two times to normalize the data.
+        try:
+            document.body[i] = unicodedata.normalize("NFKD", document.body[i])
+        except TypeError:
+            document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
+
+    # Replace accented characters with InsetLaTeXAccent
+    # Do not convert characters that can be represented in the chosen
+    # encoding.
+    encoding_stack = [get_encoding(document.language, document.inputencoding, 248)]
+    lang_re = re.compile(r"^\\lang\s(\S+)")
+    for i in range(len(document.body)):
+
+        if document.inputencoding == "auto" or document.inputencoding == "default":
+            # Track the encoding of the current line
+            result = lang_re.match(document.body[i])
+            if result:
+                language = result.group(1)
+                if language == "default":
+                    encoding_stack[-1] = document.encoding
+                else:
+                    from lyx2lyx_lang import lang
+                    encoding_stack[-1] = lang[language][3]
+                continue
+            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
+                encoding_stack.append(encoding_stack[-1])
+                continue
+            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
+                del encoding_stack[-1]
+                continue
+
+        for j in range(len(document.body[i])):
+            # dotless i and dotless j are both in special_accent_map and can
+            # occur as an accented character, so we need to test that the
+            # following character is no accent
+            if (document.body[i][j] in inverse_special_accent_map and
+                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
+                accent = document.body[i][j]
+                try:
+                    dummy = accent.encode(encoding_stack[-1])
+                except UnicodeEncodeError:
+                    # Insert the rest of the line as new line
+                    if j < len(document.body[i]) - 1:
+                        document.body[i+1:i+1] = document.body[i][j+1:]
+                    # Delete the accented character
+                    if j > 0:
+                        document.body[i] = document.body[i][:j-1]
+                    else:
+                        document.body[i] = u''
+                    # Finally add the InsetLaTeXAccent
+                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
+                    break
+            elif j > 0 and document.body[i][j] in inverse_accent_map:
+                accented_char = document.body[i][j-1]
+                if accented_char == ' ':
+                    # Conform to LyX output
+                    accented_char = ''
+                elif accented_char in inverse_accented_map:
+                    accented_char = inverse_accented_map[accented_char]
+                accent = document.body[i][j]
+                try:
+                    dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
+                except UnicodeEncodeError:
+                    # Insert the rest of the line as new line
+                    if j < len(document.body[i]) - 1:
+                        document.body[i+1:i+1] = document.body[i][j+1:]
+                    # Delete the accented characters
+                    if j > 1:
+                        document.body[i] = document.body[i][:j-2]
+                    else:
+                        document.body[i] = u''
+                    # Finally add the InsetLaTeXAccent
+                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
+                    break
+    # Normalize to "Normal form KC" (NFKC, compatibility decomposition followed by composition) again
+    for i in range(numberoflines):
+        document.body[i] = unicodedata.normalize("NFKC", document.body[i])
+
+
 ##
 # Conversion hub
 #
@@ -735,16 +982,18 @@ convert = [[246, []],
            [253, []],
            [254, [convert_esint]],
            [255, []],
-           [256, []]]
+           [256, []],
+           [257, [convert_accent]]]
 
-revert =  [[255, [revert_encodings]],
+revert =  [[256, []],
+           [255, [revert_encodings]],
            [254, [revert_clearpage, revert_cleardoublepage]],
            [253, [revert_esint]],
            [252, [revert_nomenclature, revert_printnomenclature]],
            [251, [revert_commandparams]],
            [250, [revert_cs_label]],
            [249, []],
-           [248, [revert_utf8]],
+           [248, [revert_accent, revert_utf8]],
            [247, [revert_booktabs]],
            [246, [revert_font_settings]],
            [245, [revert_framed]]]
Index: development/scons/scons_manifest.py
===================================================================
--- development/scons/scons_manifest.py	(Revision 16920)
+++ development/scons/scons_manifest.py	(Arbeitskopie)
@@ -347,7 +347,6 @@ src_insets_header_files = Split('''
     insetinclude.h
     insetindex.h
     insetlabel.h
-    insetlatexaccent.h
     insetline.h
     insetmarginal.h
     insetnewline.h
@@ -403,7 +402,6 @@ src_insets_files = Split('''
     insetinclude.C
     insetindex.C
     insetlabel.C
-    insetlatexaccent.C
     insetline.C
     insetmarginal.C
     insetnewline.C

Re: [patch] Get rid of InsetLaTeXAccent - finally

Reply via email to