Re: [patch] tex2lyx whitespace changes

Georg Baum Mon, 03 Nov 2003 03:04:46 -0800

Sorry, wrong patch (contained some unrelated test stuff), use this instead.


Georg

Index: src/tex2lyx/ChangeLog
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/ChangeLog,v
retrieving revision 1.40
diff -u -r1.40 ChangeLog
--- src/tex2lyx/ChangeLog	2003/10/23 11:46:33	1.40
+++ src/tex2lyx/ChangeLog	2003/11/03 09:31:49
@@ -1,3 +1,27 @@
+2003-11-03  Georg Baum  <[EMAIL PROTECTED]>
+
+	* math.C:
+	* table.C:
+	* text.C:
+	* context.[Ch]: New functions Context::set_item(),
+	Context::new_paragraph(ostream & os) and Context::atParagraphStart()
+	to make Context usage more explicit
+	* texparser.[Ch]: Rework Parser::tokenize (see comment in texparser.h)
+	* table.C:
+	* math.C:
+	* texparser.C: Don't silently drop comments
+	* texparser.C: Token::asInput() does not append a space anymore
+	* texparser.[Ch]: Renamed Parser::prev_token() to Parser::curr_token().
+	The new Parser::prev_token() now really returns the previous token
+	* Context.[Ch]:
+	* text.C: Convert known vspaces at paragraph start to \\added_space_top
+	* preamble.C: Don't put out newlines twice.
+	* text.C: Fix minipage position bug
+	* text.C: Fix \labelwidthstring bug
+	* text.C: Recognize alignment environments
+	* text.C: Fix a few cases of incorrect context usage, resulting
+	in missing or superfluous \begin_layout / \end_layout lines.
+
 2003-10-23  Georg Baum  <[EMAIL PROTECTED]>
 
 	* math.C:
Index: src/tex2lyx/context.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/context.C,v
retrieving revision 1.7
diff -u -r1.7 context.C
--- src/tex2lyx/context.C	2003/10/06 15:43:21	1.7
+++ src/tex2lyx/context.C	2003/11/03 09:31:49
@@ -23,7 +23,7 @@
 
 void begin_layout(ostream & os, LyXLayout_ptr layout)
 {
-	os << "\n\\begin_layout " << layout->name() << "\n\n";
+	os << "\n\\begin_layout " << layout->name() << "\n";
 }
 
 
@@ -94,11 +94,12 @@
 			begin_layout(os, layout);
 			need_layout=false;
 			need_end_layout = true;
-			if (!extra_stuff.empty()) {
-				os << extra_stuff;
-				extra_stuff.erase();
-			}
 		}
+		if (!extra_stuff.empty()) {
+			os << extra_stuff;
+			extra_stuff.erase();
+		}
+		os << "\n";
 	}
 }
 
@@ -140,6 +141,20 @@
 }
 
 
+void Context::set_item()
+{
+	need_layout = true;
+	has_item = true;
+}
+
+
+void Context::new_paragraph(ostream & os)
+{
+	check_end_layout(os);
+	need_layout = true;
+}
+
+
 void Context::dump(ostream & os, string const & desc) const
 {
 	os << "\n" << desc <<" [";
@@ -147,6 +162,12 @@
 		os << "need_layout ";
 	if (need_end_layout)
 		os << "need_end_layout ";
+	if (need_end_deeper)
+		os << "need_end_deeper ";
+	if (has_item)
+		os << "has_item ";
+	if (deeper_paragraph)
+		os << "deeper_paragraph ";
 	if (!extra_stuff.empty())
 		os << "extrastuff=[" << extra_stuff << "] ";
 	os << "layout=" << layout->name();
Index: src/tex2lyx/context.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/context.h,v
retrieving revision 1.7
diff -u -r1.7 context.h
--- src/tex2lyx/context.h	2003/09/09 18:27:24	1.7
+++ src/tex2lyx/context.h	2003/11/03 09:31:49
@@ -37,6 +37,15 @@
 	// description \c desc.
 	void dump(std::ostream &, std::string const & desc = "context") const;
 
+	/// Are we just beginning a new paragraph?
+	bool atParagraphStart() const { return need_layout; }
+
+	/// Begin an item in a list environment
+	void set_item();
+
+	/// Start a new paragraph
+	void new_paragraph(std::ostream & os);
+
 	// Do we need to output some \begin_layout command before the
 	// next characters?
 	bool need_layout;
Index: src/tex2lyx/math.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/math.C,v
retrieving revision 1.11
diff -u -r1.11 math.C
--- src/tex2lyx/math.C	2003/10/23 11:46:33	1.11
+++ src/tex2lyx/math.C	2003/11/03 09:31:50
@@ -99,8 +99,10 @@
 			}
 		}
 
+		else if (t.cat() == catSpace)
+			os << ' ';
+
 		else if (t.cat() == catLetter ||
-			       t.cat() == catSpace ||
 			       t.cat() == catSuper ||
 			       t.cat() == catSub ||
 			       t.cat() == catOther ||
@@ -110,8 +112,7 @@
 			os << t.character();
 
 		else if (t.cat() == catNewline) {
-			//if (p.next_token().cat() == catNewline) {
-			//	p.get_token();
+			//if (t.cs().size() > 1) {
 			//	handle_par(os);
 			//} else {
 				os << "\n "; // note the space
@@ -130,8 +131,13 @@
 			os << "unexpected '}' in math\n";
 		}
 
-		else if (t.cat() == catComment)
-			handle_comment(p);
+		else if (t.cat() == catComment) {
+			if (t.cs().size())
+				cerr << "Ignoring comment: " << t.asInput();
+			else
+				// "%\n" combination
+				p.skip_spaces();
+		}
 
 		//
 		// control sequences
Index: src/tex2lyx/preamble.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/preamble.C,v
retrieving revision 1.16
diff -u -r1.16 preamble.C
--- src/tex2lyx/preamble.C	2003/10/23 11:46:33	1.16
+++ src/tex2lyx/preamble.C	2003/11/03 09:31:50
@@ -176,6 +176,21 @@
 	h_preamble.str("");
 }
 
+/*!
+ * Swallows up to \p number newlines.
+ * Swallows all newlines if \p number is 0.
+ */
+void swallow_newlines(Parser & p, ostream & os, size_t number)
+{
+	if (p.next_token().cat() != catNewline)
+		return;
+	Token const & t = p.get_token();
+	if (number == 0)
+		return;
+	for(size_t i = number; i < t.cs().size(); ++i)
+		os << '\n';
+}
+
 } // anonymous namespace
 
 LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & forceclass)
@@ -188,7 +203,7 @@
 	while (p.good()) {
 		Token const & t = p.get_token();
 
-		if (t.cs() == "documentclass") {
+		if (t.cat() == catEscape && t.cs() == "documentclass") {
 			is_full_document = true;
 			break;
 		}
@@ -206,7 +221,6 @@
 		// cat codes
 		//
 		if (t.cat() == catLetter ||
-			  t.cat() == catSpace ||
 			  t.cat() == catSuper ||
 			  t.cat() == catSub ||
 			  t.cat() == catOther ||
@@ -215,24 +229,28 @@
 			  t.cat() == catBegin ||
 			  t.cat() == catEnd ||
 			  t.cat() == catAlign ||
-			  t.cat() == catNewline ||
 			  t.cat() == catParameter)
 		h_preamble << t.character();
 
+		else if (t.cat() == catSpace || t.cat() == catNewline)
+			h_preamble << t.asInput();
+
 		else if (t.cat() == catComment)
-			handle_comment(p);
+			h_preamble << t.asInput();
 
-		else if (t.cs() == "pagestyle")
+		else if (t.cs() == "pagestyle") {
 			h_paperpagestyle = p.verbatim_item();
+			swallow_newlines(p, h_preamble, 1);
+		}
 
 		else if (t.cs() == "makeatletter") {
 			p.setCatCode('@', catLetter);
-			h_preamble << "\\makeatletter\n";
+			h_preamble << "\\makeatletter";
 		}
 
 		else if (t.cs() == "makeatother") {
 			p.setCatCode('@', catOther);
-			h_preamble << "\\makeatother\n";
+			h_preamble << "\\makeatother";
 		}
 
 		else if (t.cs() == "newcommand" || t.cs() == "renewcommand"
@@ -246,25 +264,27 @@
 			string const opts = p.getOpt();
 			string const body = p.verbatim_item();
 			// only non-lyxspecific stuff
-			if (name != "\\noun "
-				  && name != "\\tabularnewline "
-			    && name != "\\LyX "
-				  && name != "\\lyxline "
-				  && name != "\\lyxaddress "
-				  && name != "\\lyxrightaddress "
-				  && name != "\\boldsymbol "
-				  && name != "\\lyxarrow ") {
+			if (   name != "\\noun"
+			    && name != "\\tabularnewline"
+			    && name != "\\LyX"
+			    && name != "\\lyxline"
+			    && name != "\\lyxaddress"
+			    && name != "\\lyxrightaddress"
+			    && name != "\\boldsymbol"
+			    && name != "\\lyxarrow") {
 				ostringstream ss;
 				ss << '\\' << t.cs();
 				if (star)
 					ss << '*';
-				ss << '{' << name << '}' << opts << '{' << body << "}\n";
+				ss << '{' << name << '}' << opts << '{' << body << "}";
 				h_preamble << ss.str();
 /*
 				ostream & out = in_preamble ? h_preamble : os;
 				out << "\\" << t.cs() << "{" << name << "}"
-				    << opts << "{" << body << "}\n";
+				    << opts << "{" << body << "}";
 */
+			} else {
+				swallow_newlines(p, h_preamble, 0);
 			}
 		}
 
@@ -276,6 +296,7 @@
 			h_quotes_language = h_language;
 			h_options = join(opts, ",");
 			h_textclass = p.getArg('{', '}');
+			swallow_newlines(p, h_preamble, 1);
 		}
 
 		else if (t.cs() == "usepackage") {
@@ -291,6 +312,7 @@
 			} else {
 				handle_package(name, options);
 			}
+			swallow_newlines(p, h_preamble, 1);
 		}
 
 		else if (t.cs() == "newenvironment") {
@@ -301,17 +323,18 @@
 			ss << p.getOpt();
 			ss << '{' << p.verbatim_item() << '}';
 			ss << '{' << p.verbatim_item() << '}';
-			ss << '\n';
 			if (name != "lyxcode" && name != "lyxlist"
 					&& name != "lyxrightadress" && name != "lyxaddress")
 				h_preamble << ss.str();
+			else
+				swallow_newlines(p, h_preamble, 0);
 		}
 
 		else if (t.cs() == "def") {
 			string name = p.get_token().cs();
 			while (p.next_token().cat() != catBegin)
 				name += p.get_token().asString();
-			h_preamble << "\\def\\" << name << '{' << p.verbatim_item() << "}\n";
+			h_preamble << "\\def\\" << name << '{' << p.verbatim_item() << "}";
 		}
 
 		else if (t.cs() == "newcolumntype") {
@@ -328,34 +351,36 @@
 			h_preamble << "\\newcolumntype{" << name << "}";
 			if (nargs)
 				h_preamble << "[" << nargs << "]";
-			h_preamble << "{" << p.verbatim_item() << "}\n";
+			h_preamble << "{" << p.verbatim_item() << "}";
 		}
 
 		else if (t.cs() == "setcounter") {
 			string const name = p.getArg('{', '}');
 			string const content = p.getArg('{', '}');
-			if (name == "secnumdepth")
+			if (name == "secnumdepth") {
 				h_secnumdepth = content;
-			else if (name == "tocdepth")
+				swallow_newlines(p, h_preamble, 1);
+			} else if (name == "tocdepth") {
 				h_tocdepth = content;
-			else
-				h_preamble << "\\setcounter{" << name << "}{" << content << "}\n";
+				swallow_newlines(p, h_preamble, 1);
+			} else
+				h_preamble << "\\setcounter{" << name << "}{" << content << "}";
 		}
 
 		else if (t.cs() == "setlength") {
 			string const name = p.verbatim_item();
 			string const content = p.verbatim_item();
-			if (name == "parskip")
+			// Is this correct? Both parskip and parindent select "skip" here.
+			if (name == "parskip") {
 				h_paragraph_separation = "skip";
-			else if (name == "parindent")
+				swallow_newlines(p, h_preamble, 1);
+			} else if (name == "parindent") {
 				h_paragraph_separation = "skip";
-			else
-				h_preamble << "\\setlength{" << name << "}{" << content << "}\n";
+				swallow_newlines(p, h_preamble, 1);
+			} else
+				h_preamble << "\\setlength{" << name << "}{" << content << "}";
 		}
 
-		else if (t.cs() == "par")
-			h_preamble << '\n';
-
 		else if (t.cs() == "begin") {
 			string const name = p.getArg('{', '}');
 			if (name == "document")
@@ -364,8 +389,9 @@
 		}
 
 		else if (t.cs().size())
-			h_preamble << '\\' << t.cs() << ' ';
+			h_preamble << '\\' << t.cs();
 	}
+	p.skip_spaces();
 
 	// Force textclass if the user wanted it
 	if (forceclass.size()) {
Index: src/tex2lyx/table.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/table.C,v
retrieving revision 1.22
diff -u -r1.22 table.C
--- src/tex2lyx/table.C	2003/10/29 19:19:27	1.22
+++ src/tex2lyx/table.C	2003/11/03 09:31:52
@@ -192,13 +192,14 @@
 			}
 		}
 
+		else if (t.cat() == catSpace || t.cat() == catNewline)
+				os << t.cs();
+
 		else if (t.cat() == catLetter ||
-			       t.cat() == catSpace ||
 			       t.cat() == catSuper ||
 			       t.cat() == catSub ||
 			       t.cat() == catOther ||
 			       t.cat() == catActive ||
-			       t.cat() == catNewline ||
 			       t.cat() == catParameter)
 			os << t.character();
 
@@ -216,6 +217,7 @@
 
 		else if (t.cat() == catAlign) {
 			os << TAB;
+			p.skip_spaces();
 		}
 
 		else if (t.cs() == "tabularnewline" || t.cs() == "\\") {
@@ -232,7 +234,7 @@
 			hlines += "\\cline{" + p.verbatim_item() + '}';
 
 		else if (t.cat() == catComment)
-			handle_comment(p);
+			os << t.asInput();
 
 		else if (t.cs() == "(") {
 			os << "\\(";
Index: src/tex2lyx/tex2lyx.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/tex2lyx.C,v
retrieving revision 1.52
diff -u -r1.52 tex2lyx.C
--- src/tex2lyx/tex2lyx.C	2003/10/23 11:46:33	1.52
+++ src/tex2lyx/tex2lyx.C	2003/11/03 09:31:52
@@ -52,19 +52,6 @@
 // Hacks to allow the thing to link in the lyxlayout stuff
 LyXErr lyxerr(std::cerr.rdbuf());
 
-void handle_comment(Parser & p)
-{
-	string s;
-	while (p.good()) {
-		Token const & t = p.get_token();
-		if (t.cat() == catNewline)
-			break;
-		s += t.asString();
-	}
-	//cerr << "comment: " << s << "\n";
-	p.skip_spaces();
-}
-
 
 string const trim(string const & a, char const * p)
 {
@@ -238,6 +225,13 @@
 	active_environments.pop_back();
 	ss.seekg(0);
 	os << ss.str();
+#ifdef TEST_PARSER
+	p.reset();
+	ofstream parsertest("parsertest.tex");
+	while (p.good())
+		parsertest << p.get_token().asInput();
+	// <origfile> and parsertest.tex should now have identical content
+#endif
 }
 
 
Index: src/tex2lyx/tex2lyx.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/tex2lyx.h,v
retrieving revision 1.11
diff -u -r1.11 tex2lyx.h
--- src/tex2lyx/tex2lyx.h	2003/10/23 11:46:33	1.11
+++ src/tex2lyx/tex2lyx.h	2003/11/03 09:31:52
@@ -46,7 +46,6 @@
 
 
 /// in tex2lyx.C
-void handle_comment(Parser & p);
 std::string const trim(std::string const & a, char const * p = " \t\n\r");
 
 void split(std::string const & s, std::vector<std::string> & result,
Index: src/tex2lyx/texparser.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/texparser.C,v
retrieving revision 1.22
diff -u -r1.22 texparser.C
--- src/tex2lyx/texparser.C	2003/10/23 11:46:33	1.22
+++ src/tex2lyx/texparser.C	2003/11/03 09:31:53
@@ -28,17 +28,6 @@
 
 CatCode theCatcode[256];
 
-void skipSpaceTokens(istream & is, char c)
-{
-	// skip trailing spaces
-	while (catcode(c) == catSpace || catcode(c) == catNewline)
-		if (!is.get(c))
-			break;
-	//cerr << "putting back: " << c << "\n";
-	is.putback(c);
-}
-
-
 void catInit()
 {
 	fill(theCatcode, theCatcode + 256, catOther);
@@ -95,12 +84,16 @@
 
 ostream & operator<<(ostream & os, Token const & t)
 {
-	if (t.cs().size())
+	if (t.cat() == catComment)
+		os << '%' << t.cs() << '\n';
+	else if (t.cat() == catSpace)
+		os << t.cs();
+	else if (t.cat() == catEscape)
 		os << '\\' << t.cs() << ' ';
 	else if (t.cat() == catLetter)
 		os << t.character();
 	else if (t.cat() == catNewline)
-		os << "[\\n," << t.cat() << "]\n";
+		os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
 	else
 		os << '[' << t.character() << ',' << t.cat() << ']';
 	return os;
@@ -115,7 +108,11 @@
 
 string Token::asInput() const
 {
-	return char_ ? string(1, char_) : '\\' + cs_ + ' ';
+	if (cat_ == catComment)
+		return '%' + cs_ + '\n';
+	if (cat_ == catSpace || cat_ == catNewline)
+		return cs_;
+	return char_ ? string(1, char_) : '\\' + cs_;
 }
 
 
@@ -154,6 +151,13 @@
 Token const & Parser::prev_token() const
 {
 	static const Token dummy;
+	return pos_ > 1 ? tokens_[pos_ - 2] : dummy;
+}
+
+
+Token const & Parser::curr_token() const
+{
+	static const Token dummy;
 	return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
 }
 
@@ -173,20 +177,41 @@
 }
 
 
-void Parser::skip_spaces()
+void Parser::skip_spaces(bool skip_comments)
 {
-	while (1) {
-		if (next_token().cat() == catSpace || next_token().cat() == catNewline)
+	// We just silently return if we have no more tokens.
+	// skip_spaces() should be callable at any time,
+	// the caller must check Parser::good() anyway.
+	while (good()) {
+		if ( next_token().cat() == catSpace ||
+		    (next_token().cat() == catNewline && next_token().cs().size() == 1) ||
+		     next_token().cat() == catComment && next_token().cs().empty())
 			get_token();
-		else if (next_token().cat() == catComment)
-			while (next_token().cat() != catNewline)
-				get_token();
+		else if (skip_comments && next_token().cat() == catComment)
+			cerr << "  Ignoring comment: " << get_token().asInput();
 		else
 			break;
 	}
 }
 
 
+void Parser::unskip_spaces(bool skip_comments)
+{
+	while (pos_ > 0) {
+		if ( curr_token().cat() == catSpace ||
+		    (curr_token().cat() == catNewline && curr_token().cs().size() == 1))
+			putback();
+		else if (skip_comments && curr_token().cat() == catComment) {
+			// TODO: Get rid of this
+			cerr << "Unignoring comment: " << curr_token().asInput();
+			putback();
+		}
+		else
+			break;
+	}
+}
+
+
 void Parser::putback()
 {
 	--pos_;
@@ -209,7 +234,12 @@
 
 string Parser::getArg(char left, char right)
 {
-	skip_spaces();
+	skip_spaces(true);
+
+	// This is needed if a partial file ends with a command without arguments,
+	// e. g. \medskip
+	if (! good())
+		return string();
 
 	string result;
 	char c = getChar();
@@ -217,8 +247,17 @@
 	if (c != left)
 		putback();
 	else
-		while ((c = getChar()) != right && good())
-			result += c;
+		while ((c = getChar()) != right && good()) {
+			// Ignore comments
+			if (curr_token().cat() == catComment) {
+				if (curr_token().cs().size())
+					cerr << "Ignoring comment: " << curr_token().asInput();
+			}
+			else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline)
+				result += curr_token().cs();
+			else
+				result += c;
+		}
 
 	return result;
 }
@@ -245,34 +284,39 @@
 		//cerr << "reading c: " << c << "\n";
 
 		switch (catcode(c)) {
+			case catSpace: {
+				string s(1, c);
+				while (is.get(c) && catcode(c) == catSpace)
+					s += c;
+				if (catcode(c) != catSpace)
+					is.putback(c);
+				push_back(Token(s, catSpace));
+				break;
+			}
+
 			case catNewline: {
 				++lineno_;
-				is.get(c);
-				if (catcode(c) == catNewline) {
-					//do {
-						is.get(c);
-					//} while (catcode(c) == catNewline);
-					push_back(Token("par"));
-				} else {
-					push_back(Token('\n', catNewline));
+				string s(1, c);
+				while (is.get(c) && catcode(c) == catNewline) {
+					++lineno_;
+					s += c;
 				}
-				is.putback(c);
+				if (catcode(c) != catNewline)
+					is.putback(c);
+				push_back(Token(s, catNewline));
 				break;
 			}
 
 			case catComment: {
-				push_back(Token(c, catComment));
+				// We don't treat "%\n" combinations here specially because
+				// we want to preserve them in the preamble
+				string s;
 				while (is.get(c) && catcode(c) != catNewline)
-					push_back(Token(c, catLetter));
-				push_back(Token(c, catNewline));
+					s += c;
+				// Note: The '%' at the beginning and the '\n' at the end
+				// of the comment are not stored.
 				++lineno_;
-				is.get(c);
-				if (catcode(c) == catNewline) {
-					push_back(Token("par"));
-					++lineno_;
-				} else {
-					is.putback(c);
-				}
+				push_back(Token(s, catComment));
 				break;
 			}
 
@@ -286,18 +330,11 @@
 						// collect letters
 						while (is.get(c) && catcode(c) == catLetter)
 							s += c;
-						skipSpaceTokens(is, c);
+						if (catcode(c) != catLetter)
+							is.putback(c);
 					}
-					push_back(Token(s));
+					push_back(Token(s, catEscape));
 				}
-				break;
-			}
-
-			case catSuper:
-			case catSub: {
-				push_back(Token(c, catcode(c)));
-				is.get(c);
-				skipSpaceTokens(is, c);
 				break;
 			}
 
Index: src/tex2lyx/texparser.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/texparser.h,v
retrieving revision 1.15
diff -u -r1.15 texparser.h
--- src/tex2lyx/texparser.h	2003/10/23 11:46:33	1.15
+++ src/tex2lyx/texparser.h	2003/11/03 09:31:53
@@ -75,7 +75,7 @@
 	///
 	Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
 	///
-	Token(std::string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
+	Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
 
 	///
 	std::string const & cs() const { return cs_; }
@@ -100,9 +100,16 @@
 std::ostream & operator<<(std::ostream & os, Token const & t);
 
 
-//
-// Actual parser class
-//
+/*!
+ * Actual parser class
+ *
+ * The parser parses every character of the inputstream into a token
+ * and classifies the token.
+ * The following transformations are done:
+ * - Consecutive spaces are combined into one single token with CatCode catSpace
+ * - Consecutive newlines are combined into one single token with CatCode catNewline
+ * - Comments and %\n combinations are parsed into one token with CatCode catComment
+ */
 
 class Parser {
 
@@ -136,11 +143,15 @@
 	///
 	Token const & prev_token() const;
 	///
-	Token const & next_token() const;
+	Token const & curr_token() const;
 	///
+	Token const & next_token() const;
+	/// Make the next token current and return that.
 	Token const & get_token();
-	/// skips spaces if any
-	void skip_spaces();
+	/// skips spaces (and comments if \p skip_comments is true)
+	void skip_spaces(bool skip_comments = false);
+	/// puts back spaces (and comments if \p skip_comments is true)
+	void unskip_spaces(bool skip_comments = false);
 	///
 	void lex(std::string const & s);
 	///
@@ -156,7 +167,7 @@
 	///
 	CatCode getCatCode(char c) const;
 
-//private:
+private:
 	///
 	int lineno_;
 	///
Index: src/tex2lyx/text.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/text.C,v
retrieving revision 1.25
diff -u -r1.25 text.C
--- src/tex2lyx/text.C	2003/10/23 11:46:33	1.25
+++ src/tex2lyx/text.C	2003/11/03 09:31:58
@@ -170,8 +170,30 @@
 }
 
 
-void handle_ert(ostream & os, string const & s, Context const & context)
+void handle_ert(ostream & os, string const & s, Context & context, bool check_layout = true)
 {
+	if (check_layout) {
+		// We must have a valid layout before outputting the ERT inset.
+		context.check_layout(os);
+	}
+	Context newcontext(true, context.textclass);
+	begin_inset(os, "ERT");
+	os << "\nstatus Collapsed\n";
+	newcontext.check_layout(os);
+	for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
+		if (*it == '\\')
+			os << "\n\\backslash \n";
+		else
+			os << *it;
+	}
+	newcontext.check_end_layout(os);
+	end_inset(os);
+}
+
+
+void handle_comment(ostream & os, string const & s, Context & context)
+{
+	// TODO: Handle this better
 	Context newcontext(true, context.textclass);
 	begin_inset(os, "ERT");
 	os << "\nstatus Collapsed\n";
@@ -182,6 +204,8 @@
 		else
 			os << *it;
 	}
+	// make sure that our comment is the last thing on the line
+	os << "\n\\newline";
 	newcontext.check_end_layout(os);
 	end_inset(os);
 }
@@ -217,10 +241,11 @@
 	context.check_deeper(os);
 	context.check_layout(os);
 	if (context.layout->optionalargs > 0) {
+		p.skip_spaces();
 		if (p.next_token().character() == '[') {
 			p.get_token(); // eat '['
 			begin_inset(os, "OptArg\n");
-			os << "collapsed true\n";
+			os << "collapsed true\n\n";
 			parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
 			end_inset(os);
 		}
@@ -228,9 +253,57 @@
 	parse_text_snippet(p, os, FLAG_ITEM, outer, context);
 	context.check_end_layout(os);
 	context.check_end_deeper(os);
+	// We don't really need a new paragraph, but
+	// we must make sure that the next item gets a \begin_layout.
+	parent_context.new_paragraph(os);
 }
 
 
+/*!
+ * Output a space if necessary.
+ * This function gets called for every whitespace token.
+ *
+ * We suppress as many spaces as possible. This has two effects:
+ * - Reimporting LyX generated LaTeX files changes almost no whitespace
+ * - Superfluous whitespace from non LyX generated LaTeX files is removed.
+ *
+ * The consequence is that the logic inside the function is
+ * complicated, but that's the price...
+ *
+ * We have three cases here:
+ * - A space must be suppressed. Example: The lyxcode case below
+ * - A space may be suppressed. Example: Spaces before "\par"
+ * - A space must not be suppressed. Example: A space between two words
+ */
+void check_space(Parser const & p, ostream & os, Context & context)
+{
+	Token const next = p.next_token();
+	Token const curr = p.curr_token();
+	Token const prev = p.prev_token();
+	string const name = next.cs();
+	if (next.cat() == catSpace ||
+	    next.cat() == catComment ||
+	    (next.cat() == catNewline && next.cs().size() == 1) ||
+	    next.cs() == "par" ||
+	    next.cs() == "item" ||
+	    (next.cs() == "end" && (context.layout->latextype == LATEX_ITEM_ENVIRONMENT ||
+	                            context.layout->latextype == LATEX_LIST_ENVIRONMENT ||
+			            active_environment() == "quotation" ||
+			            active_environment() == "quote" ||
+			            active_environment() == "verse" ||
+	                            // LyX emits a newline before \end{lyxcode}.
+				    // This newline must be ignored,
+				    // otherwise LyX will add an additional protected space.
+			            (active_environment() == "lyxcode" && curr.cat() == catNewline))) ||
+	    // Or should we make a negative list? The alignment environments like center would be included
+	    prev.cs() == "]" ||
+	    prev.cs() == "\\") {
+		return;
+	}
+	context.check_layout(os);
+	os << ' ';
+}
+
 void parse_environment(Parser & p, ostream & os, bool outer,
 		       Context & parent_context)
 {
@@ -239,6 +312,8 @@
 	const bool is_starred = suffixIs(name, '*');
 	string const unstarred_name = rtrim(name, "*");
 	active_environments.push_back(name);
+	p.skip_spaces();
+
 	if (is_math_env(name)) {
 		parent_context.check_layout(os);
 		begin_inset(os, "Formula ");
@@ -262,13 +337,15 @@
 			os << "placement " << p.getArg('[', ']') << '\n';
 		}
 		os << "wide " << tostr(is_starred)
-		   << "\ncollapsed false\n";
+		   << "\ncollapsed false\n\n";
 		parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
 		end_inset(os);
+		// We don't really need a new paragraph, but
+		// we must make sure that the next item gets a \begin_layout.
+		parent_context.new_paragraph(os);
 	}
 
 	else if (name == "minipage") {
-		parent_context.check_layout(os);
 		string position = "1";
 		string inner_pos = "0";
 		string height = "0pt";
@@ -293,8 +370,8 @@
 				if (p.next_token().asInput() == "[") {
 					latex_inner_pos = p.getArg('[', ']');
 					switch(latex_inner_pos[0]) {
-					case 't': inner_pos = "0"; break;
-					case 'c': inner_pos = "1"; break;
+					case 'c': inner_pos = "0"; break;
+					case 't': inner_pos = "1"; break;
 					case 'b': inner_pos = "2"; break;
 					case 's': inner_pos = "3"; break;
 					default:
@@ -318,11 +395,11 @@
 				ss << '[' << latex_inner_pos << ']';
 			ss << "{" << width << "}";
 			handle_ert(os, ss.str(), parent_context);
-			parent_context.check_end_layout(os);
-			parent_context.need_layout = true;
+			parent_context.new_paragraph(os);
 			parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
 			handle_ert(os, "\\end{minipage}", parent_context);
 		} else {
+			parent_context.check_layout(os);
 			begin_inset(os, "Minipage\n");
 			os << "position " << position << '\n';
 			os << "inner_position " << inner_pos << '\n';
@@ -332,11 +409,27 @@
 			parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
 			end_inset(os);
 		}
-
 	}
 
-	else if (name == "center") {
+	// Alignment settings
+	else if (name == "center" || name == "flushleft" || name == "flushright" ||
+	         name == "centering" || name == "raggedright" || name == "raggedleft") {
+		// We must begin a new paragraph if not already done
+		if (! parent_context.atParagraphStart()) {
+			parent_context.check_end_layout(os);
+			parent_context.new_paragraph(os);
+		}
+		if (name == "flushleft" || name == "raggedright")
+			parent_context.extra_stuff += "\\align left ";
+		else if (name == "flushright" || name == "raggedleft")
+			parent_context.extra_stuff += "\\align right ";
+		else
+			parent_context.extra_stuff += "\\align center ";
 		parse_text(p, os, FLAG_END, outer, parent_context);
+		// Just in case the environment is empty ..
+		parent_context.extra_stuff.erase();
+		// We must begin a new paragraph to reset the alignment
+		parent_context.new_paragraph(os);
 	}
 
 	// The single '=' is meant here.
@@ -360,6 +453,7 @@
 		parse_text(p, os, FLAG_END, outer, context);
 		context.check_end_layout(os);
 		context.check_end_deeper(os);
+		parent_context.new_paragraph(os);
 	}
 
 	else if (name == "appendix") {
@@ -383,19 +477,20 @@
 
 	else if (name == "tabbing") {
 		// We need to remember that we have to handle '\=' specially
-		parent_context.check_layout(os);
 		handle_ert(os, "\\begin{" + name + "}", parent_context);
 		parse_text_snippet(p, os, FLAG_END | FLAG_TABBING, outer, parent_context);
 		handle_ert(os, "\\end{" + name + "}", parent_context);
 	}
 
 	else {
-		parent_context.check_layout(os);
 		handle_ert(os, "\\begin{" + name + "}", parent_context);
 		parse_text_snippet(p, os, FLAG_END, outer, parent_context);
 		handle_ert(os, "\\end{" + name + "}", parent_context);
 	}
+
 	active_environments.pop_back();
+	if (name != "math")
+		p.skip_spaces();
 }
 
 } // anonymous namespace
@@ -485,9 +580,10 @@
 			skip_braces(p);
 		}
 
+		else if (t.cat() == catSpace || (t.cat() == catNewline && t.cs().size() == 1))
+			check_space(p, os, context);
 
 		else if (t.cat() == catLetter ||
-			       t.cat() == catSpace ||
 			       t.cat() == catOther ||
 			       t.cat() == catAlign ||
 			       t.cat() == catParameter) {
@@ -495,16 +591,9 @@
 			os << t.character();
 		}
 
-		else if (t.cat() == catNewline) {
-			if (p.next_token().cat() == catNewline) {
-				// this should have been be done by
-				// the parser already
-				cerr << "what are we doing here?" << endl;
-				p.get_token();
-				context.need_layout = true;
-			} else {
-				os << " "; // note the space
-			}
+		else if (t.cat() == catNewline || (t.cat() == catEscape && t.cs() == "par")) {
+			p.skip_spaces();
+			context.new_paragraph(os);
 		}
 
 		else if (t.cat() == catActive) {
@@ -519,20 +608,19 @@
 		}
 
 		else if (t.cat() == catBegin) {
-// FIXME???
 			// special handling of size changes
 			context.check_layout(os);
 			bool const is_size = is_known(p.next_token().cs(), known_sizes);
-			Context newcontext(false, context.textclass);
-//			need_end_layout = false;
-			string const s = parse_text(p, FLAG_BRACE_LAST, outer, newcontext);
-//			need_end_layout = true;
-			if (s.empty() && p.next_token().character() == '`')
-				; // ignore it in  {}``
+			Token const prev = p.prev_token();
+			string const s = parse_text(p, FLAG_BRACE_LAST, outer, context);
+			if (s.empty() && (p.next_token().character() == '`' ||
+			                  (prev.character() == '-' && p.next_token().character())))
+				; // ignore it in {}`` or -{}-
 			else if (is_size || s == "[" || s == "]" || s == "*")
 				os << s;
 			else {
-				handle_ert(os, "{", context);
+				handle_ert(os, "{", context, false);
+				// s will end the current layout and begin a new one if necessary
 				os << s;
 				handle_ert(os, "}", context);
 			}
@@ -540,15 +628,26 @@
 
 		else if (t.cat() == catEnd) {
 			if (flags & FLAG_BRACE_LAST) {
-				context.check_end_layout(os);
 				return;
 			}
 			cerr << "stray '}' in text\n";
 			handle_ert(os, "}", context);
 		}
 
-		else if (t.cat() == catComment)
-			handle_comment(p);
+		else if (t.cat() == catComment) {
+			context.check_layout(os);
+			if (t.cs().size()) {
+				handle_comment(os, '%' + t.cs(), context);
+				if (p.next_token().cat() == catNewline) {
+					// A newline after a comment line starts a new paragraph
+					context.new_paragraph(os);
+					p.skip_spaces();
+				}
+			} else {
+				// "%\n" combination
+				p.skip_spaces();
+			}
+		}
 
 		//
 		// control sequences
@@ -588,8 +687,7 @@
 		}
 
 		else if (t.cs() == "item") {
-			// should be done automatically by Parser::tokenize
-			//p.skip_spaces();
+			p.skip_spaces();
 			string s;
 			bool optarg = false;
 			if (p.next_token().character() == '[') {
@@ -598,11 +696,10 @@
 				s = parse_text(p, FLAG_BRACK_LAST, outer, newcontext);
 				optarg = true;
 			}
-			context.need_layout = true;
-			context.has_item = true;
+			context.set_item();
 			context.check_layout(os);
 			if (optarg) {
-				if (active_environment() == "itemize") {
+				if (context.layout->labeltype != LABEL_MANUAL) {
 					// lyx does not support \item[\mybullet] in itemize environments
 					handle_ert(os, "[", context);
 					os << s;
@@ -610,13 +707,13 @@
 				} else if (s.size()) {
 					// The space is needed to separate the item from the rest of the sentence.
 					os << s << ' ';
+					p.skip_spaces();
 				}
 			}
 		}
 
 		else if (t.cs() == "bibitem") {
-			context.need_layout = true;
-			context.has_item = true;
+			context.set_item();
 			context.check_layout(os);
 			os << "\\bibitem ";
 			os << p.getOpt();
@@ -624,6 +721,7 @@
 		}
 
 		else if (t.cs() == "def") {
+			p.skip_spaces();
 			context.check_layout(os);
 			string name = p.get_token().cs();
 			while (p.next_token().cat() != catBegin)
@@ -631,20 +729,14 @@
 			handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}', context);
 		}
 
-		else if (t.cs() == "par") {
+		else if (t.cs() == "noindent") {
 			p.skip_spaces();
-			context.check_end_layout(os);
-			context.need_layout = true;
+			context.extra_stuff += "\\noindent ";
 		}
 
 		else if (t.cs() == "appendix") {
-			context.check_end_layout(os);
-			Context newcontext(true, context.textclass, context.layout,
-					context.layout);
-			newcontext.check_layout(os);
-			os << "\\start_of_appendix\n";
-			parse_text(p, os, FLAG_END, outer, newcontext);
-			newcontext.check_end_layout(os);
+			p.skip_spaces();
+			context.extra_stuff += "\\start_of_appendix ";
 		}
 
 		// Must attempt to parse "Section*" before "Section".
@@ -655,12 +747,14 @@
 			 newlayout->isCommand()) {
 			p.get_token();
 			output_command_layout(os, p, outer, context, newlayout);
+			p.skip_spaces();
 		}
 
 		// The single '=' is meant here.
 		else if ((newlayout = findLayout(context.textclass, t.cs())).get() &&
 			 newlayout->isCommand()) {
 			output_command_layout(os, p, outer, context, newlayout);
+			p.skip_spaces();
 		}
 
 		else if (t.cs() == "includegraphics") {
@@ -763,22 +857,25 @@
 		}
 
 		else if (t.cs() == "footnote") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "Foot\n");
-			os << "collapsed true\n";
+			os << "collapsed true\n\n";
 			parse_text_in_inset(p, os, FLAG_ITEM, false, context);
 			end_inset(os);
 		}
 
 		else if (t.cs() == "marginpar") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "Marginal\n");
-			os << "collapsed true\n";
+			os << "collapsed true\n\n";
 			parse_text_in_inset(p, os, FLAG_ITEM, false, context);
 			end_inset(os);
 		}
 
 		else if (t.cs() == "ensuremath") {
+			p.skip_spaces();
 			context.check_layout(os);
 			Context newcontext(false, context.textclass);
 			string s = parse_text(p, FLAG_ITEM, false, newcontext);
@@ -793,12 +890,16 @@
 			context.check_layout(os);
 			os << "\n\\hfill\n";
 			skip_braces(p);
+			p.skip_spaces();
 		}
 
-		else if (t.cs() == "makeindex" || t.cs() == "maketitle")
+		else if (t.cs() == "makeindex" || t.cs() == "maketitle") {
+			p.skip_spaces();
 			skip_braces(p); // swallow this
+		}
 
 		else if (t.cs() == "tableofcontents") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "LatexCommand \\tableofcontents\n");
 			end_inset(os);
@@ -806,6 +907,7 @@
 		}
 
 		else if (t.cs() == "listoffigures") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "FloatList figure\n");
 			end_inset(os);
@@ -813,6 +915,7 @@
 		}
 
 		else if (t.cs() == "listoftables") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "FloatList table\n");
 			end_inset(os);
@@ -820,6 +923,7 @@
 		}
 
 		else if (t.cs() == "listof") {
+			p.skip_spaces(true);
 			string const name = p.get_token().asString();
 			if (context.textclass.floats().typeExist(name)) {
 				context.check_layout(os);
@@ -906,6 +1010,7 @@
 
 		else if (is_known(t.cs(), known_quotes)) {
 			char const ** where = is_known(t.cs(), known_quotes);
+			context.check_layout(os);
 			begin_inset(os, "Quotes ");
 			os << known_coded_quotes[where - known_quotes];
 			end_inset(os);
@@ -916,6 +1021,7 @@
 			char const ** where = is_known(t.cs(), known_sizes);
 			context.check_layout(os);
 			os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
+			p.skip_spaces();
 		}
 
 		else if (t.cs() == "LyX" || t.cs() == "TeX"
@@ -1096,6 +1202,35 @@
 			end_inset(os);
 		}
 
+		else if ( t.cs() == "smallskip" ||
+		          t.cs() == "medskip" ||
+			  t.cs() == "bigskip" ||
+			  t.cs() == "vfill" ||
+		         (t.cs() == "vspace" && p.next_token().asInput() != "*")) {
+			string arg;
+			if (t.cs() == "vspace")
+				arg = p.getArg('{', '}');
+			else
+				arg = t.cs();
+			// Record the vspace as \added_space_top only at a paragraph start that is
+			// not ended right away (multi-char newline token, \end or \par); else ERT.
+			if (context.atParagraphStart()
+			    && (p.next_token().cat() != catNewline || p.next_token().cs().size() == 1)
+			    && (! (p.next_token().cat() == catEscape && p.next_token().cs() == "end"))
+			    && (! (p.next_token().cat() == catEscape && p.next_token().cs() == "par"))) {
+				context.extra_stuff += "\\added_space_top " + arg + " ";
+				p.skip_spaces();
+			} else {
+				if (t.cs() == "vspace")
+					handle_ert(os, t.asInput() + '{' + arg + '}', context);
+				else
+					handle_ert(os, t.asInput(), context);
+			}
+			// Would be nice to recognize added_space_bottom too...
+			// At the moment this is parsed as added_space_top of the
+			// next paragraph.
+		}
+
 		else if (t.cs() == "psfrag") {
 			// psfrag{ps-text}[ps-pos][tex-pos]{tex-text}
 			// TODO: Generalize this!
@@ -1103,7 +1238,6 @@
 			arguments += '}';
 			arguments += p.getOpt();
 			arguments += p.getOpt();
-			p.skip_spaces();
 			handle_ert(os, "\\psfrag{" + arguments, context);
 		}
 
@@ -1122,7 +1256,13 @@
 			handle_ert(os, s + ' ', context);
 			*/
 			context.check_layout(os);
-			handle_ert(os, t.asInput() + ' ', context);
+			string name = t.asInput();
+			if (p.next_token().asInput() == "*") {
+				// Starred commands like \vspace*{}
+				p.get_token();				// Eat '*'
+				name += '*';
+			}
+			handle_ert(os, name, context);
 		}
 
 		if (flags & FLAG_LEAVE) {

Re: [patch] tex2lyx whitespace changes

Reply via email to