Author: af
Date: Thu Jun 12 11:01:20 2014
New Revision: 1602118
URL: http://svn.apache.org/r1602118
Log:
125035: Added support for actions to the experimental Java parser.
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionDescriptor.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionIterator.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionManager.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionTrigger.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeValues.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ElementContext.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/IAction.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Parser.java
Modified:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionDescriptor.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionDescriptor.java?rev=1602118&view=auto
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionDescriptor.java
(added)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionDescriptor.java
Thu Jun 12 11:01:20 2014
@@ -0,0 +1,93 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.Vector;
+
+/** Holds every action that is registered for one single state.
+ *  The actions are kept in one list per trigger (element start, element end,
+ *  text).  A list is only allocated when the first action for its trigger is
+ *  added.
+ */
+public class ActionDescriptor
+{
+    /** Create a descriptor that does not yet contain any actions.
+     *  @param nStateId
+     *      Id of the state.  It is accepted but not stored.
+     *  @param sName
+     *      Name of the state.  It is only used by toString().
+     */
+    public ActionDescriptor (
+        final int nStateId,
+        final String sName)
+    {
+        // The three action lists are not final and therefore start out as
+        // null.  They are created on demand by GetActionsForTrigger().
+        msStateName = sName;
+    }
+
+
+
+
+    /** Append an action to the list that belongs to the given trigger.
+     *  The list is created when this is the first action for the trigger.
+     */
+    public void AddAction (
+        final IAction aAction,
+        final ActionTrigger eTrigger)
+    {
+        final Vector<IAction> aTargetList = GetActionsForTrigger(eTrigger, true);
+        aTargetList.add(aAction);
+    }
+
+
+
+
+    /** Return the actions that are registered for the given trigger.
+     *  @return
+     *      Is null when no action has been added for the trigger.
+     */
+    public Iterable<IAction> GetActions (
+        final ActionTrigger eTrigger)
+    {
+        final Vector<IAction> aRegisteredActions = GetActionsForTrigger(eTrigger, false);
+        return aRegisteredActions;
+    }
+
+
+
+
+    @Override
+    public String toString ()
+    {
+        return "actions for state " + msStateName;
+    }
+
+
+
+
+    /** Look up the action list of one trigger.
+     *  @param bCreateWhenMissing
+     *      When true then a missing list is created, stored and returned.
+     *      When false then a missing list is reported as null.
+     */
+    private Vector<IAction> GetActionsForTrigger (
+        final ActionTrigger eTrigger,
+        final boolean bCreateWhenMissing)
+    {
+        switch (eTrigger)
+        {
+            case ElementStart:
+                if (maElementStartActions == null && bCreateWhenMissing)
+                    maElementStartActions = new Vector<>();
+                return maElementStartActions;
+
+            case ElementEnd:
+                if (maElementEndActions == null && bCreateWhenMissing)
+                    maElementEndActions = new Vector<>();
+                return maElementEndActions;
+
+            case Text:
+                if (maTextActions == null && bCreateWhenMissing)
+                    maTextActions = new Vector<>();
+                return maTextActions;
+
+            default:
+                // A trigger without its own list has no actions.
+                return null;
+        }
+    }
+
+
+
+
+    private final String msStateName;
+    private Vector<IAction> maElementStartActions;
+    private Vector<IAction> maElementEndActions;
+    private Vector<IAction> maTextActions;
+}
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionIterator.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionIterator.java?rev=1602118&view=auto
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionIterator.java
(added)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionIterator.java
Thu Jun 12 11:01:20 2014
@@ -0,0 +1,97 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.Iterator;
+
+/** Iterate over two sources of actions, both given as an Iterable<IAction>
+ * object that can be null.
+*/
+public class ActionIterator implements Iterable<IAction>
+{
+ public ActionIterator (
+ final Iterable<IAction> aOneStateActions,
+ final Iterable<IAction> aAllStateActions)
+ {
+ maOneStateActions = aOneStateActions;
+ maAllStateActions = aAllStateActions;
+ }
+
+
+
+
+ @Override public Iterator<IAction> iterator()
+ {
+ return new Iterator<IAction>()
+ {
+ Iterator<IAction> maIterator = null;
+ int mnPhase = 0;
+
+ @Override
+ public boolean hasNext()
+ {
+ while(true)
+ {
+ if (mnPhase == 2)
+ return false;
+ else if (mnPhase == 0)
+ {
+ if (maIterator == null)
+ if (maOneStateActions == null)
+ {
+ mnPhase = 1;
+ continue;
+ }
+ else
+ maIterator = maOneStateActions.iterator();
+ if (maIterator.hasNext())
+ return true;
+ else
+ {
+ maIterator = null;
+ mnPhase = 1;
+ }
+ }
+ else if (mnPhase == 1)
+ {
+ if (maIterator == null)
+ if (maAllStateActions == null)
+ {
+ mnPhase = 2;
+ return false;
+ }
+ else
+ maIterator = maAllStateActions.iterator();
+ if (maIterator.hasNext())
+ return true;
+ else
+ {
+ mnPhase = 2;
+ }
+ }
+ }
+ }
+
+
+
+
+ @Override
+ public IAction next()
+ {
+ return maIterator.next();
+ }
+
+
+
+
+ @Override
+ public void remove()
+ {
+ }
+ };
+ }
+
+
+
+
+ private final Iterable<IAction> maOneStateActions;
+ private final Iterable<IAction> maAllStateActions;
+}
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionManager.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionManager.java?rev=1602118&view=auto
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionManager.java
(added)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionManager.java
Thu Jun 12 11:01:20 2014
@@ -0,0 +1,142 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** Registry of the actions that are bound to states and XML events.
+ *  Actions are either bound to individual states or, via the selector "*",
+ *  to all states.
+ */
+public class ActionManager
+{
+    /** @param aStateNameToIdMap
+     *      Used to translate state selectors into state ids and state ids
+     *      back into state names.
+     */
+    ActionManager (
+        final NameMap aStateNameToIdMap)
+    {
+        maStateNameToIdMap = aStateNameToIdMap;
+        maAllStatesActions = new ActionDescriptor(0,"*");
+        maStateToActionsMap = new HashMap<>();
+    }
+
+
+
+
+    /** Add an action for an element start.
+     *  @param sStateSelector
+     *      The element is specified via a state name.  This allows one element
+     *      that leads to different complex types to have different actions,
+     *      depending on the complex type.
+     *      The selector value can be a full state name (including the namespace
+     *      prefix and CT prefix, e.g. w06_CT_Table) or a regular expression
+     *      (e.g. .*_CT_Table to match w06_CT_Table and w12_CT_Table).
+     *      The action is bound to all matching states.
+     *  @param aAction
+     *      The action to call on entering any of the states that match the
+     *      selector.
+     */
+    public void AddElementStartAction (
+        final String sStateSelector,
+        final IAction aAction)
+    {
+        AddAction(sStateSelector, aAction, ActionTrigger.ElementStart);
+    }
+
+
+
+
+    /** Add an action for an element end.
+     *  @see #AddElementStartAction(String, IAction)
+     */
+    public void AddElementEndAction (
+        final String sStateSelector,
+        final IAction aAction)
+    {
+        AddAction(sStateSelector, aAction, ActionTrigger.ElementEnd);
+    }
+
+
+
+
+    /** Add an action for XML text events.
+     *  @see #AddElementStartAction(String, IAction)
+     */
+    public void AddTextAction (
+        final String sStateSelector,
+        final IAction aAction)
+    {
+        AddAction(sStateSelector, aAction, ActionTrigger.Text);
+    }
+
+
+
+
+    /** Return an iterable object that gives access to all actions
+     *  bound to the given state and trigger.  Actions bound to the state
+     *  itself come before the actions that are bound to all states.
+     *  Return value can be null when there are no actions bound to the state
+     *  and trigger.
+     */
+    public Iterable<IAction> GetActions (
+        final int nStateId,
+        final ActionTrigger eTrigger)
+    {
+        final ActionDescriptor aStateDescriptor = maStateToActionsMap.get(nStateId);
+        Iterable<IAction> aOneStateActions = null;
+        if (aStateDescriptor != null)
+            aOneStateActions = aStateDescriptor.GetActions(eTrigger);
+        final Iterable<IAction> aAllStateActions = maAllStatesActions.GetActions(eTrigger);
+
+        // Only when both sources exist is a combining iterator necessary.
+        if (aOneStateActions != null && aAllStateActions != null)
+            return new ActionIterator(aOneStateActions, aAllStateActions);
+
+        // At most one source exists.  Return it, or null when there is none.
+        return aOneStateActions != null
+            ? aOneStateActions
+            : aAllStateActions;
+    }
+
+
+
+
+    /** Bind the action to all states that are described by the selector.
+     */
+    private void AddAction (
+        final String sStateSelector,
+        final IAction aAction,
+        final ActionTrigger eTrigger)
+    {
+        if (sStateSelector.equals("*"))
+        {
+            // Simple optimization when an action is defined for all states.
+            maAllStatesActions.AddAction(aAction, eTrigger);
+            return;
+        }
+
+        final boolean bHasWildcards =
+            sStateSelector.contains("*") || sStateSelector.contains("?");
+        if ( ! bHasWildcards)
+        {
+            // The selector is a plain state name.
+            final int nStateId = maStateNameToIdMap.GetIdForName(sStateSelector);
+            GetActionDescriptor(nStateId).AddAction(aAction, eTrigger);
+            return;
+        }
+
+        // The state selector contains wildcards.  We have to iterate over
+        // all state names to find the matching ones.
+        for (final int nStateId : maStateNameToIdMap.GetMatchingStateIds(sStateSelector))
+        {
+            GetActionDescriptor(nStateId).AddAction(aAction, eTrigger);
+        }
+    }
+
+
+
+
+    /** Return the descriptor for the given state.  It is created and
+     *  remembered when it does not yet exist.
+     */
+    private ActionDescriptor GetActionDescriptor (final int nStateId)
+    {
+        final ActionDescriptor aExistingDescriptor = maStateToActionsMap.get(nStateId);
+        if (aExistingDescriptor != null)
+            return aExistingDescriptor;
+
+        final ActionDescriptor aNewDescriptor = new ActionDescriptor(
+            nStateId,
+            maStateNameToIdMap.GetNameForId(nStateId));
+        maStateToActionsMap.put(nStateId, aNewDescriptor);
+        return aNewDescriptor;
+    }
+
+
+
+
+    private final NameMap maStateNameToIdMap;
+    private final ActionDescriptor maAllStatesActions;
+    private final Map<Integer,ActionDescriptor> maStateToActionsMap;
+}
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionTrigger.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionTrigger.java?rev=1602118&view=auto
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionTrigger.java
(added)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ActionTrigger.java
Thu Jun 12 11:01:20 2014
@@ -0,0 +1,10 @@
+package org.apache.openoffice.ooxml.parser;
+
+/** An enumeration of all supported action triggers.
+ *  A trigger names the kind of XML event for which an action is registered
+ *  and called.
+ */
+public enum ActionTrigger
+{
+ // An element has just been entered (start tag).
+ ElementStart,
+ // An element is about to be left (end tag).
+ ElementEnd,
+ // An XML text event.
+ Text
+}
\ No newline at end of file
Modified:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java?rev=1602118&r1=1602117&r2=1602118&view=diff
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
(original)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
Thu Jun 12 11:01:20 2014
@@ -89,10 +89,12 @@ public class AttributeManager
/** For the state with id nStateId, match the attributes from the document
* with the attribute specifications of that state.
*/
- public void ParseAttributes (
+ public AttributeValues ParseAttributes (
final int nStateId,
final AttributeProvider aDocumentAttributes)
{
+ final AttributeValues aValues = new AttributeValues();
+
final Map<Integer,AttributeDescriptor> aAttributesPerState =
maStateIdToAttributesMap.get(nStateId);
if (aAttributesPerState == null)
{
@@ -120,6 +122,8 @@ public class AttributeManager
aEntry[2],
aAttributesPerState);
aUsedAttributes.add(aAttributeDescriptor);
+ aValues.AddAttribute(aAttributeDescriptor, aEntry[2]);
+
if (Log.Dbg != null)
{
if (aAttributeDescriptor == null)
@@ -147,6 +151,8 @@ public class AttributeManager
}
}
}
+
+ return aValues;
}
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeValues.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeValues.java?rev=1602118&view=auto
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeValues.java
(added)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeValues.java
Thu Jun 12 11:01:20 2014
@@ -0,0 +1,49 @@
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
+/** Container of attribute values of an opening tag.
+ *  Values are stored under the name of their attribute.  The backing
+ *  TreeMap keeps them sorted by that name.
+ */
+public class AttributeValues
+{
+    /** Create an empty container.
+     */
+    AttributeValues ()
+    {
+        maAttributes = new TreeMap<>();
+    }
+
+
+
+
+    /** Store the value of one attribute.  A value that was stored earlier
+     *  under the same attribute name is replaced.
+     *  @param aAttributeDescriptor
+     *      Provides the name under which the value is stored.
+     *      When it is null then the call is ignored.
+     *  @param sValue
+     *      The attribute value as it was read from the document.
+     */
+    public void AddAttribute (
+        final AttributeDescriptor aAttributeDescriptor,
+        final String sValue)
+    {
+        // AttributeManager.ParseAttributes() tests its descriptor for null
+        // only after it has called this method.  A null descriptor can
+        // therefore arrive here.  Without a name there is nothing to store,
+        // and dereferencing it would abort the parse with a
+        // NullPointerException.
+        if (aAttributeDescriptor == null)
+            return;
+
+        maAttributes.put(
+            aAttributeDescriptor.GetName(),
+            sValue);
+    }
+
+
+
+
+    /** Return all (name, value) pairs, ordered by attribute name.
+     *  The returned object is a view of the internal map.
+     */
+    public Iterable<Entry<String,Object>> GetAttributes ()
+    {
+        return maAttributes.entrySet();
+    }
+
+
+
+
+    /** Return the number of stored attribute values.
+     */
+    public int GetAttributeCount ()
+    {
+        return maAttributes.size();
+    }
+
+
+
+
+    private final Map<String,Object> maAttributes;
+}
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ElementContext.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ElementContext.java?rev=1602118&view=auto
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ElementContext.java
(added)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ElementContext.java
Thu Jun 12 11:01:20 2014
@@ -0,0 +1,65 @@
+package org.apache.openoffice.ooxml.parser;
+
+/** Context that has the same life time (by default) as the element it represents.
+ *  Gives access to the element name, the type name, the attribute values
+ *  and the parent context.  All values are fixed at construction time.
+ */
+public class ElementContext
+{
+    /** @param sElementName
+     *      Returned by GetElementName().
+     *  @param sTypeName
+     *      Returned by GetTypeName().
+     *  @param bIsSkipping
+     *      Returned by IsSkipping().
+     *  @param aValues
+     *      The attribute values of the element.
+     *  @param aParentContext
+     *      Context of the parent element.  Can be null.
+     */
+    ElementContext (
+        final String sElementName,
+        final String sTypeName,
+        final boolean bIsSkipping,
+        final AttributeValues aValues,
+        final ElementContext aParentContext)
+    {
+        msElementName = sElementName;
+        msTypeName = sTypeName;
+        mbIsSkipping = bIsSkipping;
+        maAttributeValues = aValues;
+        maParentContext = aParentContext;
+    }
+
+
+
+
+    /** Return the element name that was given to the constructor.
+     */
+    public String GetElementName ()
+    {
+        return msElementName;
+    }
+
+
+
+
+    /** Return the type name that was given to the constructor.
+     */
+    public String GetTypeName ()
+    {
+        return msTypeName;
+    }
+
+
+
+
+    /** Return the skipping flag that was given to the constructor.
+     *  Without this accessor the flag would be stored but never be readable.
+     */
+    public boolean IsSkipping ()
+    {
+        return mbIsSkipping;
+    }
+
+
+
+
+    /** Return the attribute values that were given to the constructor.
+     */
+    public AttributeValues GetAttributes ()
+    {
+        return maAttributeValues;
+    }
+
+
+
+
+    /** Return the context of the parent element.
+     *  Can be null when there is no parent element.
+     */
+    public ElementContext GetParentContext ()
+    {
+        return maParentContext;
+    }
+
+
+
+
+    private final String msElementName;
+    private final String msTypeName;
+    private final boolean mbIsSkipping;
+    private final AttributeValues maAttributeValues;
+    private final ElementContext maParentContext;
+}
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/IAction.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/IAction.java?rev=1602118&view=auto
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/IAction.java
(added)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/IAction.java
Thu Jun 12 11:01:20 2014
@@ -0,0 +1,27 @@
+package org.apache.openoffice.ooxml.parser;
+
+import javax.xml.stream.Location;
+
+/** Callback interface for actions that are bound to states and are
+ *  triggered by XML events.
+ */
+public interface IAction
+{
+    /** Called once for every XML event the action is registered for.
+     *  @param eTrigger
+     *      Equivalent to the XML event type.
+     *  @param aContext
+     *      The context of the element that was just entered (element start),
+     *      is about to be left (element end) or is currently active (all
+     *      other events).
+     *  @param sText
+     *      Contains text for ActionTrigger.Text.  Is null for all other
+     *      triggers.
+     *  @param aLocation
+     *      The location in the source file that triggered the XML event.
+     */
+    void Run (
+        ActionTrigger eTrigger,
+        ElementContext aContext,
+        String sText,
+        Location aLocation);
+}
Modified:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java?rev=1602118&r1=1602117&r2=1602118&view=diff
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
(original)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
Thu Jun 12 11:01:20 2014
@@ -23,6 +23,7 @@ package org.apache.openoffice.ooxml.pars
import java.util.HashMap;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Vector;
public class NameMap
@@ -77,6 +78,22 @@ public class NameMap
+
+ /** Return the ids of all states whose names match the given pattern.
+  *  @param sPattern
+  *      A regular expression in java.util.regex syntax.  It has to match
+  *      a name as a whole, not just a part of it.
+  *  @return
+  *      The ids of the matching names.  Empty when no name matches.
+  *  @throws java.util.regex.PatternSyntaxException
+  *      when sPattern is not a valid regular expression.
+  */
+ public Vector<Integer> GetMatchingStateIds (final String sPattern)
+ {
+     // Compile the expression once.  String.matches() would compile it
+     // again for every single name.
+     final java.util.regex.Pattern aPattern = java.util.regex.Pattern.compile(sPattern);
+
+     final Vector<Integer> aStateIds = new Vector<>();
+     for (final Entry<String,Integer> aEntry : maNameToIdMap.entrySet())
+     {
+         if (aPattern.matcher(aEntry.getKey()).matches())
+             aStateIds.add(aEntry.getValue());
+     }
+     return aStateIds;
+ }
+
+
+
private final Map<String,Integer> maNameToIdMap;
private final Vector<String> maIdToNameMap;
Modified:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java?rev=1602118&r1=1602117&r2=1602118&view=diff
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
(original)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
Thu Jun 12 11:01:20 2014
@@ -24,12 +24,13 @@ package org.apache.openoffice.ooxml.pars
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
+import javax.xml.stream.Location;
/** This OOXML parser is based on the output of the schema parser.
* It exists to debug the schema parser and as illustration and preparation of
@@ -39,6 +40,10 @@ import javax.xml.stream.XMLStreamReader;
*/
public class OOXMLParser
{
+ /** Data that is shared between the actions that are registered by
+  *  AddSomeActions() and the code that prints the result.
+  */
+ class ActionContext
+ {
+     /** Maps a type name to the number of times it has been counted.
+      *  The map is sorted by type name.  The reference is final so that
+      *  all users are guaranteed to see the same map.
+      */
+     public final Map<String,Integer> TypeCounts = new TreeMap<>();
+ }
/** The parser is called with two arguments:
* - A path to where the parser tables with the states and transitions can
* be found.
@@ -62,24 +67,41 @@ public class OOXMLParser
Log.Dbg = null;
System.out.printf("writing no log data\n");
}
-
+
+ new OOXMLParser(aArgumentList[0], aArgumentList[1]);
+ }
+
+
+
+ private OOXMLParser (
+ final String sParseTableFilename,
+ final String sInputFilename)
+ {
long nStartTime = System.currentTimeMillis();
- final StateMachine aMachine = new StateMachine(new
File(aArgumentList[0]));
- final InputStream aIn = GetInputStream(aArgumentList[1]);
- final XMLStreamReader aReader = GetStreamReader(aIn, aArgumentList[1]);
+ final StateMachine aMachine = new StateMachine(new
File(sParseTableFilename));
+ final InputStream aIn = GetInputStream(sInputFilename);
long nEndTime = System.currentTimeMillis();
+
+ final ActionContext aActionContext = new ActionContext();
+ AddSomeActions(aMachine.GetActionManager(), aActionContext);
+
System.out.printf("initialzed parser in %fs\n",
(nEndTime-nStartTime)/1000.0);
try
{
- if (aReader != null)
+ nStartTime = System.currentTimeMillis();
+ final Parser aParser = new Parser(aMachine, aIn);
+ aParser.Parse();
+ final int nElementCount = aParser.GetElementCount();
+ nEndTime = System.currentTimeMillis();
+ System.out.printf("parsed %d elements in %fs\n",
+ nElementCount,
+ (nEndTime-nStartTime)/1000.0);
+
+ System.out.printf("%d different elements found:\n",
aActionContext.TypeCounts.size());
+ for (final Entry<String, Integer> aEntry :
aActionContext.TypeCounts.entrySet())
{
- nStartTime = System.currentTimeMillis();
- final int nElementCount = Parse(aReader, aMachine);
- nEndTime = System.currentTimeMillis();
- System.out.printf("parsed %d elements in %fs\n",
- nElementCount,
- (nEndTime-nStartTime)/1000.0);
+ System.out.printf("%-32s : %6d\n", aEntry.getKey(),
aEntry.getValue());
}
}
catch (final Exception aException)
@@ -91,6 +113,71 @@ public class OOXMLParser
+ private static void AddSomeActions (
+ final ActionManager aActionManager,
+ final ActionContext aActionContext)
+ {
+ aActionManager.AddElementStartAction(
+ "*",
+ new IAction()
+ {
+ @Override public void Run(
+ final ActionTrigger eTrigger,
+ final ElementContext aContext,
+ final String sText,
+ final Location aLocation)
+ {
+ Integer nValue =
aActionContext.TypeCounts.get(aContext.GetTypeName());
+ if (nValue == null)
+ nValue = 1;
+ else
+ ++nValue;
+ aActionContext.TypeCounts.put(aContext.GetTypeName(),
nValue);
+ }
+ }
+ );
+ aActionManager.AddElementStartAction(
+ ".*CT_Shd",
+ new IAction()
+ {
+ @Override public void Run(
+ final ActionTrigger eTrigger,
+ final ElementContext aContext,
+ final String sText,
+ final Location aLocation)
+ {
+ System.out.printf("processing %s of element %s at position
%d\n",
+ eTrigger,
+ aContext.GetElementName(),
+ aLocation.getCharacterOffset());
+
+ if (aContext.GetAttributes().GetAttributeCount() == 0)
+ System.out.printf(" no attributes\n");
+ else
+ for (final Entry<String,Object> aAttribute :
aContext.GetAttributes().GetAttributes())
+ System.out.printf(" %s -> %s\n",
aAttribute.getKey(), aAttribute.getValue());
+ }
+ }
+ );
+ aActionManager.AddTextAction(
+ ".*CT_Text",
+ new IAction()
+ {
+ @Override public void Run(
+ final ActionTrigger eTrigger,
+ final ElementContext aContext,
+ final String sText,
+ final Location aLocation)
+ {
+ System.out.printf("%s text \"%s\"\n",
aContext.GetTypeName(), sText.replace("\n", "\\n"));
+ }
+ }
+ );
+ }
+
+
+
+
private static InputStream GetInputStream (final String sInputName)
{
final InputStream aIn;
@@ -126,178 +213,4 @@ public class OOXMLParser
}
return aIn;
}
-
-
-
-
- private static XMLStreamReader GetStreamReader (
- final InputStream aIn,
- final String sDescription)
- {
- if (aIn == null)
- return null;
-
- try
- {
- final XMLInputFactory aFactory =
(XMLInputFactory)XMLInputFactory.newInstance();
-
aFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
-
aFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
- aFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
-
- return (XMLStreamReader)aFactory.createXMLStreamReader(
- sDescription,
- aIn);
- }
- catch (final Exception aException)
- {
- aException.printStackTrace();
- return null;
- }
- }
-
-
-
-
- private static int Parse (
- final XMLStreamReader aReader,
- final StateMachine aMachine)
- {
- int nElementCount = 0;
- try
- {
- final AttributeProvider aAttributeProvider = new
AttributeProvider(aReader);
- while (aReader.hasNext())
- {
- final int nCode = aReader.next();
- switch(nCode)
- {
- case XMLStreamReader.START_ELEMENT:
- ++nElementCount;
- if (aMachine.IsInSkipState())
- {
- if (Log.Dbg != null)
- Log.Dbg.printf("is skip state -> starting to
skip\n");
- nElementCount += Skip(aReader);
- }
- else if ( ! aMachine.ProcessStartElement(
- aReader.getNamespaceURI(),
- aReader.getLocalName(),
- aReader.getLocation(),
- aAttributeProvider))
- {
- if (Log.Dbg != null)
- Log.Dbg.printf("starting to skip to recover
from error\n");
- nElementCount += Skip(aReader);
- }
- break;
-
- case XMLStreamReader.END_ELEMENT:
- aMachine.ProcessEndElement(
- aReader.getNamespaceURI(),
- aReader.getLocalName(),
- aReader.getLocation());
- break;
-
- case XMLStreamReader.CHARACTERS:
- final String sText = aReader.getText();
- if (Log.Dbg != null)
- Log.Dbg.printf("text [%s]\n", sText.replace("\n",
"\\n"));
- aMachine.ProcessCharacters(sText);
- break;
-
- case XMLStreamReader.END_DOCUMENT:
- Log.Std.printf("--- end of document ---\n");
- break;
-
- default:
- Log.Err.printf("can't handle XML event of type %d\n",
nCode);
- }
- }
-
- aReader.close();
- }
- catch (final XMLStreamException aException)
- {
- aException.printStackTrace();
- }
-
- return nElementCount;
- }
-
-
-
-
- private static int Skip (final XMLStreamReader aReader)
- {
- if (Log.Dbg != null)
- {
- Log.Dbg.printf("starting to skip on %s at L%dC%d\n",
- aReader.getLocalName(),
- aReader.getLocation().getLineNumber(),
- aReader.getLocation().getColumnNumber());
- Log.Dbg.IncreaseIndentation();
- }
-
- // We are called when processing a start element. This means that we
are
- // already at relative depth 1.
- int nRelativeDepth = 1;
- int nElementCount = 0;
- try
- {
- while (aReader.hasNext())
- {
- final int nCode = aReader.next();
- switch (nCode)
- {
- case XMLStreamReader.START_ELEMENT:
- ++nRelativeDepth;
- ++nElementCount;
- if (Log.Dbg != null)
- {
- Log.Dbg.printf("skipping start element %s\n",
aReader.getLocalName());
- Log.Dbg.IncreaseIndentation();
- }
- break;
-
- case XMLStreamReader.END_ELEMENT:
- --nRelativeDepth;
- if (Log.Dbg != null)
- Log.Dbg.DecreaseIndentation();
- if (nRelativeDepth <= 0)
- {
- if (Log.Dbg != null)
- Log.Dbg.printf("leaving skip mode on %s\n",
aReader.getLocalName());
- return nElementCount;
- }
- break;
-
- case XMLStreamReader.END_DOCUMENT:
- throw new RuntimeException("saw end of document while
skipping elements\n");
-
- case XMLStreamReader.CHARACTERS:
- SkipText(aReader.getText());
- break;
-
- default:
- if (Log.Dbg != null)
- Log.Dbg.printf("%s\n", nCode);
- break;
- }
- }
- }
- catch (final XMLStreamException aException)
- {
- aException.printStackTrace();
- }
- return nElementCount;
- }
-
-
-
-
- private static void SkipText (final String sText)
- {
- if (Log.Dbg != null)
- Log.Dbg.printf("skipping text [%s]\n", sText.replace("\n", "\\n"));
- }
}
Added:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Parser.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Parser.java?rev=1602118&view=auto
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Parser.java
(added)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Parser.java
Thu Jun 12 11:01:20 2014
@@ -0,0 +1,203 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.InputStream;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+/** This is the actual parser (where OOXMLParser is the front end that handles
+ *  parameters given to the main method).
+ *  It reads XML events from a stream reader and forwards element starts,
+ *  element ends and text to a state machine.  Elements that the state
+ *  machine does not process are skipped together with their content.
+ */
+public class Parser
+{
+    /** @param aMachine
+     *      The state machine that receives the XML events.
+     *  @param aIn
+     *      The XML input.  When it is null, or when no stream reader can be
+     *      created for it, then Parse() reports an error and returns without
+     *      parsing anything.
+     */
+    public Parser (
+        final StateMachine aMachine,
+        final InputStream aIn)
+    {
+        maMachine = aMachine;
+        maReader = GetStreamReader(aIn, "input");
+        mnElementCount = 0;
+    }
+
+
+
+
+    /** Read the whole input and forward its events to the state machine.
+     *  XML stream errors are printed and end the parse.  The stream reader
+     *  is closed afterwards, also when the parse did not succeed.
+     */
+    void Parse ()
+    {
+        // GetStreamReader() returns null when there is no input or when the
+        // reader could not be created.  Report that instead of failing with
+        // a NullPointerException.
+        if (maReader == null)
+        {
+            Log.Err.printf("no XML stream reader available, nothing to parse\n");
+            return;
+        }
+
+        try
+        {
+            final AttributeProvider aAttributeProvider = new AttributeProvider(maReader);
+            while (maReader.hasNext())
+            {
+                final int nCode = maReader.next();
+                switch(nCode)
+                {
+                    case XMLStreamReader.START_ELEMENT:
+                        HandleStartElement(aAttributeProvider);
+                        break;
+
+                    case XMLStreamReader.END_ELEMENT:
+                        maMachine.ProcessEndElement(
+                            maReader.getNamespaceURI(),
+                            maReader.getLocalName(),
+                            maReader.getLocation());
+                        break;
+
+                    case XMLStreamReader.CHARACTERS:
+                        maMachine.ProcessCharacters(
+                            maReader.getText(),
+                            maReader.getLocation());
+                        break;
+
+                    case XMLStreamReader.END_DOCUMENT:
+                        Log.Std.printf("--- end of document ---\n");
+                        break;
+
+                    default:
+                        Log.Err.printf("can't handle XML event of type %d\n", nCode);
+                }
+            }
+        }
+        catch (final XMLStreamException aException)
+        {
+            aException.printStackTrace();
+        }
+        finally
+        {
+            // Release the reader on every path out of the parse loop.
+            CloseReader();
+        }
+    }
+
+
+
+
+    /** Return the number of start elements that have been seen so far.
+     *  Start elements inside skipped content are included.
+     */
+    public int GetElementCount ()
+    {
+        return mnElementCount;
+    }
+
+
+
+
+    /** Count the current start element and forward it to the state machine.
+     *  The element is skipped when the state machine is in a skip state or
+     *  when it reports that it could not process the element.
+     */
+    private void HandleStartElement (final AttributeProvider aAttributeProvider)
+    {
+        ++mnElementCount;
+        if (maMachine.IsInSkipState())
+        {
+            if (Log.Dbg != null)
+                Log.Dbg.printf("is skip state -> starting to skip\n");
+            Skip();
+        }
+        else if ( ! maMachine.ProcessStartElement(
+            maReader.getNamespaceURI(),
+            maReader.getLocalName(),
+            maReader.getLocation(),
+            aAttributeProvider))
+        {
+            if (Log.Dbg != null)
+                Log.Dbg.printf("starting to skip to recover from error\n");
+            Skip();
+        }
+    }
+
+
+
+
+    /** Consume all events up to and including the end tag that matches the
+     *  current start element.  Skipped start elements are counted.
+     *  @throws RuntimeException
+     *      when the document ends before the matching end tag is found.
+     */
+    private void Skip ()
+    {
+        if (Log.Dbg != null)
+        {
+            Log.Dbg.printf("starting to skip on %s at L%dC%d\n",
+                maReader.getLocalName(),
+                maReader.getLocation().getLineNumber(),
+                maReader.getLocation().getColumnNumber());
+            Log.Dbg.IncreaseIndentation();
+        }
+
+        // We are called when processing a start element.  This means that we are
+        // already at relative depth 1.
+        int nRelativeDepth = 1;
+        try
+        {
+            while (maReader.hasNext())
+            {
+                final int nCode = maReader.next();
+                switch (nCode)
+                {
+                    case XMLStreamReader.START_ELEMENT:
+                        ++nRelativeDepth;
+                        ++mnElementCount;
+                        if (Log.Dbg != null)
+                        {
+                            Log.Dbg.printf("skipping start element %s\n", maReader.getLocalName());
+                            Log.Dbg.IncreaseIndentation();
+                        }
+                        break;
+
+                    case XMLStreamReader.END_ELEMENT:
+                        --nRelativeDepth;
+                        if (Log.Dbg != null)
+                            Log.Dbg.DecreaseIndentation();
+                        if (nRelativeDepth <= 0)
+                        {
+                            // This is the end tag of the element on which
+                            // skipping started.
+                            if (Log.Dbg != null)
+                                Log.Dbg.printf("leaving skip mode on %s\n", maReader.getLocalName());
+                            return;
+                        }
+                        break;
+
+                    case XMLStreamReader.END_DOCUMENT:
+                        throw new RuntimeException("saw end of document while skipping elements\n");
+
+                    case XMLStreamReader.CHARACTERS:
+                        SkipText(maReader.getText());
+                        break;
+
+                    default:
+                        if (Log.Dbg != null)
+                            Log.Dbg.printf("%s\n", nCode);
+                        break;
+                }
+            }
+        }
+        catch (final XMLStreamException aException)
+        {
+            aException.printStackTrace();
+        }
+    }
+
+
+
+
+    /** Skipped text is only written to the debug log.
+     */
+    private void SkipText (final String sText)
+    {
+        if (Log.Dbg != null)
+            Log.Dbg.printf("skipping text [%s]\n", sText.replace("\n", "\\n"));
+    }
+
+
+
+
+    /** Close the stream reader.  An error while closing is printed, it does
+     *  not replace an exception that is already on its way to the caller.
+     */
+    private void CloseReader ()
+    {
+        try
+        {
+            maReader.close();
+        }
+        catch (final XMLStreamException aException)
+        {
+            aException.printStackTrace();
+        }
+    }
+
+
+
+
+    /** Create a stream reader for the given input.  Entity references are
+     *  not replaced, external entities are not supported and adjacent text
+     *  is not coalesced.
+     *  @param sDescription
+     *      Passed to the factory as system id of the input.
+     *  @return
+     *      Is null when aIn is null or when the reader can not be created.
+     *      In the latter case the reason is printed.
+     */
+    private static XMLStreamReader GetStreamReader (
+        final InputStream aIn,
+        final String sDescription)
+    {
+        if (aIn == null)
+            return null;
+
+        try
+        {
+            final XMLInputFactory aFactory = XMLInputFactory.newInstance();
+            aFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
+            aFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
+            aFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
+
+            return aFactory.createXMLStreamReader(
+                sDescription,
+                aIn);
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+            return null;
+        }
+    }
+
+
+
+
+    private final XMLStreamReader maReader;
+    private final StateMachine maMachine;
+    private int mnElementCount;
+}
Modified:
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
URL:
http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java?rev=1602118&r1=1602117&r2=1602118&view=diff
==============================================================================
---
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
(original)
+++
openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
Thu Jun 12 11:01:20 2014
@@ -47,21 +47,22 @@ public class StateMachine
aReader.GetSection("attribute"),
maNamespaceMap,
maNameMap);
-
- System.out.printf("read %d namespace, %d names, %d states (%d skip, %d
accept), %d transitions and %d attributes\n",
- maNamespaceMap.GetNamespaceCount(),
- maNameMap.GetNameCount(),
- maStateNameMap.GetNameCount(),
- maSkipStates.GetSkipStateCount(),
- maAcceptingStates.GetAcceptingStateCount(),
- maTransitions.GetTransitionCount(),
- maAttributeManager.GetAttributeCount());
-
mnStartStateId =
Integer.parseInt(aReader.GetSection("start-state").firstElement()[1]);
mnEndStateId =
Integer.parseInt(aReader.GetSection("end-state").firstElement()[1]);
mnCurrentStateId = mnStartStateId;
maStateStack = new Stack<>();
-
+ maElementContextStack = new Stack<>();
+ maActionManager = new ActionManager(maStateNameMap);
+
+ System.out.printf("read %d namespace, %d names, %d states (%d skip, %d
accept), %d transitions and %d attributes\n",
+ maNamespaceMap.GetNamespaceCount(),
+ maNameMap.GetNameCount(),
+ maStateNameMap.GetNameCount(),
+ maSkipStates.GetSkipStateCount(),
+ maAcceptingStates.GetAcceptingStateCount(),
+ maTransitions.GetTransitionCount(),
+ maAttributeManager.GetAttributeCount());
+
if (Log.Dbg != null)
Log.Dbg.printf("starting in state _start_ (%d)\n",
mnCurrentStateId);
}
@@ -79,20 +80,20 @@ public class StateMachine
try
{
- final NamespaceMap.NamespaceDescriptor aDescriptor =
maNamespaceMap.GetDescriptorForURI(sNamespaceURI);
+ final NamespaceMap.NamespaceDescriptor aNamespaceDescriptor =
maNamespaceMap.GetDescriptorForURI(sNamespaceURI);
final int nElementNameId = maNameMap.GetIdForName(sElementName);
if (Log.Dbg != null)
Log.Dbg.printf("%s:%s(%d:%d) L%dC%d\n",
- aDescriptor.Prefix,
+ aNamespaceDescriptor.Prefix,
sElementName,
- aDescriptor.Id,
+ aNamespaceDescriptor.Id,
nElementNameId,
aLocation.getLineNumber(),
aLocation.getColumnNumber());
final Transition aTransition = maTransitions.GetTransition(
mnCurrentStateId,
- aDescriptor.Id,
+ aNamespaceDescriptor.Id,
nElementNameId);
if (aTransition == null)
{
@@ -100,7 +101,7 @@ public class StateMachine
"can not find transition for state %s(%d) and element
%s(%d:%d) at L%dC%d\n",
maStateNameMap.GetNameForId(mnCurrentStateId),
mnCurrentStateId,
- aDescriptor.Id,
+ aNamespaceDescriptor.Id,
maNameMap.GetNameForId(nElementNameId),
nElementNameId,
aLocation.getLineNumber(),
@@ -123,10 +124,42 @@ public class StateMachine
Log.Dbg.printf("\n");
}
- final int nOldState = mnCurrentStateId;
- SetCurrentState(aTransition.GetEndStateId());
+ // Follow the transition to its end state but first process its
+ // content. We do that by
- ExecuteActions(aTransition, aAttributes, nOldState,
mnCurrentStateId);
+ if (Log.Dbg != null)
+ Log.Dbg.IncreaseIndentation();
+
+ // a) pushing the end state to the state stack so that on the
+ // end tag that corresponds to the current start tag it will
become the current state.
+ maStateStack.push(aTransition.GetEndStateId());
+
+ // b) entering the state that corresponds to the start tag that
+ // we are currently processing.
+ mnCurrentStateId = aTransition.GetActionId();
+
+ // c) Prepare the attributes and store them in the new element
context.
+ final AttributeValues aAttributeValues =
maAttributeManager.ParseAttributes(
+ mnCurrentStateId,
+ aAttributes);
+
+ // d) creating a new ElementContext for the element that just
starts.
+ maElementContextStack.push(maCurrentElementContext);
+ final ElementContext aPreviousElementContext =
maCurrentElementContext;
+ maCurrentElementContext = new ElementContext(
+ sElementName,
+ maStateNameMap.GetNameForId(aTransition.GetActionId()),
+ false,
+ aAttributeValues,
+ aPreviousElementContext);
+
+ // e) and run all actions that are bound to the current
start tag.
+ ExecuteActions(
+ mnCurrentStateId,
+ maCurrentElementContext,
+ ActionTrigger.ElementStart,
+ null,
+ aLocation);
bResult = true;
}
@@ -161,8 +194,22 @@ public class StateMachine
final NamespaceMap.NamespaceDescriptor aDescriptor =
maNamespaceMap.GetDescriptorForURI(sNamespaceURI);
- final int nOldStateId = mnCurrentStateId;
- SetCurrentState(maStateStack.pop());
+ // Leave the current element.
+
+ final int nPreviousStateId = mnCurrentStateId;
+ mnCurrentStateId = maStateStack.pop();
+ if (mnCurrentStateId == mnEndStateId)
+ mnCurrentStateId = mnStartStateId;
+
+ final ElementContext aPreviousElementContext = maCurrentElementContext;
+ maCurrentElementContext = maElementContextStack.pop();
+
+ ExecuteActions(
+ nPreviousStateId,
+ aPreviousElementContext,
+ ActionTrigger.ElementEnd,
+ null,
+ aLocation);
if (Log.Dbg != null)
{
@@ -173,8 +220,8 @@ public class StateMachine
aLocation.getLineNumber(),
aLocation.getColumnNumber());
Log.Dbg.printf(" %s(%d) <- %s(%d)\n",
- maStateNameMap.GetNameForId(nOldStateId),
- nOldStateId,
+ maStateNameMap.GetNameForId(nPreviousStateId),
+ nPreviousStateId,
maStateNameMap.GetNameForId(mnCurrentStateId),
mnCurrentStateId);
}
@@ -184,8 +231,19 @@ public class StateMachine
public void ProcessCharacters (
- final String sText)
+ final String sText,
+ final Location aLocation)
{
+ if (Log.Dbg != null)
+ Log.Dbg.printf("text [%s]\n", sText.replace("\n", "\\n"));
+
+ ExecuteActions(
+ mnCurrentStateId,
+ maCurrentElementContext,
+ ActionTrigger.Text,
+ sText,
+ aLocation);
+
}
@@ -198,35 +256,26 @@ public class StateMachine
-
- private void SetCurrentState (final int nState)
+
+ public ActionManager GetActionManager ()
{
- if (mnCurrentStateId != nState)
- {
- if (nState == mnEndStateId)
- mnCurrentStateId = mnStartStateId;
- else
- mnCurrentStateId = nState;
- }
+ return maActionManager;
}
private void ExecuteActions (
- final Transition aTransition,
- final AttributeProvider aAttributes,
- final int nOldState,
- final int nNewState)
+ final int nStateId,
+ final ElementContext aElementContext,
+ final ActionTrigger eTrigger,
+ final String sText,
+ final Location aLocation)
{
- maStateStack.push(mnCurrentStateId);
- if (Log.Dbg != null)
- Log.Dbg.IncreaseIndentation();
- final int nActionId = aTransition.GetActionId();
- SetCurrentState(nActionId);
- maAttributeManager.ParseAttributes(
- nActionId,
- aAttributes);
+ final Iterable<IAction> aActions =
maActionManager.GetActions(nStateId, eTrigger);
+ if (aActions != null)
+ for (final IAction aAction : aActions)
+ aAction.Run(eTrigger, aElementContext, sText, aLocation);
}
@@ -239,8 +288,11 @@ public class StateMachine
private final AttributeManager maAttributeManager;
private int mnCurrentStateId;
private Stack<Integer> maStateStack;
+ private ElementContext maCurrentElementContext;
+ private Stack<ElementContext> maElementContextStack;
private final int mnStartStateId;
private final int mnEndStateId;
private SkipStateTable maSkipStates;
private AcceptingStateTable maAcceptingStates;
+ private final ActionManager maActionManager;
}