Rev 2128 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
using System;
using System.Collections;
using System.Text;
using Word;

// TODO
//
// 1) Make more efficient by using style IDs instead of names
// 2) Possibly let the user end a tag simply with </>

namespace EA_DocGen
{
   /// <summary>
   /// Parses text containing embedded formatting tags (short names enclosed in angle
   /// brackets, closed by the same name preceded by a slash) and appends the resulting
   /// runs of text to the Word document via TextualContent / TabularContent, applying
   /// the style associated with each tag.
   /// </summary>
   public class TextParser
   {
      // An embedded formatting tag can be one of two types
      // * An EA_DocGen special format
      // * An MS-Word Style based format
      private enum style_type_e
      {
         STYLE_EA_DOCGEN = 0,
         STYLE_MS_WORD,
         STYLE_UNKNOWN = -1
      };

      // A token is a block of text associated with a tag (style) name and type.
      // gen1 is a general purpose value; for list styles it carries the list level
      // (1..5) that applyPostFormats() compares against the previous list level to
      // decide when numbering must restart. It is 0 for every non-list style.
      private struct token_type
      {
         public string txt;
         public style_type_e styleType;
         public string styleName;
         public int gen1;
      };

      // Some formatting has to be applied after ALL the text of a description has been
      // appended to the word document. For this formatting, we need to remember the word
      // ranges that denote the text and the style name to apply.
      // A null range is used as a marker meaning "restart list numbering".
      private struct postFormat_type
      {
         public postFormat_type(Word.Range wr, token_type tk)
         {
            m_wr = wr;
            m_tk = tk;
         }
         public Word.Range m_wr;
         public token_type m_tk;
      };

      // Indentation is expressed relative to this value. At 2.5cm, the pts value is 70.866.
      private const float baseIndent_pts = (float)70.866;

      // Use a hash table for recording the allowed tags and their attributes, facilitating rapid
      // lookup during parsing. Keys are tag names (string), values are boxed token_type.
      private static Hashtable styleDefs = null;

      /// <summary>
      /// Class initialisation function. (Re)builds the table of tags that users may
      /// embed in their text. parse() calls this itself if it has not been called yet.
      /// </summary>
      public static void initialise()
      {
         // Build into a local and publish at the end so that styleDefs is never
         // observed half-populated.
         Hashtable defs = new Hashtable();

         // This list dictates what tags a user can use in the notes text of an EA element. When adding new items
         // to this list, we have to update the parsing function obviously.
         // The hash key is the short tag name that end-users will use in their descriptions. Tags found in
         // user text are matched to these keys, and the style definition if found can then be used.

         // EA_DocGen tags
         defs.Add(EA_Constants.EA_DocGenTable,
                  formStyleDef(style_type_e.STYLE_EA_DOCGEN, EA_Constants.EA_DocGenTable, 0));

         // MS-Word character formatting tags
         addWordStyle(defs, "b", EA_Constants.styleName_Bold, 0);
         addWordStyle(defs, "i", EA_Constants.styleName_Italic, 0);
         addWordStyle(defs, "u", EA_Constants.styleName_Underline, 0);

         // MS-Word list tags: <prefix>0 .. <prefix>4, carrying list levels 1 .. 5
         addListFamily(defs, "lb", new string[] {
            EA_Constants.styleName_ListBullet0,
            EA_Constants.styleName_ListBullet1,
            EA_Constants.styleName_ListBullet2,
            EA_Constants.styleName_ListBullet3,
            EA_Constants.styleName_ListBullet4 });
         addListFamily(defs, "ln", new string[] {
            EA_Constants.styleName_ListNumber0,
            EA_Constants.styleName_ListNumber1,
            EA_Constants.styleName_ListNumber2,
            EA_Constants.styleName_ListNumber3,
            EA_Constants.styleName_ListNumber4 });
         addListFamily(defs, "li", new string[] {
            EA_Constants.styleName_ListIndent0,
            EA_Constants.styleName_ListIndent1,
            EA_Constants.styleName_ListIndent2,
            EA_Constants.styleName_ListIndent3,
            EA_Constants.styleName_ListIndent4 });
         addListFamily(defs, "la", new string[] {
            EA_Constants.styleName_AlphaList0,
            EA_Constants.styleName_AlphaList1,
            EA_Constants.styleName_AlphaList2,
            EA_Constants.styleName_AlphaList3,
            EA_Constants.styleName_AlphaList4 });

         // MS-Word paragraph style tags
         addWordStyle(defs, "code", EA_Constants.styleName_CodeText, 0);
         addWordStyle(defs, "normal", EA_Constants.styleName_Normal, 0);
         addWordStyle(defs, "note", EA_Constants.styleName_Note, 0);
         addWordStyle(defs, "warn", EA_Constants.styleName_Warning, 0);

         styleDefs = defs;
      }

      /// <summary>
      /// Helper for initialise(): registers one MS-Word style tag.
      /// </summary>
      /// <param name="defs">table being populated</param>
      /// <param name="tagName">tag name as typed by the user, without brackets</param>
      /// <param name="styleName">style name the tag maps to</param>
      /// <param name="genvar1">value stored in the definition's gen1 field</param>
      private static void addWordStyle(Hashtable defs, string tagName, string styleName, int genvar1)
      {
         defs.Add(tagName, formStyleDef(style_type_e.STYLE_MS_WORD, styleName, genvar1));
      }

      /// <summary>
      /// Helper for initialise(): registers the tags prefix0, prefix1, ... for a family of
      /// list styles. The style at index n is stored with gen1 = n + 1.
      /// </summary>
      /// <param name="defs">table being populated</param>
      /// <param name="tagPrefix">tag name prefix, e.g. "lb"</param>
      /// <param name="styleNames">style names ordered by increasing level</param>
      private static void addListFamily(Hashtable defs, string tagPrefix, string[] styleNames)
      {
         for (int level = 0; level < styleNames.Length; level++)
         {
            addWordStyle(defs, tagPrefix + level.ToString(), styleNames[level], level + 1);
         }
      }

      /// <summary>
      /// Helper for initialise() function. Builds a style definition with no text.
      /// </summary>
      /// <param name="styleType">kind of style (EA_DocGen special or MS-Word)</param>
      /// <param name="styleName">name of the style</param>
      /// <param name="genvar1">value for the gen1 field</param>
      /// <returns>the populated definition; its txt field is null</returns>
      private static token_type formStyleDef(style_type_e styleType, string styleName, int genvar1)
      {
         token_type tokenType = new token_type();
         tokenType.styleType = styleType;
         tokenType.styleName = styleName;
         tokenType.txt = null;
         tokenType.gen1 = genvar1;
         return tokenType;
      }

      /// <summary>
      /// Builds the token used for text that is not enclosed by any recognised tag.
      /// </summary>
      /// <returns>an MS-Word token using the Body1 style name, with null text and gen1 = 0</returns>
      private static token_type defaultToken()
      {
         return formStyleDef(style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Body1, 0);
      }

      /// <summary>
      /// Parse text containing embedded style tags and use the results to form document content.
      /// </summary>
      /// <param name="s">text to parse; null is treated as empty text</param>
      /// <param name="id">passed through to Main.WriteOutput with any error message</param>
      /// <param name="defaultStyle">style used when appending untagged text and the text of
      /// bold/italic/underline/list tags (list and character formatting is applied afterwards)</param>
      /// <param name="indent_pts">when greater than 0, the left indent applied to appended
      /// paragraphs; list paragraphs are shifted right by the amount this exceeds 70.866pt</param>
      /// <param name="continuation">passed to TextualContent.appendAndSelectText for the first
      /// text appended; later appends pass true, except that a table resets it to false</param>
      /// <returns>always true; tag errors are reported through Main.WriteOutput</returns>
      public static bool parse(string s, int id, string defaultStyle, float indent_pts, bool continuation)
      {
         if (s == null)
         {
            s = "";
         }
         if (styleDefs == null)
         {
            // Be forgiving if the caller forgot the explicit initialise() call.
            initialise();
         }

         // get relative (to 2.5cm) indentation user has commanded. We only indent, never outdent
         // so end stop at 0.
         float relative_indent_adjustment = 0;
         if (indent_pts > 0)
         {
            relative_indent_adjustment = indent_pts - baseIndent_pts;
            if (relative_indent_adjustment < 0)
            {
               relative_indent_adjustment = 0;
            }
         }

         ArrayList tokens = tokenise(s, id);
         ArrayList postFormats = appendTokens(tokens, defaultStyle, indent_pts, continuation);
         applyPostFormats(postFormats, relative_indent_adjustment);
         return true;
      }

      /// <summary>
      /// Reports a tag error through Main.WriteOutput, but only for the first error found
      /// in a given text so the user is not flooded with messages.
      /// </summary>
      /// <param name="messageFormat">format string with a {0} placeholder for the tag text</param>
      /// <param name="tagText">the offending tag including its brackets</param>
      /// <param name="id">passed through to Main.WriteOutput</param>
      /// <param name="foundError">set to true once an error has been reported</param>
      private static void reportTagError(string messageFormat, string tagText, int id, ref bool foundError)
      {
         if (!foundError)
         {
            Main.WriteOutput(string.Format(messageFormat, tagText), id);
            foundError = true;
         }
      }

      /// <summary>
      /// Splits the text into tokens: runs of text each associated with the style of the
      /// tag that encloses them, or with the default body style when untagged.
      /// Unrecognised tags are left in the text as-is. Nested, unmatched, out of sequence
      /// and unterminated tags are reported (first error only) and otherwise tolerated.
      /// </summary>
      /// <param name="s">text to split; must not be null</param>
      /// <param name="id">passed through to Main.WriteOutput with any error message</param>
      /// <returns>list of boxed token_type in document order</returns>
      private static ArrayList tokenise(string s, int id)
      {
         ArrayList tokens = new ArrayList();
         bool foundError = false;
         bool lookingForTagEnd = false;
         int pos_ContentStart = 0;      // first char not yet placed in a token
         string openTagText = null;     // text of the start tag currently awaiting its end tag
         token_type token = defaultToken();

         // look for a tag
         int pos = s.IndexOf('<', 0);
         while ((pos >= 0) && (pos < s.Length))
         {
            // record position of tag; tag name begins at the next char
            int pos_LeftBracket = pos;
            int pos_tagName = pos_LeftBracket + 1;

            // Check if this is a closing tag, and if so skip past the / char
            bool isEnding = false;
            if ((pos_tagName < s.Length) && (s[pos_tagName] == '/'))
            {
               isEnding = true;
               pos_tagName++;
            }

            // look for the closing bracket of the tag
            int pos_RightBracket = s.IndexOf('>', pos_LeftBracket + 1);
            if (pos_RightBracket < 0)
            {
               // Cannot find any '>' so there can be no further tags
               break;
            }

            // We found a possible tag, now figure out if this is one of the tags we recognise
            string tagName = s.Substring(pos_tagName, pos_RightBracket - pos_tagName);
            object styleDef = styleDefs[tagName];
            if (styleDef == null)
            {
               // Not recognised, so treat it as plain text. Resume just after this '<'
               // rather than at the '>' we found: the '>' may belong to a genuine tag
               // that starts between the two (e.g. "x < 5 <b>y</b>").
               pos = s.IndexOf('<', pos_LeftBracket + 1);
               continue;
            }

            token_type tt = (token_type)styleDef;
            string tagText = s.Substring(pos_LeftBracket, pos_RightBracket - pos_LeftBracket + 1);

            if (isEnding)
            {
               if (!lookingForTagEnd)
               {
                  // end tag with no start tag; it stays in the text as plain content
                  reportTagError("ERROR, Found out of sequence style tag ({0}), generated document text may be formatted incorrectly.", tagText, id, ref foundError);
               }
               else if (token.styleName == tt.styleName && token.styleType == tt.styleType)
               {
                  // The end tag matches the start tag. Update the token's text field and add
                  // the now complete token to our list for processing a little later on.
                  token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
                  tokens.Add(token);

                  // re-initialise token for next tag search
                  token = defaultToken();
                  lookingForTagEnd = false;
                  openTagText = null;
                  pos_ContentStart = pos_RightBracket + 1;
               }
               else
               {
                  // end tag does not seem to be the same as the starting tag, so ignore it
                  reportTagError("ERROR, Found unmatched style tag ({0}), generated document text may be formatted incorrectly.", tagText, id, ref foundError);
               }
            }
            else if (lookingForTagEnd)
            {
               // Nested start tag: ignore it. The pending content is deliberately NOT
               // flushed here; it is emitted once, when the outer tag is closed.
               reportTagError("ERROR, Found nested style tag ({0}), generated document text may be formatted incorrectly.", tagText, id, ref foundError);
            }
            else
            {
               // If there is content prior to now that has not been consumed, tokenise it now
               if (pos_LeftBracket > pos_ContentStart)
               {
                  token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
                  tokens.Add(token);
               }

               // update the token variable with this tag's attributes from the hash table lookup
               token.styleName = tt.styleName;
               token.styleType = tt.styleType;
               token.gen1 = tt.gen1;
               token.txt = null; // the text content is obtained when we encounter the end tag
               pos_ContentStart = pos_RightBracket + 1;
               lookingForTagEnd = true;
               openTagText = tagText;
            }

            // look for next tag
            pos = s.IndexOf('<', pos_RightBracket);
         }

         // take care of the last token, if there is one. If a tag was left open, the
         // remaining text still carries that tag's style.
         if (pos_ContentStart < s.Length)
         {
            token.txt = s.Substring(pos_ContentStart, s.Length - pos_ContentStart);
            tokens.Add(token);
         }

         if (lookingForTagEnd)
         {
            // Report using the text recorded when the tag was opened; bracket positions
            // seen since then may belong to other (unrecognised or rejected) tags.
            reportTagError("ERROR, Found incomplete style tag ({0}), generated document text may be formatted incorrectly.", openTagText, id, ref foundError);
         }

         return tokens;
      }

      /// <summary>
      /// Appends the text of every non-empty token to the document and collects the
      /// formatting that must be deferred until all text has been appended.
      /// </summary>
      /// <param name="tokens">tokens produced by tokenise()</param>
      /// <param name="defaultStyle">see parse()</param>
      /// <param name="indent_pts">see parse()</param>
      /// <param name="continuation">see parse()</param>
      /// <returns>list of boxed postFormat_type in document order</returns>
      private static ArrayList appendTokens(ArrayList tokens, string defaultStyle, float indent_pts, bool continuation)
      {
         ArrayList postFormats = new ArrayList();
         Word.Range wr_body;

         foreach (token_type tt in tokens)
         {
            if (tt.txt == null || tt.txt.Length == 0)
            {
               continue;
            }

            switch (tt.styleType)
            {
               case style_type_e.STYLE_EA_DOCGEN:
                  switch (tt.styleName)
                  {
                     case EA_Constants.EA_DocGenTable:
                        TabularContent.processTableElement(tt.txt, 0, indent_pts);
                        continuation = false;
                        // flag list numbering restart
                        postFormats.Add(new postFormat_type(null, tt));
                        break;

                     default:
                        break;
                  }
                  break;

               case style_type_e.STYLE_MS_WORD:
                  switch (tt.styleName)
                  {
                     // List all formatting that has to be done after all of the text has been
                     // inserted into the document ie. post-formatting
                     case EA_Constants.styleName_Bold:
                     case EA_Constants.styleName_Italic:
                     case EA_Constants.styleName_Underline:
                     case EA_Constants.styleName_ListBullet0:
                     case EA_Constants.styleName_ListBullet1:
                     case EA_Constants.styleName_ListBullet2:
                     case EA_Constants.styleName_ListBullet3:
                     case EA_Constants.styleName_ListBullet4:
                     case EA_Constants.styleName_ListNumber0:
                     case EA_Constants.styleName_ListNumber1:
                     case EA_Constants.styleName_ListNumber2:
                     case EA_Constants.styleName_ListNumber3:
                     case EA_Constants.styleName_ListNumber4:
                     case EA_Constants.styleName_ListIndent0:
                     case EA_Constants.styleName_ListIndent1:
                     case EA_Constants.styleName_ListIndent2:
                     case EA_Constants.styleName_ListIndent3:
                     case EA_Constants.styleName_ListIndent4:
                     case EA_Constants.styleName_AlphaList0:
                     case EA_Constants.styleName_AlphaList1:
                     case EA_Constants.styleName_AlphaList2:
                     case EA_Constants.styleName_AlphaList3:
                     case EA_Constants.styleName_AlphaList4:
                        wr_body = TextualContent.appendAndSelectText(tt.txt, defaultStyle, continuation);
                        continuation = true;
                        if (wr_body.Characters.Last.Text.Equals("\r"))
                        {
                           // don't format the \r char at the end - doing so causes weird ms-word exceptions later on
                           wr_body.End = wr_body.End - 1;
                        }
                        postFormats.Add(new postFormat_type(wr_body, tt));
                        break;

                     case EA_Constants.styleName_CodeText:
                     case EA_Constants.styleName_Normal:
                     case EA_Constants.styleName_Note:
                     case EA_Constants.styleName_Warning:
                        wr_body = TextualContent.appendAndSelectText(tt.txt, tt.styleName, continuation);
                        continuation = true;
                        if (indent_pts > 0)
                        {
                           wr_body.ParagraphFormat.LeftIndent = indent_pts;
                        }
                        // flag list numbering restart
                        postFormats.Add(new postFormat_type(null, tt));
                        break;

                     // List all other formatting that can be done immediately.
                     case EA_Constants.styleName_Body1:
                        wr_body = TextualContent.appendAndSelectText(tt.txt, defaultStyle, continuation);
                        continuation = true;
                        if (indent_pts > 0)
                        {
                           wr_body.ParagraphFormat.LeftIndent = indent_pts;
                        }
                        // flag list numbering restart if this is printable text.
                        if (tt.txt.Trim().Length > 0)
                        {
                           postFormats.Add(new postFormat_type(null, tt));
                        }
                        break;

                     default:
                        break;
                  }
                  break;

               default:
                  break;
            }
         }

         return postFormats;
      }

      /// <summary>
      /// Applies the deferred formatting commands to text already appended to the document.
      /// </summary>
      /// <param name="postFormats">entries collected by appendTokens()</param>
      /// <param name="relative_indent_adjustment">extra left indent, in points, added to
      /// list paragraphs when greater than 0</param>
      private static void applyPostFormats(ArrayList postFormats, float relative_indent_adjustment)
      {
         // Level of the list item handled last; 0 means the last thing handled was not a list item.
         int last_list_level = 0;

         foreach (postFormat_type pf in postFormats)
         {
            // a null word range implies we must restart numbering for any lists
            if (pf.m_wr == null)
            {
               last_list_level = 0;
               continue;
            }

            switch (pf.m_tk.styleName)
            {
               case EA_Constants.styleName_Bold:
                  pf.m_wr.Select();
                  createWordDoc.WordApp.Selection.Range.Bold = 1;
                  last_list_level = 0;
                  break;

               case EA_Constants.styleName_Italic:
                  pf.m_wr.Select();
                  createWordDoc.WordApp.Selection.Range.Italic = 1;
                  last_list_level = 0;
                  break;

               case EA_Constants.styleName_Underline:
                  pf.m_wr.Select();
                  createWordDoc.WordApp.Selection.Range.Underline = Word.WdUnderline.wdUnderlineSingle;
                  last_list_level = 0;
                  break;

               case EA_Constants.styleName_ListBullet0:
               case EA_Constants.styleName_ListBullet1:
               case EA_Constants.styleName_ListBullet2:
               case EA_Constants.styleName_ListBullet3:
               case EA_Constants.styleName_ListBullet4:
               case EA_Constants.styleName_ListIndent0:
               case EA_Constants.styleName_ListIndent1:
               case EA_Constants.styleName_ListIndent2:
               case EA_Constants.styleName_ListIndent3:
               case EA_Constants.styleName_ListIndent4:
               case EA_Constants.styleName_ListNumber0:
               case EA_Constants.styleName_ListNumber1:
               case EA_Constants.styleName_ListNumber2:
               case EA_Constants.styleName_ListNumber3:
               case EA_Constants.styleName_ListNumber4:
               case EA_Constants.styleName_AlphaList0:
               case EA_Constants.styleName_AlphaList1:
               case EA_Constants.styleName_AlphaList2:
               case EA_Constants.styleName_AlphaList3:
               case EA_Constants.styleName_AlphaList4:
                  object style = pf.m_tk.styleName;
                  pf.m_wr.Select();
                  createWordDoc.WordApp.Selection.Range.set_Style(ref style);

                  // Figure out if we have to restart numbering: we do when this item is at a
                  // deeper level than the previous one (or the previous item was not a list).
                  if (last_list_level < pf.m_tk.gen1)
                  {
                     // Only numbered and alphabetic lists have numbering to restart.
                     if ((pf.m_tk.styleName.IndexOf("Alpha") >= 0) || (pf.m_tk.styleName.IndexOf("Number") >= 0))
                     {
                        object continuePreviousList = false;
                        object applyTo = Word.WdListApplyTo.wdListApplyToWholeList;
                        object defListBehavour = Word.WdDefaultListBehavior.wdWord10ListBehavior;
                        Word.ListTemplate lt = createWordDoc.WordApp.Selection.Range.ListFormat.ListTemplate;
                        createWordDoc.WordApp.Selection.Range.ListFormat.ApplyListTemplate(lt, ref continuePreviousList, ref applyTo, ref defListBehavour);
                     }
                  }

                  // shift content right by relative indent adjustment we calculated earlier
                  if (relative_indent_adjustment > 0)
                  {
                     createWordDoc.WordApp.Selection.Range.ParagraphFormat.LeftIndent += relative_indent_adjustment;
                  }
                  last_list_level = pf.m_tk.gen1;
                  break;

               default:
                  break;
            }
         }
      }
   }
}