// WebSVN – DevTools – /EA_DocGen/trunk/EA_DocGen/TextParser.cs

using System;
using System.Collections;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Office.Interop.Word;
using Microsoft.Office.Core;


// TODO
//
// 1) Possibly let the user end a tag simply with </>



namespace EA_DocGen
{
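        /// <summary>
        /// Parses notes text containing embedded formatting tags (for example &lt;b&gt;, &lt;lb0&gt;, &lt;code&gt;)
        /// and serialises the resulting pieces of text into the MS-Word document being generated.
        /// </summary>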
   public class TextParser
   {
      // An embedded formatting tag can be one of two types
      // * An EA_DocGen special format
      // * An MS-Word Style based format
      // The serialisation loop in parse() switches on this value first to decide how a token is
      // written to the document.
      private enum style_type_e
      {
         STYLE_EA_DOCGEN = 0,    // style is an EA_DocGen specific style or pattern 
         STYLE_MS_WORD,          // style is an MS-WORD style of character/font attribute
         STYLE_UNKNOWN = -1      // not assigned anywhere in this class; presumably reserved for unrecognised tags
      };

      // Describes how the text of a token is styled when it is written to the document. The
      // serialisation loop in parse() switches on this value for MS-Word style tokens.
      private enum style_handling_e
      {
         STYLE_POST_FORMAT,      // text will require formatting AFTER the entire string has been serialised.
         STYLE_USE_TAG_STYLE,    // text will require the tag-implied style to be applied to it.
         STYLE_USE_CALLER_STYLE  // text will require the callers specified style to be applied to it.
      };

      // IDs for all of the styles we will process.
      // Note that the arrangement of enum values in the following type, has been made to allow
      // for some simple comparison tests during parsing, so do not re-arrange these enums without
      // addressing the necessary code changes in the parsing function.
      // The comparisons that rely on this ordering are in the post-formatting loop of parse():
      //  * styleId >= STYLE_ID_LIST_BULLET_0 identifies a bullet/list style
      //  * styleId >= STYLE_ID_LIST_NUMBER_0 identifies a list that displays numbering/lettering
      private enum style_id_e
      {
         // EA_DocGen styles (0..99)
         STYLE_ID_TABLE = 0,
         // add more EA_DocGen styles here when we need to

         // MS-WORD styles (100..)
         STYLE_ID_BODY1 = 100,
         STYLE_ID_BOLD,
         STYLE_ID_ITALIC,
         STYLE_ID_UNDERLINE,
         STYLE_ID_NORMAL,
         STYLE_ID_WARNING,
         STYLE_ID_NOTE,
         STYLE_ID_CODETEXT,
         // add more MS-WORD styles here when we need to

         // ALL bullet/list styles must be >= 200 for easy detection during post-formatting
         STYLE_ID_LIST_BULLET_0 = 200,
         STYLE_ID_LIST_BULLET_1,
         STYLE_ID_LIST_BULLET_2,
         STYLE_ID_LIST_BULLET_3,
         STYLE_ID_LIST_BULLET_4,
         STYLE_ID_LIST_INDENT_0,
         STYLE_ID_LIST_INDENT_1,
         STYLE_ID_LIST_INDENT_2,
         STYLE_ID_LIST_INDENT_3,
         STYLE_ID_LIST_INDENT_4,
         // Below here go lists that display numbering or lettering and so "restart numbering"
         // attributes are important
         STYLE_ID_LIST_NUMBER_0,
         STYLE_ID_LIST_NUMBER_1,
         STYLE_ID_LIST_NUMBER_2,
         STYLE_ID_LIST_NUMBER_3,
         STYLE_ID_LIST_NUMBER_4,
         STYLE_ID_LIST_ALPHA_0,
         STYLE_ID_LIST_ALPHA_1,
         STYLE_ID_LIST_ALPHA_2,
         STYLE_ID_LIST_ALPHA_3,
         STYLE_ID_LIST_ALPHA_4,
         // do not put anything below here
         // NOTE: STYLE_ID_FONT_COLOR does sit below the list IDs, so numerically it satisfies both of the
         // ">=" list tests. It is not treated as a list only because the post-formatting switch in parse()
         // has an explicit case for it ahead of the default (list) branch.
         STYLE_ID_FONT_COLOR
      };

      // A token is a block of text associated with a tag (style) name and type.
      // Instances stored in styleDefs act as templates (txt is null); parse() copies the template
      // fields into a working token and fills in txt once the matching end tag is found.
      private struct token_type
      {
         public string txt;                     // This is the text content enclosed by the tag
         public style_type_e styleType;         // The type MS-WORD or EA_DocGen
         public string styleName;               // The name of the style
         public style_id_e styleId;             // The id of the style
         public int level;                      // level (for list items); for font tags parse() stores the colour value here instead
         public style_handling_e styleHandling; // Handling attribute for the style
      };

      // Some formatting has to be applied after ALL the text of a description has been
      // appended to the word document. For this formatting, we need to remember the word
      // ranges that denote the text and the style name to apply. So we do it in a list of
      // the following item.
      // An item whose range is null carries no formatting: parse() adds such items as markers
      // meaning "list numbering must restart after this point".
      private struct postFormat_type
      {
         // Pairs a serialised word range (or null for a numbering-restart marker) with its token.
         public postFormat_type(Range wr, token_type tk)
         {
            m_wr = wr;
            m_tk = tk;
         }
         public Range m_wr;      // range of the serialised text, or null (numbering-restart marker)
         public token_type m_tk; // token that produced the text; supplies style id, style name and level
      };

      // Use a hash table for recording the allowed tags and their attributes, facilitating rapid
      // lookup during parsing.
      // Key: the tag name as typed by the user (string). Value: a token_type template.
      // Remains null until initialise() has been called.
      private static Hashtable styleDefs = null;


      /// <summary>
      /// Builds the table of formatting tags that users may embed in notes text. Must be called
      /// before parsing; calling it again simply rebuilds the table from scratch.
      /// </summary>
      public static void initialise()
      {
         // Shorthand for the enum values that are repeated on almost every entry below.
         style_type_e word = style_type_e.STYLE_MS_WORD;
         style_handling_e post = style_handling_e.STYLE_POST_FORMAT;
         style_handling_e useTag = style_handling_e.STYLE_USE_TAG_STYLE;

         // Start with an empty table.
         styleDefs = new Hashtable();

         // Each key is the short tag name end-users type between < and > in their descriptions.
         // parse() looks tags up by these keys and copies the attributes of the matching definition.
         // New entries may also need matching support in parse().

         // --- EA_DocGen specific tags ---
         styleDefs.Add(EA_Constants.EA_DocGenTable,
            formStyleDef(style_type_e.STYLE_EA_DOCGEN, style_id_e.STYLE_ID_TABLE, EA_Constants.EA_DocGenTable, 0, useTag));

         // --- MS-Word character formatting (applied after serialisation) ---
         styleDefs.Add("b",    formStyleDef(word, style_id_e.STYLE_ID_BOLD,       EA_Constants.styleName_Bold,       0, post));
         styleDefs.Add("i",    formStyleDef(word, style_id_e.STYLE_ID_ITALIC,     EA_Constants.styleName_Italic,     0, post));
         styleDefs.Add("u",    formStyleDef(word, style_id_e.STYLE_ID_UNDERLINE,  EA_Constants.styleName_Underline,  0, post));
         styleDefs.Add("font", formStyleDef(word, style_id_e.STYLE_ID_FONT_COLOR, EA_Constants.styleName_Font_Color, 0, post));

         // --- Bullet lists, levels 1..5 ---
         styleDefs.Add("lb0", formStyleDef(word, style_id_e.STYLE_ID_LIST_BULLET_0, EA_Constants.styleName_ListBullet0, 1, post));
         styleDefs.Add("lb1", formStyleDef(word, style_id_e.STYLE_ID_LIST_BULLET_1, EA_Constants.styleName_ListBullet1, 2, post));
         styleDefs.Add("lb2", formStyleDef(word, style_id_e.STYLE_ID_LIST_BULLET_2, EA_Constants.styleName_ListBullet2, 3, post));
         styleDefs.Add("lb3", formStyleDef(word, style_id_e.STYLE_ID_LIST_BULLET_3, EA_Constants.styleName_ListBullet3, 4, post));
         styleDefs.Add("lb4", formStyleDef(word, style_id_e.STYLE_ID_LIST_BULLET_4, EA_Constants.styleName_ListBullet4, 5, post));

         // --- Numbered lists, levels 1..5 ---
         styleDefs.Add("ln0", formStyleDef(word, style_id_e.STYLE_ID_LIST_NUMBER_0, EA_Constants.styleName_ListNumber0, 1, post));
         styleDefs.Add("ln1", formStyleDef(word, style_id_e.STYLE_ID_LIST_NUMBER_1, EA_Constants.styleName_ListNumber1, 2, post));
         styleDefs.Add("ln2", formStyleDef(word, style_id_e.STYLE_ID_LIST_NUMBER_2, EA_Constants.styleName_ListNumber2, 3, post));
         styleDefs.Add("ln3", formStyleDef(word, style_id_e.STYLE_ID_LIST_NUMBER_3, EA_Constants.styleName_ListNumber3, 4, post));
         styleDefs.Add("ln4", formStyleDef(word, style_id_e.STYLE_ID_LIST_NUMBER_4, EA_Constants.styleName_ListNumber4, 5, post));

         // --- Indented (unmarked) lists, levels 1..5 ---
         styleDefs.Add("li0", formStyleDef(word, style_id_e.STYLE_ID_LIST_INDENT_0, EA_Constants.styleName_ListIndent0, 1, post));
         styleDefs.Add("li1", formStyleDef(word, style_id_e.STYLE_ID_LIST_INDENT_1, EA_Constants.styleName_ListIndent1, 2, post));
         styleDefs.Add("li2", formStyleDef(word, style_id_e.STYLE_ID_LIST_INDENT_2, EA_Constants.styleName_ListIndent2, 3, post));
         styleDefs.Add("li3", formStyleDef(word, style_id_e.STYLE_ID_LIST_INDENT_3, EA_Constants.styleName_ListIndent3, 4, post));
         styleDefs.Add("li4", formStyleDef(word, style_id_e.STYLE_ID_LIST_INDENT_4, EA_Constants.styleName_ListIndent4, 5, post));

         // --- Lettered lists, levels 1..5 ---
         styleDefs.Add("la0", formStyleDef(word, style_id_e.STYLE_ID_LIST_ALPHA_0, EA_Constants.styleName_AlphaList0, 1, post));
         styleDefs.Add("la1", formStyleDef(word, style_id_e.STYLE_ID_LIST_ALPHA_1, EA_Constants.styleName_AlphaList1, 2, post));
         styleDefs.Add("la2", formStyleDef(word, style_id_e.STYLE_ID_LIST_ALPHA_2, EA_Constants.styleName_AlphaList2, 3, post));
         styleDefs.Add("la3", formStyleDef(word, style_id_e.STYLE_ID_LIST_ALPHA_3, EA_Constants.styleName_AlphaList3, 4, post));
         styleDefs.Add("la4", formStyleDef(word, style_id_e.STYLE_ID_LIST_ALPHA_4, EA_Constants.styleName_AlphaList4, 5, post));

         // --- Paragraph styles applied directly from the tag ---
         styleDefs.Add("code",   formStyleDef(word, style_id_e.STYLE_ID_CODETEXT, EA_Constants.styleName_CodeText, 0, useTag));
         styleDefs.Add("normal", formStyleDef(word, style_id_e.STYLE_ID_NORMAL,   EA_Constants.styleName_Normal,   0, useTag));
         styleDefs.Add("note",   formStyleDef(word, style_id_e.STYLE_ID_NOTE,     EA_Constants.styleName_Note,     0, useTag));
         styleDefs.Add("warn",   formStyleDef(word, style_id_e.STYLE_ID_WARNING,  EA_Constants.styleName_Warning,  0, useTag));
      }

      /// <summary>
      /// Helper for initialise(): packs the attributes of one tag into a token_type template.
      /// </summary>
      /// <param name="styleType">Kind of style (MS-Word or EA_DocGen).</param>
      /// <param name="styleId">Specific style identifier.</param>
      /// <param name="styleName">Name of the style.</param>
      /// <param name="levelvar">Level number; only meaningful for bullet/list styles.</param>
      /// <param name="styleHandling">How text carrying this tag is to be styled when serialised.</param>
      /// <returns>A token with the given attributes and no text.</returns>
      private static token_type formStyleDef(style_type_e styleType, style_id_e styleId, string styleName, int levelvar, style_handling_e styleHandling)
      {
         token_type definition = new token_type();

         // A definition is only a template, so it carries no text of its own.
         definition.txt = null;

         // Identity of the style
         definition.styleName = styleName;
         definition.styleId   = styleId;
         definition.styleType = styleType;

         // How, and at what list level, the style is applied
         definition.styleHandling = styleHandling;
         definition.level = levelvar;

         return definition;
      }


      /// <summary>
      /// Resets a working token to the default used for untagged text: MS-Word Body1 style id,
      /// styled with whatever style the caller of parse() asked for, with no text and level 0.
      /// </summary>
      /// <param name="token">Receives the freshly initialised token.</param>
      private static void initialiseToken(out token_type token)
      {
         token = new token_type();

         // no content yet
         token.txt = null;
         token.level = 0;

         // default style attributes
         token.styleType = style_type_e.STYLE_MS_WORD;
         token.styleId   = style_id_e.STYLE_ID_BODY1;
         token.styleName = EA_Constants.styleName_Body1;
         token.styleHandling = style_handling_e.STYLE_USE_CALLER_STYLE;
      }


      /// <summary>
      /// Rewrites every list block delimited by <paramref name="open"/> ... <paramref name="close"/>
      /// so that the enclosing list tags are removed and each "\t&lt;li&gt;" / "&lt;/li&gt;" pair inside
      /// the block is replaced by the supplied replacement tags.
      /// </summary>
      /// <param name="s">Text to convert.</param>
      /// <param name="open">Text that marks the start of a list block. Only the tag itself (up to and
      /// including the first '&gt;') is removed; any characters after it, such as a trailing "\r\n",
      /// are used for matching only and remain in the output.</param>
      /// <param name="close">Closing tag of the list block; removed from the output.</param>
      /// <param name="replacement_open">Replaces each "\t&lt;li&gt;" within a block.</param>
      /// <param name="replacement_close">Replaces each "&lt;/li&gt;" within a block.</param>
      /// <returns>The converted text. A list block with no closing tag is left untouched.</returns>
      private static string convert_EA7_1_RTF_ListTag(string s, string open, string close, string replacement_open, string replacement_close)
      {
         // Number of characters to strip at the start of a block: just the tag, e.g. 4 for "<ol>\r\n".
         int openTagLength = open.IndexOf('>') + 1;
         if (openTagLength <= 0)
            openTagLength = open.Length;

         int pos = s.IndexOf(open, 0);
         while (pos >= 0)
         {
            int pos_2 = s.IndexOf(close, pos);

            // An unterminated (or malformed) list block cannot be converted. Stop here and leave the
            // remaining text as it is rather than computing a negative substring length.
            if (pos_2 < (pos + openTagLength))
               break;

            // Convert the list items found between the opening and closing tags
            string seg = s.Substring(pos + openTagLength, pos_2 - (pos + openTagLength));
            seg = seg.Replace("\t<li>", replacement_open);
            seg = seg.Replace("</li>", replacement_close);

            // Splice the converted items back in, dropping the opening and closing list tags
            s = s.Substring(0, pos) + seg + s.Substring(pos_2 + close.Length);

            // The block just handled no longer contains the opening text, so searching from the
            // start finds the next unconverted block.
            pos = s.IndexOf(open, 0);
         }
         return s;
      }




      /// <summary>
      /// Parse notes text containing embedded formatting tags and use the results to form document content.
      /// The work is done in three passes: the text is split into tokens (one per tagged or untagged
      /// run of text), each token is appended to the Word document, and finally formatting that can only
      /// be applied once all text is present (bold/italic/underline/colour/lists) is applied.
      /// </summary>
      /// <param name="s">The notes text, possibly containing EA_DocGen tags and EA escaped characters.</param>
      /// <param name="id">Identifier passed through to Main.WriteOutput with any error message
      /// (presumably the id of the EA element the text belongs to).</param>
      /// <param name="callerStyle">Style used for untagged text and as the initial style for text that is
      /// post-formatted. Also selects the whole-text italic / strikethrough handling for proposed and
      /// rejected requirement bodies.</param>
      /// <param name="indent_pts">Absolute left indent, in points, to apply to serialised paragraphs.
      /// Values of zero or less leave indentation alone.</param>
      /// <param name="continuation">Passed to TextualContent.appendAndSelectText for the first piece of
      /// text appended; subsequent pieces are appended with true (or false directly after a table).</param>
      /// <returns>false if document creation was aborted while parsing, otherwise true.</returns>
      public static bool parse(string s, int id, string callerStyle, float indent_pts, bool continuation)
      {
         int pos;
         int pos_LeftBracket = 0;
         int pos_RightBracket = 0;
         int pos_tagName;
         int pos_ContentStart;
         bool lookingForTagEnd;
         bool foundError = false;
         Range wr_body;

         // Text of the start tag that is currently waiting for its end tag. Remembered separately because
         // pos_LeftBracket moves on for every '<' seen (recognised or not), so it cannot be used after the
         // loop to reconstruct the tag for the "incomplete style tag" message.
         string openTagText = "";

         // Make sure the tag table exists even if the caller never called initialise()
         if (styleDefs == null)
            initialise();

         // Recognises a font start tag carrying a colour attribute. Built once rather than per tag.
         Regex fontColorRegex = new Regex("font.*?color=\"#(?<color>[0-9A-F]*)\"", RegexOptions.IgnoreCase);

         // Convert EA7.1 embedded RTF controls into EA_DocGen controls where possible
         s = convert_EA7_1_RTF_ListTag(s, "<ol>\r\n", "</ol>", "<ln0>", "</ln0>");
         s = convert_EA7_1_RTF_ListTag(s, "<ul>\r\n", "</ul>", "<lb0>", "</lb0>");

         // In EAv7.1, Sparx allows users to embed RTF into notes text. RTF uses <> to enclose controls just as EA_DocGen
         // does for its own controls which this function decodes. However, EA_DocGen controls are entered as text by
         // human users and so EAv7.1 converts < and > chars into a form such that they do not look like RTF tags.
         // We have to convert EA's escaping mechanism back into ascii text chars otherwise the parser wont work.
         // There may be issues to resolve with this parser if EAv7.1 users have used any of EA's RTF editing features
         // because if they do, the < and > chars there will not be escaped and the parser will find them and try to
         // decode them. Most will probably not be decodable.
         s = TextualContent.HtmlDecode(s);


         // Begin to construct a range that will eventually encompass ALL of the text we will serialize during the execution
         // of this function. This is needed later only if the caller has specified a requirement style that needs to have 
         // global strikethrough or italicising applied to (all of) the text
         object startLocation;
         object endLocation;
         startLocation = createWordDoc.WordDocument.Content.End - 1;

         // Requirement element text must be indented according to the level number of the requirement tag. Caller
         // passes in absolute indentation value but for some items such as bullets/lists, we need a relative adjustment
         // since bullet/list styles have their own indentation settings and we only want to offset them rather than overwrite
         // them.
         // Get relative (to 2.5cm) indentation user has commanded. The 2.5cm mark is the standard point where body 1 text
         // begins. At 2.5cm, the pts value is 70.866. We only indent, never outdent so end stop at 0. 
         float relative_indent_adjustment = 0;
         if (indent_pts > 0)
         {
            relative_indent_adjustment = indent_pts - (float)70.866;
            if (relative_indent_adjustment < 0)
               relative_indent_adjustment = 0;
         }

         // A working variable and a list for completed tokens
         token_type token;
         ArrayList tokens = new ArrayList();

         // default starting token - may be updated later
         initialiseToken(out token);


         lookingForTagEnd = false;
         pos_ContentStart = 0;

         // PARSING LOOP -
         // Break up the input string into tokens that identify what kind of action is to be performed
         // with the token text. The default is as seen just above - apply Body1 MS-Word style. However,
         // if user has used formatting tags, the token style name, ID, and type will be updated accordingly.
         // This must be done iteratively until we have exhausted the input string.

         // look for a tag
         pos = s.IndexOf("<", 0);
         while ((pos >= 0) && (pos < s.Length))
         {
            if (createWordDoc.abortCreationThread)
               return false;

            // record position of tag
            pos_LeftBracket = pos;

            // tag name begins at the next char
            pos_tagName = pos_LeftBracket + 1;

            // Check if this is a closing tag
            bool isEnding = false;
            if (pos < (s.Length-1))
            {
               if (s[pos+1] == '/')
               {
                  // skip past the / char
                  isEnding = true;
                  pos_tagName++;
               }
            }

            // look for the closing bracket of the tag
            pos = s.IndexOf(">", pos+1);
            if (pos < 0)
            {
               // Cannot find any '>' so we should just exit the loop
               break;
            }

            // We found a possible tag, now figure out if this is one of the tags we recognise
            string sTag = s.Substring(pos_tagName, pos - pos_tagName);

            // check for font start tag with color attribute
            Match fontMatch = null;
            if (!isEnding)
            {
               fontMatch = fontColorRegex.Match(sTag);
               if (fontMatch.Success)
                  sTag = "font";
            }

            // use hash table to identify the tag
            bool found = styleDefs.Contains(sTag);

            // if the tag was recognised
            if (found)
            {
               // record position of the closing bracket of the tag
               pos_RightBracket = pos;

               // the complete tag as it appears in the text, brackets included - used in error messages
               string tagText = s.Substring(pos_LeftBracket, pos_RightBracket - pos_LeftBracket + 1);

               // if this is an end tag, ie. </tagname>
               if (isEnding)
               {
                  // check for out of sequence error
                  if (!lookingForTagEnd)
                  {
                     if (!foundError)
                     {
                        Main.WriteOutput( string.Format("ERROR, Found out of sequence style tag ({0}), generated document text may be formatted incorrectly.", tagText), id);
                        foundError = true;
                     }
                  }
                  else
                  {
                     // Check that the end tag matches the start tag
                     // NOTE: If we were to allow lazy end-tagging (ie using '</>' only) we would have to do away with this
                     // check.
                     token_type endTagDef = ((token_type)styleDefs[sTag]);
                     if (token.styleType == endTagDef.styleType &&
                        token.styleId == endTagDef.styleId &&
                        token.styleName == endTagDef.styleName)
                     {
                        // Update the token's text field and add the now complete token to our list
                        // for processing a little later on.
                        token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
                        tokens.Add(token);

                        // re-initialise token for next tag search
                        initialiseToken(out token);

                        lookingForTagEnd = false;
                        openTagText = "";

                        pos_ContentStart = pos_RightBracket + 1;
                     }
                     else
                     {
                        // end tag does not seem to be the same as the starting tag, so ignore it
                        if (!foundError)
                        {
                           Main.WriteOutput(string.Format("ERROR, Found unmatched style tag ({0}), generated document text may be formatted incorrectly.", tagText), id);
                           foundError = true;
                        }
                     }
                  }
               }
               else
               {
                  if (lookingForTagEnd)
                  {
                     // A start tag inside another tag is not supported. Nothing is tokenised here: the content
                     // start position is unchanged, so everything up to the outer end tag (this nested tag
                     // included, as plain text) is picked up exactly once when that end tag is reached.
                     if (!foundError)
                     {
                        Main.WriteOutput(string.Format("ERROR, Found nested style tag ({0}), generated document text may be formatted incorrectly.", tagText), id);
                        foundError = true;
                     }
                  }
                  else
                  {
                     // If there is content prior to now that has not been consumed, tokenise it now
                     if ((pos_LeftBracket - pos_ContentStart) > 0)
                     {
                        token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
                        tokens.Add(token);
                     }

                     // update the token variable with this tags atributes from the hash table lookup. This overwrites
                     // the default values assigned when parsing began, or when we resumed parsing after dealing with
                     // the previous token found.

                     // For a font tag, get the color attribute value. -1 means "no usable colour".
                     // Convert.ToInt32 cannot handle an empty string or more than 8 hex digits, so those
                     // are treated as no colour. Only the low 24 bits (RRGGBB) are kept.
                     int iFontColor = -1;
                     if (fontMatch != null && fontMatch.Success)
                     {
                        string sColor = fontMatch.Groups["color"].Value;
                        if (sColor.Length > 0 && sColor.Length <= 8)
                           iFontColor = Convert.ToInt32(sColor, 16) & 0x00FFFFFF;
                     }

                     token_type lookupToken = ((token_type)styleDefs[sTag]);
                     token.styleId   = lookupToken.styleId;
                     token.styleType = lookupToken.styleType;
                     token.styleName = lookupToken.styleName;
                     if (token.styleId == style_id_e.STYLE_ID_FONT_COLOR)
                         token.level = iFontColor;   // font tokens carry their colour in the level field
                     else
                         token.level = lookupToken.level;
                     token.styleHandling = lookupToken.styleHandling;
                     token.txt = null; // we dont know what the text content will be yet. This is obtained when we encounter the end tag

                     pos_ContentStart = pos_RightBracket + 1;

                     lookingForTagEnd = true;
                     openTagText = tagText;
                  }
               }
            }
            else
            {
               // the tag was not recognised so for now we just treat it as if it were plain text and continue
            }

            // look for next tag (pos is at the '>' just examined, so the search always moves forward)
            pos = s.IndexOf("<", pos);

         } // end of the loop


         // take care of the last token, if there is one
         if (pos_ContentStart < s.Length)
         {
            // Update the token's text field
            token.txt = s.Substring(pos_ContentStart, s.Length - pos_ContentStart);
            tokens.Add(token);
         }

         if (lookingForTagEnd)
         {
            if (!foundError)
            {
               Main.WriteOutput(string.Format("ERROR, Found incomplete style tag ({0}), generated document text may be formatted incorrectly.", openTagText), id);
               foundError = true;
            }
         }

         // The way MS-Word works makes it necessary to do some formatting after we have serialised all of the text.
         // So, we need another list. This will contain elements that have the token AND the word range object that we
         // obtain when we serialise the text.
         ArrayList postFormats = new ArrayList();

         // SERIALISATION LOOP - Now process all the tokens we have found
         int tt_i = 0;
         for (tt_i = 0; tt_i < tokens.Count; tt_i++)
         {
            token_type tt = (token_type)tokens[tt_i];

            if (createWordDoc.abortCreationThread)
               return false;

            if (tt.txt != null && tt.txt.Length > 0)
            {

               switch (tt.styleType)
               {
                  case style_type_e.STYLE_EA_DOCGEN:
                     switch (tt.styleId)
                     {
                        case style_id_e.STYLE_ID_TABLE:
                           TabularContent.processTableElement(tt.txt, 0, indent_pts);
                           continuation = false;

                           // flag list numbering restart
                           postFormats.Add( new postFormat_type(null, tt) );
                           break;

                        default:
                           break;
                     }
                     break;

                  case style_type_e.STYLE_MS_WORD:
                     switch (tt.styleHandling)
                     {
                        case style_handling_e.STYLE_POST_FORMAT:
                           // Replace <br> with actual required characters, and use the caller supplied style when serialising
                           // the text
                           tt.txt = tt.txt.Replace("<br>","\r\n");

                           // Serialise the text, initially applying callers style but since post-formatting will be
                           // done later, the texts appearance will change from what this initially applied style
                           // implies.
                           wr_body = TextualContent.appendAndSelectText( tt.txt, callerStyle, continuation );
                           continuation = true;

                           while (wr_body.Characters.Last.Text.Equals("\r") || wr_body.Characters.Last.Text.Equals("\n"))
                              wr_body.End = wr_body.End - 1;  // don't format the /n or \r char at the end - doing so causes wierd ms-word exceptions later on
                           postFormats.Add( new postFormat_type(wr_body, tt) );
                           break;

                        case style_handling_e.STYLE_USE_TAG_STYLE:
                           // Replace <br> with actual required characters, and use the tag's own style when serialising
                           // the text
                           tt.txt = tt.txt.Replace("<br>","\r\n");

                           // Serialise the text, applying the tag's style 
                           wr_body = TextualContent.appendAndSelectText( tt.txt, tt.styleName, continuation );
                           continuation = true;

                           // Indent according to callers specified amount
                           if (indent_pts > 0)
                              wr_body.ParagraphFormat.LeftIndent = indent_pts;

                           // flag list numbering restart if this is printable text.
                           if (tt.txt.Trim().Length > 0)
                              postFormats.Add( new postFormat_type(null, tt) );
                           break;


                        case style_handling_e.STYLE_USE_CALLER_STYLE:
                           // Replace <br> with actual required characters, and use the caller supplied style when serialising
                           // the text
                           tt.txt = tt.txt.Replace("<br>","\r\n");

                           // Serialise the text, applying callers style 
                           wr_body = TextualContent.appendAndSelectText( tt.txt, callerStyle, continuation );
                           continuation = true;

                           // Indent according to callers specified amount
                           if (indent_pts > 0)
                              wr_body.ParagraphFormat.LeftIndent = indent_pts;

                           // flag list numbering restart if this is printable text.
                           if (tt.txt.Trim().Length > 0)
                              postFormats.Add( new postFormat_type(null, tt) );
                           break;

                        default:
                           break;
                     }
                     break;

                  default:
                     break;
               }
            }
         } // end of serialisation loop


         // POST-FORMATTING LOOP - Now apply post formatting commands to text already serialised in previous loop
         int last_list_level = 0;
         foreach (postFormat_type pf in postFormats)
         {
            object style;

            if (createWordDoc.abortCreationThread)
               return false;

            // a null word range implies we must restart numbering for any lists
            if (pf.m_wr == null)
            {
               last_list_level = 0;
            }
            else
            {
               switch (pf.m_tk.styleId)
               {
                  case style_id_e.STYLE_ID_BOLD:
                     pf.m_wr.Select();
                     createWordDoc.WordApp.Selection.Range.Bold = 1;
                     last_list_level = 0;
                     break;

                  case style_id_e.STYLE_ID_ITALIC:
                     pf.m_wr.Select();
                     createWordDoc.WordApp.Selection.Range.Italic = 1;
                     last_list_level = 0;
                     break;

                  case style_id_e.STYLE_ID_UNDERLINE:
                     pf.m_wr.Select();
                     createWordDoc.WordApp.Selection.Range.Underline = WdUnderline.wdUnderlineSingle;
                     last_list_level = 0;
                     break;

                  case style_id_e.STYLE_ID_FONT_COLOR:
                     // The level field holds the colour parsed from the tag, or a negative value when the
                     // tag supplied no usable colour - in which case the text colour is left as it is.
                     if (pf.m_tk.level >= 0)
                     {
                        pf.m_wr.Select();
                        createWordDoc.WordApp.Selection.Range.Font.Color = (WdColor) ReverseRGB((UInt32)pf.m_tk.level);
                     }
                     last_list_level = 0;
                     break;

                  default:
                     // Handle bullets/lists
                     if (pf.m_tk.styleId >= style_id_e.STYLE_ID_LIST_BULLET_0)
                     {
                        style = pf.m_tk.styleName;
                        pf.m_wr.Select();
                        createWordDoc.WordApp.Selection.Range.set_Style(ref style);

                        // Figure out if we have to restart numbering
                        if (last_list_level < pf.m_tk.level)
                        {
                           // only need to restart numbering if this list displays numbering - bullets and simple
                           // indents do not, but alpha and numeric lists do. The style_id_e type has been organised to
                           // make this a simple test.
                           if (pf.m_tk.styleId >= style_id_e.STYLE_ID_LIST_NUMBER_0)
                           {
                              // To restart numbering, the only way to do it is to (re-)apply the list template to the 
                              // selection with a "continue previous list" setting of false, and an "apply to"
                              // setting of "whole list". 
                              object continuePreviousList = false;
                              object applyTo = WdListApplyTo.wdListApplyToWholeList;
                              object defListBehavour = WdDefaultListBehavior.wdWord10ListBehavior;
                              
                              ListTemplate lt = createWordDoc.WordApp.Selection.Range.ListFormat.ListTemplate;

                              createWordDoc.WordApp.Selection.Range.ListFormat.ApplyListTemplate(
                                 lt, ref continuePreviousList, ref applyTo, ref defListBehavour);
                           }
                        }

                        // shift content right by relative indent adjustment we calculated earlier
                        if (relative_indent_adjustment > 0)
                        {
                           createWordDoc.WordApp.Selection.Range.ParagraphFormat.LeftIndent += relative_indent_adjustment;
                        }

                        last_list_level = pf.m_tk.level;
                     }
                     break;
               }
            }
         } // end of post-formatting loop

         // Special handling for Proposed and Rejected requirement sections - here the text must be italicies or
         // struck through (see definition of the styleName_ReqPropBody and styleName_ReqRejBody styles in StyleContent.cs).
         bool isProposedBody = callerStyle.Equals(EA_Constants.styleName_ReqPropBody);
         bool isRejectedBody = !isProposedBody && callerStyle.Equals(EA_Constants.styleName_ReqRejBody);
         if (isProposedBody || isRejectedBody)
         {
            // Complete construction of a range that will encompass ALL of the text we will serialize during the execution
            // of this function
            endLocation = createWordDoc.WordDocument.Content.End;
            Range wr_total = createWordDoc.WordDocument.Range(ref startLocation, ref endLocation);
            while (wr_total.Characters.Last.Text.Equals("\r") || wr_total.Characters.Last.Text.Equals("\n"))
               wr_total.End = wr_total.End - 1;  // don't format the \r\n char at the end - doing so causes wierd ms-word exceptions later on

            if (isProposedBody)
            {
               // italicise
               wr_total.Font.Italic = (int)MsoTriState.msoTrue;
            }
            else
            {
               // strikethrough - use msoCTrue since msoTrue simply toggles strikethough attribute, or so it seems
               wr_total.Font.StrikeThrough = (int)MsoTriState.msoCTrue;
            }
         }

         return true;
      }

      /// <summary>
      /// Swaps the lowest and third-lowest bytes of a 24-bit colour value (0xRRGGBB becomes 0xBBGGRR
      /// and vice versa). The middle byte is kept in place and the top byte of the result is zero.
      /// </summary>
      /// <param name="value">Colour value; only the low 24 bits are used.</param>
      /// <returns>The value with its first and third bytes exchanged.</returns>
      public static UInt32 ReverseRGB(UInt32 value)
      {
          UInt32 lowByte    = value & 0x000000FFU;
          UInt32 middleByte = value & 0x0000FF00U;
          UInt32 highByte   = value & 0x00FF0000U;

          return (lowByte << 16) | middleByte | (highByte >> 16);
      }

   }
}
// Subversion Repositories DevTools
//
// (root)/EA_DocGen/trunk/EA_DocGen/TextParser.cs – Rev 2136