Subversion Repositories DevTools

Rev

Rev 2128 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2126 ghuddy 1
using System;
2
using System.Collections;
3
using System.Text;
4
using Word;
5
 
6
// TODO
7
//
8
// 1) Make more efficient by using style IDs instead of names
9
// 2) Possibly let the user end a tag simply with </>
10
 
11
 
12
 
13
namespace EA_DocGen
14
{
15
	/// <summary>
	/// Parses the notes text of an EA element, which may contain embedded formatting tags
	/// (for example &lt;b&gt;bold&lt;/b&gt;), and appends the resulting content to the word
	/// document. initialise() builds the table of recognised tags; parse() builds it on
	/// demand if that has not been done yet.
	/// </summary>
	public class TextParser
	{
      // An embedded formatting tag can be one of two types
      // * An EA_DocGen special format
      // * An MS-Word Style based format
      private enum style_type_e
      {
         STYLE_EA_DOCGEN = 0,
         STYLE_MS_WORD,
         STYLE_UNKNOWN = -1
      };

      // A token is a block of text associated with a tag (style) name and type
      private struct token_type
      {
         public string txt;               // the text content, null until it is known
         public style_type_e styleType;   // which family of formatting the tag belongs to
         public string styleName;         // style to apply to the text
         public int gen1;                 // list level (1..5) for the list tags, 0 for every other tag
      };

      // Some formatting has to be applied after ALL the text of a description has been
      // appended to the word document. For this formatting, we need to remember the word
      // ranges that denote the text and the style name to apply.
      // A null range is used as a marker meaning "list numbering must restart here".
      private struct postFormat_type
      {
         public postFormat_type(Word.Range wr, token_type tk)
         {
            m_wr = wr;
            m_tk = tk;
         }
         public Word.Range m_wr;
         public token_type m_tk;
      };

      // The user commanded indentation is interpreted relative to 2.5cm, which is 70.866 pts.
      private const float BASE_INDENT_PTS = (float)70.866;

      // Use a hash table for recording the allowed tags and their attributes, facilitating rapid
      // lookup during parsing. Keys are tag names (string), values are boxed token_type items.
      private static Hashtable styleDefs = null;


      /// <summary>
      /// Class initialisation function. (Re)builds the table of tags that users are allowed
      /// to embed in the notes text of an EA element.
      /// </summary>
      public static void initialise()
      {
         // initialise the hash table
         styleDefs = new Hashtable();

         // This list dictates what tags a user can use in the notes text of an EA element. When adding new items
         // to this list, we have to update the parsing function obviously.

         // The hash key is the short tag name that end-users will use in their descriptions. Tags found in
         // user text are matched to these keys, and the style definition if found can then be used.

         // EA_DocGen tags
         styleDefs.Add( EA_Constants.EA_DocGenTable, formStyleDef( style_type_e.STYLE_EA_DOCGEN, EA_Constants.EA_DocGenTable, 0 ) );

         // MS-Word formatting tags
         styleDefs.Add( "b", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Bold      ,0) );
         styleDefs.Add( "i", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Italic    ,0) );
         styleDefs.Add( "u", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Underline ,0) );

         // List tags. The last argument is the list level (1 = outermost) used later on to
         // decide when list numbering has to be restarted.
         styleDefs.Add( "lb0", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet0,1 ) );
         styleDefs.Add( "lb1", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet1,2 ) );
         styleDefs.Add( "lb2", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet2,3 ) );
         styleDefs.Add( "lb3", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet3,4 ) );
         styleDefs.Add( "lb4", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet4,5 ) );
         styleDefs.Add( "ln0", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber0,1 ) );
         styleDefs.Add( "ln1", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber1,2 ) );
         styleDefs.Add( "ln2", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber2,3 ) );
         styleDefs.Add( "ln3", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber3,4 ) );
         styleDefs.Add( "ln4", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber4,5 ) );
         styleDefs.Add( "li0", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent0,1 ) );
         styleDefs.Add( "li1", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent1,2 ) );
         styleDefs.Add( "li2", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent2,3 ) );
         styleDefs.Add( "li3", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent3,4 ) );
         styleDefs.Add( "li4", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent4,5 ) );
         styleDefs.Add( "la0", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList0, 1 ) );
         styleDefs.Add( "la1", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList1, 2 ) );
         styleDefs.Add( "la2", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList2, 3 ) );
         styleDefs.Add( "la3", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList3, 4 ) );
         styleDefs.Add( "la4", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList4, 5 ) );

         styleDefs.Add( "code", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_CodeText, 0 ) );
         styleDefs.Add( "normal", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Normal  , 0 ) );
         styleDefs.Add( "note", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Note  , 0 ) );
         styleDefs.Add( "warn", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Warning , 0 ) );
      }

      /// <summary>
      /// Builds a token that carries a style definition but no text yet.
      /// </summary>
      /// <param name="styleType">Family of formatting the style belongs to.</param>
      /// <param name="styleName">Name of the style.</param>
      /// <param name="genvar1">General purpose value; the tag table stores the list level here.</param>
      /// <returns>A token with its txt field set to null.</returns>
      private static token_type formStyleDef(style_type_e styleType, string styleName, int genvar1)
      {
         token_type tokenType = new token_type();
         tokenType.styleType = styleType;
         tokenType.styleName = styleName;
         tokenType.txt = null;
         tokenType.gen1 = genvar1;
         return tokenType;
      }

      /// <summary>
      /// Writes a tag error to the output, but only for the first error found in a piece of text.
      /// </summary>
      /// <param name="problem">Short description inserted into the message, eg. "nested".</param>
      /// <param name="tagText">The offending tag as it appears in the text, brackets included.</param>
      /// <param name="id">Passed through unchanged to Main.WriteOutput.</param>
      /// <param name="foundError">Set to true once an error has been written.</param>
      private static void reportTagError(string problem, string tagText, int id, ref bool foundError)
      {
         if (foundError)
            return;

         Main.WriteOutput( string.Format("ERROR, Found {0} style tag ({1}), generated document text may be formatted incorrectly.", problem, tagText), id);
         foundError = true;
      }

      /// <summary>
      /// Parse the notes of an element and use the results to form document content.
      /// </summary>
      /// <param name="s">The text to parse. Null or empty text produces no content.</param>
      /// <param name="id">Passed to Main.WriteOutput along with any tag error message.</param>
      /// <param name="defaultStyle">Style used for text that is outside of any tag, and as the initial
      /// style of bold/italic/underline/list text before its own formatting is applied.</param>
      /// <param name="indent_pts">Left indent in points. Values of zero or less leave indentation alone.</param>
      /// <param name="continuation">Passed to TextualContent.appendAndSelectText for the first block of
      /// text appended. Every later block is appended with this set to true, except that a table
      /// resets it to false for the text that follows the table.</param>
      /// <returns>Always true.</returns>
      public static bool parse(string s, int id, string defaultStyle, float indent_pts, bool continuation)
      {
         // nothing to parse means nothing to append
         if (s == null || s.Length == 0)
            return true;

         // make sure the tag table exists even if the caller never called initialise()
         if (styleDefs == null)
            initialise();

         // get relative (to 2.5cm) indentation user has commanded. We only indent, never outdent
         // so end stop at 0.
         float relative_indent_adjustment = 0;
         if (indent_pts > 0)
         {
            relative_indent_adjustment = indent_pts - BASE_INDENT_PTS;
            if (relative_indent_adjustment < 0)
               relative_indent_adjustment = 0;
         }

         ArrayList tokens = tokenise(s, id);
         ArrayList postFormats = appendTokens(tokens, defaultStyle, indent_pts, continuation);
         applyPostFormats(postFormats, relative_indent_adjustment);
         return true;
      }

      /// <summary>
      /// Splits the text into tokens, each being a run of text plus the style of the tag that
      /// surrounded it. Text outside of any recognised tag gets the Body1 style. Tags that are
      /// not in the tag table are left in place as if they were plain text.
      /// </summary>
      /// <param name="s">The text to split. Must not be null.</param>
      /// <param name="id">Passed to Main.WriteOutput along with any tag error message.</param>
      /// <returns>List of token_type items in the order they occur in the text.</returns>
      private static ArrayList tokenise(string s, int id)
      {
         ArrayList tokens = new ArrayList();
         bool lookingForTagEnd = false;
         bool foundError = false;
         int pos_ContentStart = 0;

         // text of the start tag we are currently inside of, for error reporting
         string openTagText = null;

         // default starting token - may be updated later
         token_type token = formStyleDef(style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Body1, 0);

         // look for a tag
         int pos = s.IndexOf('<');
         while ((pos >= 0) && (pos < s.Length))
         {
            // record position of tag, the tag name begins at the next char
            int pos_LeftBracket = pos;
            int pos_tagName = pos_LeftBracket + 1;

            // Check if this is a closing tag, and if so skip past the / char
            bool isEnding = (pos_tagName < s.Length) && (s[pos_tagName] == '/');
            if (isEnding)
               pos_tagName++;

            // look for the closing bracket of the tag
            int pos_RightBracket = s.IndexOf('>', pos_LeftBracket + 1);
            if (pos_RightBracket < 0)
            {
               // Cannot find any '>' so we should just exit the loop
               break;
            }

            // We found a possible tag, now figure out if this is one of the tags we recognise.
            // The hash table indexer returns null for a key that is not present.
            object styleDef = styleDefs[s.Substring(pos_tagName, pos_RightBracket - pos_tagName)];
            if (styleDef == null)
            {
               // The tag was not recognised so we treat it as plain text. Resume the search just
               // after the '<' rather than after the '>', otherwise a stray '<' in the text would
               // swallow the next genuine tag.
               pos = s.IndexOf('<', pos_LeftBracket + 1);
               continue;
            }

            token_type tagDef = (token_type)styleDef;
            string tagText = s.Substring(pos_LeftBracket, pos_RightBracket - pos_LeftBracket + 1);

            if (isEnding)
            {
               // this is an end tag, ie. </tagname>
               if (!lookingForTagEnd)
               {
                  reportTagError("out of sequence", tagText, id, ref foundError);
               }
               else if (token.styleName == tagDef.styleName &&
                        token.styleType == tagDef.styleType)
               {
                  // The end tag matches the start tag. Update the token's text field and add the
                  // now complete token to our list for processing a little later on.
                  token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
                  tokens.Add(token);

                  // re-initialise token for next tag search
                  token = formStyleDef(style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Body1, 0);
                  lookingForTagEnd = false;
                  openTagText = null;

                  pos_ContentStart = pos_RightBracket + 1;
               }
               else
               {
                  // end tag does not seem to be the same as the starting tag, so ignore it
                  reportTagError("unmatched", tagText, id, ref foundError);
               }
            }
            else if (lookingForTagEnd)
            {
               // A start tag inside another tag. It is left in the text of the outer tag. Nothing
               // is tokenised here because the text so far is emitted when the outer tag closes;
               // tokenising it now as well would duplicate it in the document.
               reportTagError("nested", tagText, id, ref foundError);
            }
            else
            {
               // If there is content prior to now that has not been consumed, tokenise it now
               if (pos_LeftBracket > pos_ContentStart)
               {
                  token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
                  tokens.Add(token);
               }

               // update the token variable with this tags attributes from the hash table lookup
               token = tagDef;
               token.txt = null; // we dont know what the text content will be yet. This is obtained when we encounter the end tag

               openTagText = tagText;
               pos_ContentStart = pos_RightBracket + 1;
               lookingForTagEnd = true;
            }

            // look for next tag
            pos = s.IndexOf('<', pos_RightBracket + 1);
         }

         // take care of the last token, if there is one
         if (pos_ContentStart < s.Length)
         {
            token.txt = s.Substring(pos_ContentStart);
            tokens.Add(token);
         }

         // a start tag that was never closed
         if (lookingForTagEnd)
            reportTagError("incomplete", openTagText, id, ref foundError);

         return tokens;
      }

      /// <summary>
      /// Appends the text of every token to the document and collects the formatting that can
      /// only be applied once all of the text is in place.
      /// </summary>
      /// <param name="tokens">List of token_type items to append, in order.</param>
      /// <param name="defaultStyle">Style used when appending body text and text that is post-formatted.</param>
      /// <param name="indent_pts">Left indent in points, applied when greater than zero.</param>
      /// <param name="continuation">Continuation flag for the first text appended.</param>
      /// <returns>List of postFormat_type items, in document order.</returns>
      private static ArrayList appendTokens(ArrayList tokens, string defaultStyle, float indent_pts, bool continuation)
      {
         ArrayList postFormats = new ArrayList();
         Word.Range wr_body;

         foreach (token_type tt in tokens)
         {
            // tokens without any text contribute nothing
            if (tt.txt == null || tt.txt.Length == 0)
               continue;

            switch (tt.styleType)
            {
               case style_type_e.STYLE_EA_DOCGEN:
                  switch (tt.styleName)
                  {
                     case EA_Constants.EA_DocGenTable:
                        TabularContent.processTableElement(tt.txt, 0, indent_pts);
                        continuation = false;

                        // flag list numbering restart
                        postFormats.Add( new postFormat_type(null, tt) );
                        break;

                     default:
                        break;
                  }
                  break;

               case style_type_e.STYLE_MS_WORD:
                  switch (tt.styleName)
                  {
                     // List all formatting that has to be done after all of the text has been inserted into the
                     // document ie. post-formatting
                     case EA_Constants.styleName_Bold:
                     case EA_Constants.styleName_Italic:
                     case EA_Constants.styleName_Underline:
                     case EA_Constants.styleName_ListBullet0:
                     case EA_Constants.styleName_ListBullet1:
                     case EA_Constants.styleName_ListBullet2:
                     case EA_Constants.styleName_ListBullet3:
                     case EA_Constants.styleName_ListBullet4:
                     case EA_Constants.styleName_ListNumber0:
                     case EA_Constants.styleName_ListNumber1:
                     case EA_Constants.styleName_ListNumber2:
                     case EA_Constants.styleName_ListNumber3:
                     case EA_Constants.styleName_ListNumber4:
                     case EA_Constants.styleName_ListIndent0:
                     case EA_Constants.styleName_ListIndent1:
                     case EA_Constants.styleName_ListIndent2:
                     case EA_Constants.styleName_ListIndent3:
                     case EA_Constants.styleName_ListIndent4:
                     case EA_Constants.styleName_AlphaList0 :
                     case EA_Constants.styleName_AlphaList1 :
                     case EA_Constants.styleName_AlphaList2 :
                     case EA_Constants.styleName_AlphaList3 :
                     case EA_Constants.styleName_AlphaList4 :
                        wr_body = TextualContent.appendAndSelectText( tt.txt, defaultStyle, continuation );
                        continuation = true;
                        if (wr_body.Characters.Last.Text.Equals("\r"))
                           wr_body.End = wr_body.End - 1;  // don't format the \r char at the end - doing so causes weird ms-word exceptions later on
                        postFormats.Add( new postFormat_type(wr_body, tt) );
                        break;

                     // Paragraph styles that are applied as the text is appended
                     case EA_Constants.styleName_CodeText:
                     case EA_Constants.styleName_Normal  :
                     case EA_Constants.styleName_Note    :
                     case EA_Constants.styleName_Warning :
                        wr_body = TextualContent.appendAndSelectText( tt.txt, tt.styleName, continuation );
                        continuation = true;
                        if (indent_pts > 0)
                           wr_body.ParagraphFormat.LeftIndent = indent_pts;

                        // flag list numbering restart
                        postFormats.Add( new postFormat_type(null, tt) );
                        break;

                     // List all other formatting that can be done immediately.
                     case EA_Constants.styleName_Body1:
                        wr_body = TextualContent.appendAndSelectText( tt.txt, defaultStyle, continuation );
                        continuation = true;
                        if (indent_pts > 0)
                           wr_body.ParagraphFormat.LeftIndent = indent_pts;

                        // flag list numbering restart if this is printable text.
                        if (tt.txt.Trim().Length > 0)
                           postFormats.Add( new postFormat_type(null, tt) );
                        break;

                     default:
                        break;
                  }
                  break;

               default:
                  break;
            }
         }

         return postFormats;
      }

      /// <summary>
      /// Applies the post formatting commands to text that has already been appended to the document.
      /// </summary>
      /// <param name="postFormats">List of postFormat_type items, in document order.</param>
      /// <param name="relative_indent_adjustment">Extra left indent in points added to list
      /// paragraphs when greater than zero.</param>
      private static void applyPostFormats(ArrayList postFormats, float relative_indent_adjustment)
      {
         // list level of the previous list item, 0 when the previous item was not a list item
         int last_list_level = 0;

         foreach (postFormat_type pf in postFormats)
         {
            object style;

            // a null word range implies we must restart numbering for any lists
            if (pf.m_wr == null)
            {
               last_list_level = 0;
               continue;
            }

            switch (pf.m_tk.styleName)
            {
               case EA_Constants.styleName_Bold:
                  pf.m_wr.Select();
                  createWordDoc.WordApp.Selection.Range.Bold = 1;
                  last_list_level = 0;
                  break;

               case EA_Constants.styleName_Italic:
                  pf.m_wr.Select();
                  createWordDoc.WordApp.Selection.Range.Italic = 1;
                  last_list_level = 0;
                  break;

               case EA_Constants.styleName_Underline:
                  pf.m_wr.Select();
                  createWordDoc.WordApp.Selection.Range.Underline = Word.WdUnderline.wdUnderlineSingle;
                  last_list_level = 0;
                  break;

               case EA_Constants.styleName_ListBullet0:
               case EA_Constants.styleName_ListBullet1:
               case EA_Constants.styleName_ListBullet2:
               case EA_Constants.styleName_ListBullet3:
               case EA_Constants.styleName_ListBullet4:
               case EA_Constants.styleName_ListIndent0:
               case EA_Constants.styleName_ListIndent1:
               case EA_Constants.styleName_ListIndent2:
               case EA_Constants.styleName_ListIndent3:
               case EA_Constants.styleName_ListIndent4:
               case EA_Constants.styleName_ListNumber0:
               case EA_Constants.styleName_ListNumber1:
               case EA_Constants.styleName_ListNumber2:
               case EA_Constants.styleName_ListNumber3:
               case EA_Constants.styleName_ListNumber4:
               case EA_Constants.styleName_AlphaList0 :
               case EA_Constants.styleName_AlphaList1 :
               case EA_Constants.styleName_AlphaList2 :
               case EA_Constants.styleName_AlphaList3 :
               case EA_Constants.styleName_AlphaList4 :
                  style = pf.m_tk.styleName;
                  pf.m_wr.Select();
                  createWordDoc.WordApp.Selection.Range.set_Style(ref style);

                  // Figure out if we have to restart numbering. This is the case when this item
                  // is at a deeper list level than the item before it, which includes the first
                  // list item after any non-list content. Only the alpha and numbered list
                  // styles are affected.
                  if (last_list_level < pf.m_tk.gen1)
                  {
                     if ((pf.m_tk.styleName.IndexOf("Alpha") >= 0) || (pf.m_tk.styleName.IndexOf("Number") >= 0))
                     {
                        object continuePreviousList = false;
                        object applyTo = Word.WdListApplyTo.wdListApplyToWholeList;
                        object defListBehavour = Word.WdDefaultListBehavior.wdWord10ListBehavior;

                        // re-apply the list template the range already has, but this time
                        // without continuing the previous list
                        Word.ListTemplate lt = createWordDoc.WordApp.Selection.Range.ListFormat.ListTemplate;

                        createWordDoc.WordApp.Selection.Range.ListFormat.ApplyListTemplate(
                           lt, ref continuePreviousList, ref applyTo, ref defListBehavour);
                     }
                  }

                  // shift content right by the relative indent adjustment
                  if (relative_indent_adjustment > 0)
                  {
                     createWordDoc.WordApp.Selection.Range.ParagraphFormat.LeftIndent += relative_indent_adjustment;
                  }

                  last_list_level = pf.m_tk.gen1;
                  break;

               default:
                  break;
            }
         }
      }
	}
514
}