Subversion Repositories DevTools

Rev

Rev 2126 | Rev 2130 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2126 ghuddy 1
using System;
2
using System.Collections;
3
using System.Text;
4
using Word;
5
 
6
// TODO
7
//
8
// 1) Make more efficient by using style IDs instead of names
9
// 2) Possibly let the user end a tag simply with </>
10
 
11
 
12
 
13
namespace EA_DocGen
14
{
15
	/// <summary>
16
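	/// Parses text containing embedded formatting tags (e.g. &lt;b&gt;...&lt;/b&gt;) and appends the
	/// resulting formatted content to the word document being generated.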
17
	/// </summary>
18
	public class TextParser
19
	{
20
      // An embedded formatting tag can be one of two types
21
      // * An EA_DocGen special format
22
      // * An MS-Word Style based format
23
      private enum style_type_e
24
      {
25
         STYLE_EA_DOCGEN = 0,
26
         STYLE_MS_WORD,
27
         STYLE_UNKNOWN = -1
28
      };
29
 
30
      // A token is a block of text associated with a tag (style) name and type
31
      private struct token_type
32
      {
33
         public string txt;
34
         public style_type_e styleType;
35
         public string styleName;
36
         public int gen1;
37
      };
38
 
39
      // Some formatting has to be applied after ALL the text of a description has been 
40
      // appended to the word document. For this formatting, we need to remember the word
41
      // ranges that denote the text and the style name to apply.
42
      private struct postFormat_type
43
      {
44
         public postFormat_type(Word.Range wr, token_type tk)
45
         {
46
            m_wr = wr;
47
            m_tk = tk;
48
         }
49
         public Word.Range m_wr;
50
         public token_type m_tk;
51
      };
52
 
53
      // Use a hash table for recording the allowed tags and their attributes, facilitating rapid
54
      // lookup during parsing.
55
      private static Hashtable styleDefs = null;
56
 
57
 
58
      /// <summary>
59
      /// Class initialisation function
60
      /// </summary>
61
      public static void initialise()
62
      {
63
         // initialsie the hash table
64
         styleDefs = new Hashtable();
65
 
66
         // This list dictates what tags a user can use in the notes text of an EA element. When adding new items
67
         // to this list, we have to update the parsing function obviously.
68
 
69
         // The hash key is the short tag name that end-users will use in their descriptions. Tags found in 
70
         // user text is matched to these keys, and the style definition if found can then be used.
71
 
72
         // EA_DocGen tags
73
         styleDefs.Add( EA_Constants.EA_DocGenTable, formStyleDef( style_type_e.STYLE_EA_DOCGEN, EA_Constants.EA_DocGenTable, 0 ) );
74
 
75
         // MS-Word formatting tags
76
         styleDefs.Add( "b", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Bold      ,0) );
77
         styleDefs.Add( "i", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Italic    ,0) );
78
         styleDefs.Add( "u", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Underline ,0) );
79
 
80
         styleDefs.Add( "lb0", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet0,1 ) );
81
         styleDefs.Add( "lb1", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet1,2 ) );
82
         styleDefs.Add( "lb2", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet2,3 ) );
83
         styleDefs.Add( "lb3", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet3,4 ) );
84
         styleDefs.Add( "lb4", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListBullet4,5 ) );
85
         styleDefs.Add( "ln0", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber0,1 ) );
86
         styleDefs.Add( "ln1", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber1,2 ) );
87
         styleDefs.Add( "ln2", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber2,3 ) );
88
         styleDefs.Add( "ln3", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber3,4 ) );
89
         styleDefs.Add( "ln4", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListNumber4,5 ) );
90
         styleDefs.Add( "li0", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent0,1 ) );
91
         styleDefs.Add( "li1", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent1,2 ) );
92
         styleDefs.Add( "li2", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent2,3 ) );
93
         styleDefs.Add( "li3", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent3,4 ) );
94
         styleDefs.Add( "li4", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_ListIndent4,5 ) );
95
         styleDefs.Add( "la0", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList0, 1 ) );
96
         styleDefs.Add( "la1", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList1, 2 ) );
97
         styleDefs.Add( "la2", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList2, 3 ) );
98
         styleDefs.Add( "la3", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList3, 4 ) );
99
         styleDefs.Add( "la4", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_AlphaList4, 5 ) );
100
 
101
         styleDefs.Add( "code", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_CodeText, 0 ) );
102
         styleDefs.Add( "normal", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Normal  , 0 ) );
103
         styleDefs.Add( "note", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Note  , 0 ) );
104
         styleDefs.Add( "warn", formStyleDef( style_type_e.STYLE_MS_WORD, EA_Constants.styleName_Warning , 0 ) );
105
 
106
      }
107
 
108
      /// <summary>
109
      /// Helper for initialise() function
110
      /// </summary>
111
      /// <param name="styleType"></param>
112
      /// <param name="styleName"></param>
113
      /// <returns></returns>
114
      private static token_type formStyleDef(style_type_e styleType, string styleName, int genvar1)
115
      {
116
         token_type tokenType = new token_type();
117
         tokenType.styleType = styleType;
118
         tokenType.styleName = styleName;
119
         tokenType.txt = null;
120
         tokenType.gen1 = genvar1;
121
         return tokenType;
122
      }
123
 
124
 
125
 
126
      /// <summary>
127
      /// Parse the notes of an element and use the results to form document content.
128
      /// </summary>
129
      /// <param name="theElement"></param>
130
      /// <param name="defaultStyle"></param>
131
      /// <returns></returns>
132
      public static bool parse(string s, int id, string defaultStyle, float indent_pts, bool continuation)
133
      {
134
         int pos;
135
         int pos_LeftBracket = 0;
136
         int pos_RightBracket = 0;
137
         int pos_tagName;
138
         int pos_ContentStart;
139
         int pos_ContentEnd;
140
         bool lookingForTagEnd;
141
         bool foundError = false;
142
         Word.Range wr_body;
143
 
144
         // get relative (to 2.5cm) indentation user has commanded. At 2.5cm, the pts value is 70.866. We only indent, never outdent
145
         // so end stop at 0.
146
         float relative_indent_adjustment = 0;
147
         if (indent_pts > 0)
148
         {
149
            relative_indent_adjustment = indent_pts - (float)70.866;
150
            if (relative_indent_adjustment < 0)
151
               relative_indent_adjustment = 0;
152
         }
153
 
154
         token_type token;
155
         ArrayList tokens = new ArrayList();
156
 
157
         // default starting token - may be updated later
158
         token.styleName = EA_Constants.styleName_Body1;
159
         token.styleType = style_type_e.STYLE_MS_WORD;
160
         token.txt = null;
161
         token.gen1 = 0;
162
 
163
         lookingForTagEnd = false;
164
         pos_ContentStart = 0;
165
 
166
         // look for a tag
167
         pos = s.IndexOf("<", 0);
168
         while ((pos >= 0) && (pos < s.Length))
169
         {
2128 ghuddy 170
            if (createWordDoc.abortCreationThread)
171
               return false;
172
 
2126 ghuddy 173
            // record position of tag
174
            pos_LeftBracket = pos;
175
 
176
            // tag name begins at the next char
177
            pos_tagName = pos_LeftBracket + 1;
178
 
179
            // Check if this is a closing tag
180
            bool isEnding = false;
181
            if (pos < (s.Length-1))
182
            {
183
               if (s[pos+1] == '/')
184
               {
185
                  // skip past the / char
186
                  isEnding = true;
187
                  pos_tagName++;
188
               }
189
            }
190
 
191
            // We found a possible tag, now figure out if this is one of the tags we recognise
192
            bool found = false;
193
 
194
            // look for the closing bracket of the tag
195
            pos = s.IndexOf(">", pos+1);
196
            if (pos >= 0)
197
            {
198
               found = styleDefs.Contains(s.Substring(pos_tagName, pos - pos_tagName));
199
            }
200
            else
201
            {
202
               // Cannot find any '>' so we should just exit the loop
203
               break;
204
            }
205
 
206
            // if the tag was recognised
207
            if (found)
208
            {
209
               // record position of the closing bracket of the tag
210
               pos_RightBracket = pos;
211
 
212
               // if this is an end tag, ie. </tagname>
213
               if (isEnding)
214
               {
215
                  pos_ContentEnd = pos_LeftBracket - 1;  // not sure if we really need to compute pos_ContentEnd
216
 
217
                  // check for out of sequence error
218
                  if (!lookingForTagEnd)
219
                  {
220
                     if (!foundError)
221
                     {
222
                        Main.WriteOutput( string.Format("ERROR, Found out of sequence style tag ({0}), generated document text may be formatted incorrectly.", s.Substring(pos_LeftBracket, pos_RightBracket - pos_LeftBracket + 1)), id);
223
                        foundError = true;
224
                     }
225
                  }
226
                  else
227
                  {
228
                     // Check that the end tag matches the start tag
229
                     token_type tt = ((token_type)styleDefs[s.Substring(pos_tagName, pos_RightBracket-pos_tagName)]);
230
                     if (token.styleName == tt.styleName && 
231
                        token.styleType == tt.styleType)
232
                     {
233
                        // Update the token's text field and add the now complete token to our list
234
                        // for processing a little later on.
235
                        token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
236
                        tokens.Add(token);
237
 
238
                        // re-initialise token for next tag search
239
                        token.styleName = EA_Constants.styleName_Body1;
240
                        token.styleType = style_type_e.STYLE_MS_WORD;
241
                        token.txt = null;
242
 
243
                        lookingForTagEnd = false;
244
 
245
                        pos_ContentStart = pos_RightBracket + 1;
246
                     }
247
                     else
248
                     {
249
                        // end tag does not seem to be the same as the starting tag, so ignore it
250
                        if (!foundError)
251
                        {
252
                           Main.WriteOutput(string.Format("ERROR, Found unmatched style tag ({0}), generated document text may be formatted incorrectly.", s.Substring(pos_LeftBracket, pos_RightBracket - pos_LeftBracket + 1)), id);
253
                           foundError = true;
254
                        }
255
                     }
256
                  }
257
               }
258
               else
259
               {
260
                  // If there is content prior to now that has not been consumed, tokenise it now
261
                  if ((pos_LeftBracket - pos_ContentStart) > 0)
262
                  {
263
                     token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
264
                     tokens.Add(token);
265
                  }
266
 
267
                  if (lookingForTagEnd)
268
                  {
269
                     if (!foundError)
270
                     {
271
                        Main.WriteOutput(string.Format("ERROR, Found nested style tag ({0}), generated document text may be formatted incorrectly.", s.Substring(pos_LeftBracket, pos_RightBracket - pos_LeftBracket + 1)), id);
272
                        foundError = true;
273
                     }
274
                  }
275
                  else
276
                  {
277
                     // update the token variable with this tags atributes from the hash table lookup
278
                     token_type tt = ((token_type)styleDefs[s.Substring(pos_tagName, pos_RightBracket-pos_tagName)]);
279
                     token.styleName = tt.styleName;
280
                     token.styleType = tt.styleType;
281
                     token.gen1      = tt.gen1;
282
                     token.txt = null; // we dont know what the text content will be yet. This is obtained when we encounter the end tag
283
 
284
                     pos_ContentStart = pos_RightBracket + 1;
285
 
286
                     lookingForTagEnd = true;
287
                  }
288
               }
289
            }
290
            else
291
            {
292
               // the tag was not recognised so for now we just treat it as if it were plain text and continue
293
               //pos++;
294
            }
295
 
296
            // look for next tag
297
            pos = s.IndexOf("<", pos);
298
 
299
         } // end of the loop
300
 
301
 
302
         // POST-LOOP operations
303
 
304
         // take care of the last token, if there is one
305
         if (pos_ContentStart < s.Length)
306
         {
307
            // Update the token's text field
308
            token.txt = s.Substring(pos_ContentStart, s.Length - pos_ContentStart);
309
            tokens.Add(token);
310
         }
311
 
312
         if (lookingForTagEnd)
313
         {
314
            if (!foundError)
315
            {
316
               Main.WriteOutput(string.Format("ERROR, Found incomplete style tag ({0}), generated document text may be formatted incorrectly.", s.Substring(pos_LeftBracket, pos_RightBracket - pos_LeftBracket + 1)), id);
317
               foundError = true;
318
            }
319
         }
320
 
321
 
322
         ArrayList postFormats = new ArrayList();
323
 
324
         // Now process all the tokens we have found
325
         foreach (token_type tt in tokens)
326
         {
2128 ghuddy 327
            if (createWordDoc.abortCreationThread)
328
               return false;
329
 
2126 ghuddy 330
            if (tt.txt != null && tt.txt.Length > 0)
331
            {
332
 
333
               switch (tt.styleType)
334
               {
335
                  case style_type_e.STYLE_EA_DOCGEN:
336
                     switch (tt.styleName)
337
                     {
338
                        case EA_Constants.EA_DocGenTable:
339
                           TabularContent.processTableElement(tt.txt, 0, indent_pts);
340
                           continuation = false;
341
 
342
                           // flag list numbering restart
343
                           postFormats.Add( new postFormat_type(null, tt) );
344
                           break;
345
 
346
                        default:
347
                           break;
348
                     }
349
                     break;
350
 
351
                  case style_type_e.STYLE_MS_WORD:
352
                     switch (tt.styleName)
353
                     {
354
                        // List all formatting that has to be done after all of the text has been inserted into the 
355
                        // document ie. post-formatting
356
                        case EA_Constants.styleName_Bold:
357
                        case EA_Constants.styleName_Italic:
358
                        case EA_Constants.styleName_Underline:
359
                        case EA_Constants.styleName_ListBullet0:
360
                        case EA_Constants.styleName_ListBullet1:
361
                        case EA_Constants.styleName_ListBullet2:
362
                        case EA_Constants.styleName_ListBullet3:
363
                        case EA_Constants.styleName_ListBullet4:
364
                        case EA_Constants.styleName_ListNumber0:
365
                        case EA_Constants.styleName_ListNumber1:
366
                        case EA_Constants.styleName_ListNumber2:
367
                        case EA_Constants.styleName_ListNumber3:
368
                        case EA_Constants.styleName_ListNumber4:
369
                        case EA_Constants.styleName_ListIndent0:
370
                        case EA_Constants.styleName_ListIndent1:
371
                        case EA_Constants.styleName_ListIndent2:
372
                        case EA_Constants.styleName_ListIndent3:
373
                        case EA_Constants.styleName_ListIndent4:
374
                        case EA_Constants.styleName_AlphaList0 :
375
                        case EA_Constants.styleName_AlphaList1 :
376
                        case EA_Constants.styleName_AlphaList2 :
377
                        case EA_Constants.styleName_AlphaList3 :
378
                        case EA_Constants.styleName_AlphaList4 :
379
                           wr_body = TextualContent.appendAndSelectText( tt.txt, defaultStyle, continuation );
380
                           continuation = true;
381
                           if (wr_body.Characters.Last.Text.Equals("\r"))
382
                              wr_body.End = wr_body.End - 1;  // don't format the \r char at the end - doing so causes wierd ms-word exceptions later on
383
                           postFormats.Add( new postFormat_type(wr_body, tt) );
384
                           break;
385
 
386
                        case EA_Constants.styleName_CodeText:
387
                        case EA_Constants.styleName_Normal  :
388
                        case EA_Constants.styleName_Note    :
389
                        case EA_Constants.styleName_Warning :
390
                           wr_body = TextualContent.appendAndSelectText( tt.txt, tt.styleName, continuation );
391
                           continuation = true;
392
                           if (indent_pts > 0)
393
                              wr_body.ParagraphFormat.LeftIndent = indent_pts;
394
 
395
                           // flag list numbering restart
396
                           postFormats.Add( new postFormat_type(null, tt) );
397
                           break;
398
 
399
 
400
                        // List all other formatting that can be done immediately.
401
                        case EA_Constants.styleName_Body1:
402
                           wr_body = TextualContent.appendAndSelectText( tt.txt, defaultStyle, continuation );
403
                           continuation = true;
404
                           if (indent_pts > 0)
405
                              wr_body.ParagraphFormat.LeftIndent = indent_pts;
406
 
407
                           // flag list numbering restart if this is printable text.
408
                           if (tt.txt.Trim().Length > 0)
409
                              postFormats.Add( new postFormat_type(null, tt) );
410
                           break;
411
 
412
                        default:
413
                           break;
414
                     }
415
                     break;
416
 
417
                  default:
418
                     break;
419
               }
420
            }
421
         }
422
 
423
         // Now apply post formatting commands to text already serialised in previous loop
424
         int last_list_level = 0;
425
         foreach (postFormat_type pf in postFormats)
426
         {
427
            object style;
428
 
2128 ghuddy 429
            if (createWordDoc.abortCreationThread)
430
               return false;
431
 
2126 ghuddy 432
            // a null word range implies we must restart numbering for any lists
433
            if (pf.m_wr == null)
434
            {
435
               last_list_level = 0;
436
            }
437
            else
438
            {
439
               switch (pf.m_tk.styleName)
440
               {
441
                  case EA_Constants.styleName_Bold:
442
                     pf.m_wr.Select();
443
                     createWordDoc.WordApp.Selection.Range.Bold = 1;
444
                     last_list_level = 0;
445
                     break;
446
 
447
                  case EA_Constants.styleName_Italic:
448
                     pf.m_wr.Select();
449
                     createWordDoc.WordApp.Selection.Range.Italic = 1;
450
                     last_list_level = 0;
451
                     break;
452
 
453
                  case EA_Constants.styleName_Underline:
454
                     pf.m_wr.Select();
455
                     createWordDoc.WordApp.Selection.Range.Underline = Word.WdUnderline.wdUnderlineSingle;
456
                     last_list_level = 0;
457
                     break;
458
 
459
                  case EA_Constants.styleName_ListBullet0:
460
                  case EA_Constants.styleName_ListBullet1:
461
                  case EA_Constants.styleName_ListBullet2:
462
                  case EA_Constants.styleName_ListBullet3:
463
                  case EA_Constants.styleName_ListBullet4:
464
                  case EA_Constants.styleName_ListIndent0:
465
                  case EA_Constants.styleName_ListIndent1:
466
                  case EA_Constants.styleName_ListIndent2:
467
                  case EA_Constants.styleName_ListIndent3:
468
                  case EA_Constants.styleName_ListIndent4:
469
                  case EA_Constants.styleName_ListNumber0:
470
                  case EA_Constants.styleName_ListNumber1:
471
                  case EA_Constants.styleName_ListNumber2:
472
                  case EA_Constants.styleName_ListNumber3:
473
                  case EA_Constants.styleName_ListNumber4:
474
                  case EA_Constants.styleName_AlphaList0 :
475
                  case EA_Constants.styleName_AlphaList1 :
476
                  case EA_Constants.styleName_AlphaList2 :
477
                  case EA_Constants.styleName_AlphaList3 :
478
                  case EA_Constants.styleName_AlphaList4 :
479
                     style = pf.m_tk.styleName;
480
                     pf.m_wr.Select();
481
                     createWordDoc.WordApp.Selection.Range.set_Style(ref style);
482
 
483
                     //Main.WriteOutput(string.Format("last list level {0}, this list level {1}", last_list_level, pf.m_tk.gen1), -1);
484
 
485
                     // Figure out if we have to restart numbering
486
                     if (last_list_level < pf.m_tk.gen1)
487
                     {
488
                        Word.ListTemplate lt;
489
                        object continuePreviousList = false;
490
                        object applyTo = Word.WdListApplyTo.wdListApplyToWholeList;
491
                        object defListBehavour = Word.WdDefaultListBehavior.wdWord10ListBehavior;
492
 
493
                        if ((pf.m_tk.styleName.IndexOf("Alpha") >= 0) || (pf.m_tk.styleName.IndexOf("Number") >= 0))
494
                        {
495
                           lt = createWordDoc.WordApp.Selection.Range.ListFormat.ListTemplate;
496
 
497
                           createWordDoc.WordApp.Selection.Range.ListFormat.ApplyListTemplate(
498
                              lt, ref continuePreviousList, ref applyTo, ref defListBehavour);
499
                        }
500
                     }
501
 
502
                     // shift content right by relative indent adjustment we calculated earlier
503
                     if (relative_indent_adjustment > 0)
504
                     {
505
                        createWordDoc.WordApp.Selection.Range.ParagraphFormat.LeftIndent += relative_indent_adjustment;
506
                     }
507
 
508
                     last_list_level = pf.m_tk.gen1;
509
                     break;
510
                  default:
511
                     break;
512
               }
513
            }
514
         }
515
         return true;
516
      }
517
 
518
 
519
 
520
 
521
 
522
	}
523
}