WebSVN – DevTools – Blame – /EA_DocGen/trunk/EA_DocGen/TextParser.cs

Rev	Author	Line No.	Line
2126	ghuddy	1	`using System;`
		2	`using System.Collections;`
		3	`using System.Text;`
2136	brianf	4	`using System.Text.RegularExpressions;`
		5	`using Microsoft.Office.Interop.Word;`
2130	ghuddy	6	`using Microsoft.Office.Core;`
2126	ghuddy	7
2130	ghuddy	8
2126	ghuddy	9	`// TODO`
		10	`//`
2130	ghuddy	11	`// 1) Possibly let the user end a tag simply with </>`
2126	ghuddy	12
		13
		14
		15	`namespace EA_DocGen`
		16	`{`
		17	`/// <summary>`
		18	`/// Parses EA notes text containing embedded formatting tags and serialises it into the Word document.`
		19	`/// </summary>`
2130	ghuddy	20	`public class TextParser`
		21	`{`
// Every embedded formatting tag belongs to one of two families:
//   * a format that is specific to EA_DocGen, or
//   * a format backed by an MS-Word style / font attribute.
private enum style_type_e
{
    STYLE_UNKNOWN   = -1,
    STYLE_EA_DOCGEN = 0,    // an EA_DocGen specific style or pattern
    STYLE_MS_WORD   = 1     // an MS-WORD style or character/font attribute
}
2130	ghuddy	31
		32	`private enum style_handling_e`
		33	`{`
		34	`STYLE_POST_FORMAT, // text will require formatting AFTER the entire string has been serialised.`
		35	`STYLE_USE_TAG_STYLE, // text will require the tag-implied style to be applied to it.`
		36	`STYLE_USE_CALLER_STYLE // text will require the callers specified style to be applied to it.`
		37	`};`
		38
		39	`// IDs for all of the styles we will process.`
		40	`// Note that the arrangement of enum values in the following type, has been made to allow`
		41	`// for some simple comparison tests during parsing, so do not re-arrange these enums without`
		42	`// addressing the necessary code changes in the parsing function.`
		43	`private enum style_id_e`
		44	`{`
		45	`// EA_DocGen styles (0..99)`
		46	`STYLE_ID_TABLE = 0,`
		47	`// add more EA_DocGen styles here when we need to`
		48
		49	`// MS-WORD styles (100..)`
		50	`STYLE_ID_BODY1 = 100,`
		51	`STYLE_ID_BOLD,`
		52	`STYLE_ID_ITALIC,`
		53	`STYLE_ID_UNDERLINE,`
		54	`STYLE_ID_NORMAL,`
		55	`STYLE_ID_WARNING,`
		56	`STYLE_ID_NOTE,`
		57	`STYLE_ID_CODETEXT,`
		58	`// add more MS-WORD styles here when we need to`
		59
		60	`// ALL bullet/list styles must be >= 200 for easy detection during post-formatting`
		61	`STYLE_ID_LIST_BULLET_0 = 200,`
		62	`STYLE_ID_LIST_BULLET_1,`
		63	`STYLE_ID_LIST_BULLET_2,`
		64	`STYLE_ID_LIST_BULLET_3,`
		65	`STYLE_ID_LIST_BULLET_4,`
		66	`STYLE_ID_LIST_INDENT_0,`
		67	`STYLE_ID_LIST_INDENT_1,`
		68	`STYLE_ID_LIST_INDENT_2,`
		69	`STYLE_ID_LIST_INDENT_3,`
		70	`STYLE_ID_LIST_INDENT_4,`
		71	`// Below here go lists that display numbering or lettering and so "restart numbering"`
		72	`// attributes are important`
		73	`STYLE_ID_LIST_NUMBER_0,`
		74	`STYLE_ID_LIST_NUMBER_1,`
		75	`STYLE_ID_LIST_NUMBER_2,`
		76	`STYLE_ID_LIST_NUMBER_3,`
		77	`STYLE_ID_LIST_NUMBER_4,`
		78	`STYLE_ID_LIST_ALPHA_0,`
		79	`STYLE_ID_LIST_ALPHA_1,`
		80	`STYLE_ID_LIST_ALPHA_2,`
		81	`STYLE_ID_LIST_ALPHA_3,`
2136	brianf	82	`STYLE_ID_LIST_ALPHA_4,`
2130	ghuddy	83	`// do not put anything below here`
2136	brianf	84	`STYLE_ID_FONT_COLOR`
2130	ghuddy	85	`};`
		86
// One token = a run of text together with the style (tag) that applies to it.
private struct token_type
{
    // The text content enclosed by the tag (null until the content is known).
    public string txt;

    // Which family the style belongs to: MS-WORD or EA_DocGen.
    public style_type_e styleType;

    // The name of the style.
    public string styleName;

    // The id of the style.
    public style_id_e styleId;

    // List level for bullet/list items; the parser also stores the parsed colour
    // value here for font colour tokens.
    public int level;

    // How the style is to be applied when the text is serialised.
    public style_handling_e styleHandling;
}
		97
// Certain formatting can only be applied once ALL the text of a description has been
// appended to the word document. Each pending item remembers the word range covering
// the text plus the token describing the style to apply; these are kept in a list.
private struct postFormat_type
{
    public Range m_wr;
    public token_type m_tk;

    public postFormat_type(Range wr, token_type tk)
    {
        this.m_wr = wr;
        this.m_tk = tk;
    }
}
		112
		113	`// Use a hash table for recording the allowed tags and their attributes, facilitating rapid`
		114	`// lookup during parsing.`
		115	`private static Hashtable styleDefs = null;`
		116
		117
		118	`/// <summary>`
		119	`/// Class initialisation function`
		120	`/// </summary>`
		121	`public static void initialise()`
		122	`{`
		123	`// initialsie the hash table`
		124	`styleDefs = new Hashtable();`
		125
		126	`// This list dictates what tags a user can use in the notes text of an EA element. When adding new items`
2130	ghuddy	127	`// to this list, we may have to update the parsing function obviously.`
2126	ghuddy	128
2130	ghuddy	129	`// The hash key is the short tag name that end-users will use in their descriptions. Tags found in`
2126	ghuddy	130	`// user text is matched to these keys, and the style definition if found can then be used.`
		131
		132	`// EA_DocGen tags`
2130	ghuddy	133	`styleDefs.Add( EA_Constants.EA_DocGenTable, formStyleDef( style_type_e.STYLE_EA_DOCGEN, style_id_e.STYLE_ID_TABLE, EA_Constants.EA_DocGenTable, 0, style_handling_e.STYLE_USE_TAG_STYLE ) );`
2126	ghuddy	134
		135	`// MS-Word formatting tags`
2130	ghuddy	136	`styleDefs.Add( "b", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_BOLD, EA_Constants.styleName_Bold , 0, style_handling_e.STYLE_POST_FORMAT) );`
		137	`styleDefs.Add( "i", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_ITALIC, EA_Constants.styleName_Italic , 0, style_handling_e.STYLE_POST_FORMAT) );`
		138	`styleDefs.Add( "u", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_UNDERLINE, EA_Constants.styleName_Underline , 0, style_handling_e.STYLE_POST_FORMAT) );`
2136	brianf	139	`styleDefs.Add("font", formStyleDef(style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_FONT_COLOR, EA_Constants.styleName_Font_Color, 0, style_handling_e.STYLE_POST_FORMAT));`
2126	ghuddy	140
2130	ghuddy	141	`styleDefs.Add( "lb0", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_BULLET_0, EA_Constants.styleName_ListBullet0,1, style_handling_e.STYLE_POST_FORMAT ) );`
		142	`styleDefs.Add( "lb1", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_BULLET_1, EA_Constants.styleName_ListBullet1,2, style_handling_e.STYLE_POST_FORMAT ) );`
		143	`styleDefs.Add( "lb2", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_BULLET_2, EA_Constants.styleName_ListBullet2,3, style_handling_e.STYLE_POST_FORMAT ) );`
		144	`styleDefs.Add( "lb3", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_BULLET_3, EA_Constants.styleName_ListBullet3,4, style_handling_e.STYLE_POST_FORMAT ) );`
		145	`styleDefs.Add( "lb4", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_BULLET_4, EA_Constants.styleName_ListBullet4,5, style_handling_e.STYLE_POST_FORMAT ) );`
		146	`styleDefs.Add( "ln0", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_NUMBER_0, EA_Constants.styleName_ListNumber0,1, style_handling_e.STYLE_POST_FORMAT ) );`
		147	`styleDefs.Add( "ln1", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_NUMBER_1, EA_Constants.styleName_ListNumber1,2, style_handling_e.STYLE_POST_FORMAT ) );`
		148	`styleDefs.Add( "ln2", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_NUMBER_2, EA_Constants.styleName_ListNumber2,3, style_handling_e.STYLE_POST_FORMAT ) );`
		149	`styleDefs.Add( "ln3", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_NUMBER_3, EA_Constants.styleName_ListNumber3,4, style_handling_e.STYLE_POST_FORMAT ) );`
		150	`styleDefs.Add( "ln4", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_NUMBER_4, EA_Constants.styleName_ListNumber4,5, style_handling_e.STYLE_POST_FORMAT ) );`
		151	`styleDefs.Add( "li0", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_INDENT_0, EA_Constants.styleName_ListIndent0,1, style_handling_e.STYLE_POST_FORMAT ) );`
		152	`styleDefs.Add( "li1", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_INDENT_1, EA_Constants.styleName_ListIndent1,2, style_handling_e.STYLE_POST_FORMAT ) );`
		153	`styleDefs.Add( "li2", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_INDENT_2, EA_Constants.styleName_ListIndent2,3, style_handling_e.STYLE_POST_FORMAT ) );`
		154	`styleDefs.Add( "li3", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_INDENT_3, EA_Constants.styleName_ListIndent3,4, style_handling_e.STYLE_POST_FORMAT ) );`
		155	`styleDefs.Add( "li4", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_INDENT_4, EA_Constants.styleName_ListIndent4,5, style_handling_e.STYLE_POST_FORMAT ) );`
		156	`styleDefs.Add( "la0", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_ALPHA_0, EA_Constants.styleName_AlphaList0, 1, style_handling_e.STYLE_POST_FORMAT ) );`
		157	`styleDefs.Add( "la1", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_ALPHA_1, EA_Constants.styleName_AlphaList1, 2, style_handling_e.STYLE_POST_FORMAT ) );`
		158	`styleDefs.Add( "la2", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_ALPHA_2, EA_Constants.styleName_AlphaList2, 3, style_handling_e.STYLE_POST_FORMAT ) );`
		159	`styleDefs.Add( "la3", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_ALPHA_3, EA_Constants.styleName_AlphaList3, 4, style_handling_e.STYLE_POST_FORMAT ) );`
		160	`styleDefs.Add( "la4", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_LIST_ALPHA_4, EA_Constants.styleName_AlphaList4, 5, style_handling_e.STYLE_POST_FORMAT ) );`
2126	ghuddy	161
2130	ghuddy	162	`styleDefs.Add( "code", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_CODETEXT, EA_Constants.styleName_CodeText, 0, style_handling_e.STYLE_USE_TAG_STYLE ) );`
		163	`styleDefs.Add( "normal", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_NORMAL, EA_Constants.styleName_Normal , 0, style_handling_e.STYLE_USE_TAG_STYLE ) );`
		164	`styleDefs.Add( "note", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_NOTE, EA_Constants.styleName_Note , 0, style_handling_e.STYLE_USE_TAG_STYLE ) );`
		165	`styleDefs.Add( "warn", formStyleDef( style_type_e.STYLE_MS_WORD, style_id_e.STYLE_ID_WARNING, EA_Constants.styleName_Warning , 0, style_handling_e.STYLE_USE_TAG_STYLE ) );`
2126	ghuddy	166	`}`
		167
		168	`/// <summary>`
		169	`/// Helper for initialise() function`
		170	`/// </summary>`
		171	`/// <param name="styleName"></param>`
2130	ghuddy	172	`/// <param name="styleName"></param>`
2126	ghuddy	173	`/// <returns></returns>`
2130	ghuddy	174	`private static token_type formStyleDef(style_type_e styleType, style_id_e styleId, string styleName, int levelvar, style_handling_e styleHandling)`
2126	ghuddy	175	`{`
		176	`token_type tokenType = new token_type();`
2130	ghuddy	177	`tokenType.styleType = styleType; // The kind of style (ms-word or ea_docgen)`
		178	`tokenType.styleId = styleId; // The specific type of style`
		179	`tokenType.styleName = styleName; // The name of the style`
		180	`tokenType.txt = null; // This is the actual text to be formatted`
		181	`tokenType.level = levelvar; // used to record level numbering - only really useful for bullet/list styles`
		182	`tokenType.styleHandling = styleHandling;`
2126	ghuddy	183	`return tokenType;`
		184	`}`
		185
		186
/// <summary>
/// Resets a token to the default: Body1 MS-Word style, rendered with the caller's style,
/// level 0 and no text.
/// </summary>
/// <param name="token">Receives the default token.</param>
private static void initialiseToken(out token_type token)
{
    // formStyleDef() also sets txt to null, giving exactly the default field values.
    token = formStyleDef(
        style_type_e.STYLE_MS_WORD,
        style_id_e.STYLE_ID_BODY1,
        EA_Constants.styleName_Body1,
        0,
        style_handling_e.STYLE_USE_CALLER_STYLE);
}
2126	ghuddy	196
2134	ghuddy	197
		198	`private static string convert_EA7_1_RTF_ListTag(string s, string open, string close, string replacement_open, string replacement_close)`
		199	`{`
		200	`int pos;`
		201	`int pos_2;`
		202
		203	`pos = s.IndexOf(open,0);`
		204	`if (pos >= 0)`
		205	`{`
		206	`while (pos >= 0)`
		207	`{`
		208	`pos_2 = s.IndexOf(close,pos);`
		209
		210	`string seg = s.Substring(pos+4, pos_2 - (pos+4));`
		211	`seg = seg.Replace("\t<li>", replacement_open);`
		212	`seg = seg.Replace("</li>", replacement_close);`
		213
		214	`s = s.Substring(0, pos) + seg + s.Substring(pos_2+5, s.Length - (pos_2+5));`
		215
		216	`pos = s.IndexOf(open,0);`
		217	`}`
		218	`}`
		219	`return s;`
		220	`}`
		221
		222
		223
		224
/// <summary>
/// Parse notes text and use the results to form document content. The text is split into
/// tokens at the recognised formatting tags, each token is appended to the Word document,
/// and formatting that can only be applied afterwards (bold/italic/underline/font colour,
/// bullet and list styles) is applied in a final pass.
/// </summary>
/// <param name="s">The notes text. Null is treated as an empty string.</param>
/// <param name="id">Passed to Main.WriteOutput together with any tag error message.</param>
/// <param name="callerStyle">Style name used for text outside of tags and as the initial style of
/// post-formatted text. The proposed/rejected requirement body styles additionally cause the
/// whole of the serialised text to be italicised / struck through.</param>
/// <param name="indent_pts">Absolute left indent in points; no indentation is applied when it
/// is not greater than zero.</param>
/// <param name="continuation">Passed to TextualContent.appendAndSelectText for the first text
/// appended; subsequent appends pass true, or false again after a table has been processed.</param>
/// <returns>false if createWordDoc.abortCreationThread was found set; otherwise true, including
/// when tag errors were reported (only the first tag error of a call is reported).</returns>
public static bool parse(string s, int id, string callerStyle, float indent_pts, bool continuation)
{
    int pos;
    int pos_LeftBracket = 0;
    int pos_RightBracket = 0;
    int pos_tagName;
    int pos_ContentStart = 0;
    bool lookingForTagEnd = false;
    bool foundError = false;
    string openTagText = "";   // text of the start tag currently awaiting its end tag
    Range wr_body;

    if (s == null)
        s = "";

    // Convert EA7.1 embedded RTF controls into EA_DocGen controls where possible
    s = convert_EA7_1_RTF_ListTag(s, "<ol>\r\n", "</ol>", "<ln0>", "</ln0>");
    s = convert_EA7_1_RTF_ListTag(s, "<ul>\r\n", "</ul>", "<lb0>", "</lb0>");

    // In EAv7.1, Sparx allows users to embed RTF into notes text. RTF uses <> to enclose controls just as EA_DocGen
    // does for its own controls which this function decodes. However, EA_DocGen controls are entered as text by
    // human users and so EAv7.1 converts < and > chars into a form such that they do not look like RTF tags.
    // We have to convert EA's escaping mechanism back into ascii text chars otherwise the parser wont work.
    // There may be issues to resolve with this parser if EAv7.1 users have used any of EA's RTF editing features
    // because if they do, the < and > chars there will not be escaped and the parser will find them and try to
    // decode them. Most will probably not be decodable.
    s = TextualContent.HtmlDecode(s);

    // Begin to construct a range that will eventually encompass ALL of the text we will serialize during the execution
    // of this function. This is needed later only if the caller has specified a requirement style that needs to have
    // global strikethrough or italicising applied to (all of) the text
    object startLocation = createWordDoc.WordDocument.Content.End - 1;
    object endLocation;

    // Requirement element text must be indented according to the level number of the requirement tag. Caller
    // passes in absolute indentation value but for some items such as bullets/lists, we need a relative adjustment
    // since bullet/list styles have their own indentation settings and we only want to offset them rather than overwrite
    // them.
    // Get relative (to 2.5cm) indentation user has commanded. The 2.5cm mark is the standard point where body 1 text
    // begins. At 2.5cm, the pts value is 70.866. We only indent, never outdent so end stop at 0.
    float relative_indent_adjustment = 0;
    if (indent_pts > 0)
    {
        relative_indent_adjustment = indent_pts - (float)70.866;
        if (relative_indent_adjustment < 0)
            relative_indent_adjustment = 0;
    }

    // A working variable and a list for completed tokens
    token_type token;
    ArrayList tokens = new ArrayList();

    // default starting token - may be updated later
    initialiseToken(out token);

    // Recognises a font start tag carrying a color attribute. Built once, outside the loop.
    // The color group takes 1 to 6 hex digits so that ordinary "#RRGGBB" values match and the
    // captured value always fits in an int.
    Regex fontColorRegex = new Regex("font.?color=\"#(?<color>[0-9A-F]{1,6})\"", RegexOptions.IgnoreCase);

    // PARSING LOOP -
    // Break up the input string into tokens that identify what kind of action is to be performed
    // with the token text. The default is as seen just above - apply Body1 MS-Word style. However,
    // if user has used formatting tags, the token style name, ID, and type will be updated accordingly.
    // This must be done iteratively until we have exhausted the input string.
    pos = s.IndexOf("<", 0);
    while ((pos >= 0) && (pos < s.Length))
    {
        if (createWordDoc.abortCreationThread)
            return false;

        // record position of tag; the tag name begins at the next char
        pos_LeftBracket = pos;
        pos_tagName = pos_LeftBracket + 1;

        // Check if this is a closing tag, and if so skip past the / char
        bool isEnding = false;
        if ((pos < (s.Length - 1)) && (s[pos + 1] == '/'))
        {
            isEnding = true;
            pos_tagName++;
        }

        // look for the closing bracket of the tag
        pos = s.IndexOf(">", pos + 1);
        if (pos < 0)
        {
            // Cannot find any '>' so we should just exit the loop
            break;
        }

        // We found a possible tag, now figure out if this is one of the tags we recognise
        string sTag = s.Substring(pos_tagName, pos - pos_tagName);

        // A font start tag with a color attribute is looked up simply as "font"; remember its color value
        int iFontColor = -1;
        if (!isEnding)
        {
            Match fontMatch = fontColorRegex.Match(sTag);
            if (fontMatch.Success)
            {
                iFontColor = Convert.ToInt32(fontMatch.Groups["color"].Value, 16);
                sTag = "font";
            }
        }

        // use hash table to identify the tag
        bool found = styleDefs.Contains(sTag);

        if (found)
        {
            // record position of the closing bracket of the tag
            pos_RightBracket = pos;
            string tagText = s.Substring(pos_LeftBracket, pos_RightBracket - pos_LeftBracket + 1);

            if (isEnding)
            {
                // this is an end tag, ie. </tagname>
                if (!lookingForTagEnd)
                {
                    reportTagError(ref foundError, "ERROR, Found out of sequence style tag ({0}), generated document text may be formatted incorrectly.", tagText, id);
                }
                else
                {
                    // Check that the end tag matches the start tag
                    // NOTE: If we were to allow lazy end-tagging (ie using '</>' only) we would have to do away with this
                    // check.
                    token_type tt = (token_type)styleDefs[sTag];
                    if (token.styleType == tt.styleType &&
                        token.styleId == tt.styleId &&
                        token.styleName == tt.styleName)
                    {
                        // Update the token's text field and add the now complete token to our list
                        // for processing a little later on.
                        token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
                        tokens.Add(token);

                        // re-initialise token for next tag search
                        initialiseToken(out token);
                        lookingForTagEnd = false;
                        pos_ContentStart = pos_RightBracket + 1;
                    }
                    else
                    {
                        // end tag does not seem to be the same as the starting tag, so ignore it
                        reportTagError(ref foundError, "ERROR, Found unmatched style tag ({0}), generated document text may be formatted incorrectly.", tagText, id);
                    }
                }
            }
            else if (lookingForTagEnd)
            {
                // A start tag inside another tag. It is reported and otherwise ignored. The text in
                // front of it is NOT tokenised here: it stays part of the enclosing tag's content
                // and is emitted once, when that tag ends (tokenising it here would emit it twice).
                reportTagError(ref foundError, "ERROR, Found nested style tag ({0}), generated document text may be formatted incorrectly.", tagText, id);
            }
            else
            {
                // If there is content prior to now that has not been consumed, tokenise it now
                if ((pos_LeftBracket - pos_ContentStart) > 0)
                {
                    token.txt = s.Substring(pos_ContentStart, pos_LeftBracket - pos_ContentStart);
                    tokens.Add(token);
                }

                // update the token variable with this tags atributes from the hash table lookup. This overwrites
                // the default values assigned when parsing began, or when we resumed parsing after dealing with
                // the previous token found.
                token_type lookupToken = (token_type)styleDefs[sTag];
                token.styleId = lookupToken.styleId;
                token.styleType = lookupToken.styleType;
                token.styleName = lookupToken.styleName;
                if (token.styleId == style_id_e.STYLE_ID_FONT_COLOR)
                    token.level = iFontColor;   // font tokens carry their color in the level field
                else
                    token.level = lookupToken.level;
                token.styleHandling = lookupToken.styleHandling;
                token.txt = null; // we dont know what the text content will be yet. This is obtained when we encounter the end tag

                pos_ContentStart = pos_RightBracket + 1;
                openTagText = tagText;
                lookingForTagEnd = true;
            }
        }

        // look for next tag. A recognised tag has been consumed up to its '>'. An unrecognised one is
        // treated as plain text, so resume just after its '<' - the span up to the '>' we found may
        // itself contain the start of a genuine tag (eg. "a < b <b>bold</b>").
        pos = s.IndexOf("<", found ? pos : pos_LeftBracket + 1);

    } // end of the loop

    // take care of the last token, if there is one
    if (pos_ContentStart < s.Length)
    {
        token.txt = s.Substring(pos_ContentStart, s.Length - pos_ContentStart);
        tokens.Add(token);
    }

    if (lookingForTagEnd)
    {
        // Report the start tag that was never closed. Its text was captured when the tag was
        // accepted because the bracket positions may have moved on to later '<' characters since.
        reportTagError(ref foundError, "ERROR, Found incomplete style tag ({0}), generated document text may be formatted incorrectly.", openTagText, id);
    }

    // The way MS-Word works makes it necessary to do some formatting after we have serialised all of the text.
    // So, we need another list. This will contain elements that have the token AND the word range object that we
    // obtain when we serialise the text.
    ArrayList postFormats = new ArrayList();

    // SERIALISATION LOOP - Now process all the tokens we have found
    for (int tt_i = 0; tt_i < tokens.Count; tt_i++)
    {
        token_type tt = (token_type)tokens[tt_i];

        if (createWordDoc.abortCreationThread)
            return false;

        if (tt.txt == null || tt.txt.Length == 0)
            continue;

        switch (tt.styleType)
        {
            case style_type_e.STYLE_EA_DOCGEN:
                switch (tt.styleId)
                {
                    case style_id_e.STYLE_ID_TABLE:
                        TabularContent.processTableElement(tt.txt, 0, indent_pts);
                        continuation = false;

                        // flag list numbering restart
                        postFormats.Add( new postFormat_type(null, tt) );
                        break;

                    default:
                        break;
                }
                break;

            case style_type_e.STYLE_MS_WORD:
                // Replace <br> with actual required characters (common to every handling mode)
                tt.txt = tt.txt.Replace("<br>","\r\n");

                switch (tt.styleHandling)
                {
                    case style_handling_e.STYLE_POST_FORMAT:
                        // Serialise the text, initially applying callers style but since post-formatting will be
                        // done later, the texts appearance will change from what this initially applied style
                        // implies.
                        wr_body = TextualContent.appendAndSelectText( tt.txt, callerStyle, continuation );
                        continuation = true;

                        while (wr_body.Characters.Last.Text.Equals("\r") || wr_body.Characters.Last.Text.Equals("\n"))
                            wr_body.End = wr_body.End - 1;  // don't format the /n or \r char at the end - doing so causes wierd ms-word exceptions later on
                        postFormats.Add( new postFormat_type(wr_body, tt) );
                        break;

                    case style_handling_e.STYLE_USE_TAG_STYLE:
                    case style_handling_e.STYLE_USE_CALLER_STYLE:
                        // Serialise the text, applying either the tag's style or the callers style
                        string paragraphStyle = (tt.styleHandling == style_handling_e.STYLE_USE_TAG_STYLE) ? tt.styleName : callerStyle;
                        wr_body = TextualContent.appendAndSelectText( tt.txt, paragraphStyle, continuation );
                        continuation = true;

                        // Indent according to callers specified amount
                        if (indent_pts > 0)
                            wr_body.ParagraphFormat.LeftIndent = indent_pts;

                        // flag list numbering restart if this is printable text.
                        if (tt.txt.Trim().Length > 0)
                            postFormats.Add( new postFormat_type(null, tt) );
                        break;

                    default:
                        break;
                }
                break;

            default:
                break;
        }
    } // end of serialisation loop

    // POST-FORMATTING LOOP - Now apply post formatting commands to text already serialised in previous loop
    int last_list_level = 0;
    foreach (postFormat_type pf in postFormats)
    {
        object style;

        if (createWordDoc.abortCreationThread)
            return false;

        // a null word range implies we must restart numbering for any lists
        if (pf.m_wr == null)
        {
            last_list_level = 0;
            continue;
        }

        switch (pf.m_tk.styleId)
        {
            case style_id_e.STYLE_ID_BOLD:
                pf.m_wr.Select();
                createWordDoc.WordApp.Selection.Range.Bold = 1;
                last_list_level = 0;
                break;

            case style_id_e.STYLE_ID_ITALIC:
                pf.m_wr.Select();
                createWordDoc.WordApp.Selection.Range.Italic = 1;
                last_list_level = 0;
                break;

            case style_id_e.STYLE_ID_UNDERLINE:
                pf.m_wr.Select();
                createWordDoc.WordApp.Selection.Range.Underline = WdUnderline.wdUnderlineSingle;
                last_list_level = 0;
                break;

            case style_id_e.STYLE_ID_FONT_COLOR:
                // level holds the color parsed from the tag. It is -1 when the font tag had no color
                // attribute; casting that to UInt32 would select white, so leave the color alone then.
                if (pf.m_tk.level >= 0)
                {
                    pf.m_wr.Select();
                    createWordDoc.WordApp.Selection.Range.Font.Color = (WdColor) ReverseRGB((UInt32)pf.m_tk.level);
                }
                last_list_level = 0;
                break;

            default:
                // Handle bullets/lists
                if (pf.m_tk.styleId >= style_id_e.STYLE_ID_LIST_BULLET_0)
                {
                    style = pf.m_tk.styleName;
                    pf.m_wr.Select();
                    createWordDoc.WordApp.Selection.Range.set_Style(ref style);

                    // Figure out if we have to restart numbering.
                    // Only need to restart numbering if this list displays numbering - bullets and simple
                    // indents do not, but alpha and numeric lists do. The style_id_e type has been organised to
                    // make this a simple test.
                    if ((last_list_level < pf.m_tk.level) &&
                        (pf.m_tk.styleId >= style_id_e.STYLE_ID_LIST_NUMBER_0))
                    {
                        // To restart numbering, the only way to do it is to (re-)apply the list template to the
                        // selection with a "continue previous list" setting of false, and an "apply to"
                        // setting of "whole list".
                        object continuePreviousList = false;
                        object applyTo = WdListApplyTo.wdListApplyToWholeList;
                        object defListBehavour = WdDefaultListBehavior.wdWord10ListBehavior;

                        ListTemplate lt = createWordDoc.WordApp.Selection.Range.ListFormat.ListTemplate;

                        createWordDoc.WordApp.Selection.Range.ListFormat.ApplyListTemplate(
                            lt, ref continuePreviousList, ref applyTo, ref defListBehavour);
                    }

                    // shift content right by relative indent adjustment we calculated earlier
                    if (relative_indent_adjustment > 0)
                    {
                        createWordDoc.WordApp.Selection.Range.ParagraphFormat.LeftIndent += relative_indent_adjustment;
                    }

                    last_list_level = pf.m_tk.level;
                }
                break;
        }
    } // end of post-formatting loop

    // Special handling for Proposed and Rejected requirement sections - here the text must be italicies or
    // struck through (see definition of the styleName_ReqPropBody and styleName_ReqRejBody styles in StyleContent.cs).
    bool isProposed = callerStyle.Equals(EA_Constants.styleName_ReqPropBody);
    bool isRejected = !isProposed && callerStyle.Equals(EA_Constants.styleName_ReqRejBody);
    if (isProposed || isRejected)
    {
        // Complete construction of a range that will encompass ALL of the text we will serialize during the execution
        // of this function
        endLocation = createWordDoc.WordDocument.Content.End;
        Range wr_total = createWordDoc.WordDocument.Range(ref startLocation, ref endLocation);
        while (wr_total.Characters.Last.Text.Equals("\r") || wr_total.Characters.Last.Text.Equals("\n"))
            wr_total.End = wr_total.End - 1;  // don't format the \r\n char at the end - doing so causes wierd ms-word exceptions later on

        if (isProposed)
        {
            // italicise
            wr_total.Font.Italic = (int)MsoTriState.msoTrue;
        }
        else
        {
            // strikethrough - use msoCTrue since msoTrue simply toggles strikethough attribute, or so it seems
            wr_total.Font.StrikeThrough = (int)MsoTriState.msoCTrue;
        }
    }

    return true;
}

/// <summary>
/// Writes a tag error message to the output, but only for the first error of a parse() call.
/// </summary>
/// <param name="foundError">Set once an error has been reported; later errors are then suppressed.</param>
/// <param name="format">Message format with a single {0} placeholder for the tag text.</param>
/// <param name="tagText">Text of the offending tag.</param>
/// <param name="id">Passed through to Main.WriteOutput.</param>
private static void reportTagError(ref bool foundError, string format, string tagText, int id)
{
    if (!foundError)
    {
        Main.WriteOutput(string.Format(format, tagText), id);
        foundError = true;
    }
}
		698
/// <summary>
/// Swaps the lowest and the third byte of a 24-bit colour value (0xRRGGBB becomes 0xBBGGRR).
/// The middle byte is kept in place and the top byte of the result is always zero.
/// </summary>
/// <param name="value">Colour value; only the low 24 bits are used.</param>
/// <returns>The colour value with its first and third bytes exchanged.</returns>
public static UInt32 ReverseRGB(UInt32 value)
{
    UInt32 lowByte    = value & 0x000000FFU;
    UInt32 middleByte = value & 0x0000FF00U;
    UInt32 highByte   = (value >> 16) & 0x000000FFU;

    return (lowByte << 16) | middleByte | highByte;
}
2126	ghuddy	705
2130	ghuddy	706	`}`
2126	ghuddy	707	`}`

Subversion Repositories DevTools

(root)/EA_DocGen/trunk/EA_DocGen/TextParser.cs – Rev 2136