// HtmlParser.ts (25 KB)
  /**
   * Matches a single event attribute name (`click` or `param`) followed by `=`
   * at the very start of the string. Both alternatives are anchored because
   * `_processEventHandler` strips the match from the front of its input.
   */
  const eventRegx = /^(click|param)\s*=/;

  /** Attribute names recognised inside an `<img .../>` tag. */
  const imageAttrReg = /(\s)*src(\s)*=|(\s)*height(\s)*=|(\s)*width(\s)*=|(\s)*align(\s)*=|(\s)*offset(\s)*=|(\s)*click(\s)*=|(\s)*param(\s)*=/;

  /** Attribute names recognised inside a `<cg ...>` (colour gradient) tag. */
  const colorGradientAttrReg = /(\s)*lb(\s)*=|(\s)*rb(\s)*=|(\s)*lt(\s)*=|(\s)*rt(\s)*=/;

  /** Attribute names recognised inside a `<face ...>` tag. */
  const faceAttrReg = /(\s)*color(\s)*=|(\s)*dilate(\s)*=|(\s)*softness(\s)*=|(\s)*click(\s)*=|(\s)*param(\s)*=/;

  /** Attribute names recognised inside an `<outline ...>` tag. */
  const outlineAttrReg = /(\s)*color(\s)*=|(\s)*thickness(\s)*=|(\s)*click(\s)*=|(\s)*param(\s)*=/;

  /** Attribute names recognised inside an `<underlay ...>` tag. */
  const underlayAttrReg = /(\s)*color(\s)*=|(\s)*x(\s)*=|(\s)*y(\s)*=|(\s)*dilate(\s)*=|(\s)*softness(\s)*=|(\s)*click(\s)*=|(\s)*param(\s)*=/;

  /** Attribute names recognised inside a `<glow ...>` tag. */
  const glowAttrReg = /(\s)*color(\s)*=|(\s)*offset(\s)*=|(\s)*inner(\s)*=|(\s)*outer(\s)*=|(\s)*power(\s)*=|(\s)*click(\s)*=|(\s)*param(\s)*=/;

  /** Matches every character entity that `_escapeSpecialSymbol` decodes. */
  const htmlEntityReg = /&(lt|gt|amp|quot|apos|nbsp);/g;

  /** Replacement text for each supported character entity. */
  const htmlEntityMap: { [entity: string]: string } = {
    '&lt;': '<',
    '&gt;': '>',
    '&amp;': '&',
    '&quot;': '"',
    '&apos;': '\'',
    '&nbsp;': ' ',
  };

  /** One parsed segment: a run of text (possibly empty) plus the style in effect for it. */
  export interface IHtmlTextParserResultObj {
    text?: string;
    style?: IHtmlTextParserStack;
  }

  /** Style record produced for a tag; every field is optional and only set by the matching tag. */
  export interface IHtmlTextParserStack {
    /** Value of `<color=...>`, stored verbatim. */
    color?: string;
    /** Value of `<size=...>`, parsed with `parseInt`. */
    size?: number;
    /** Event attributes (`click`, `param`) keyed by attribute name. */
    event?: { [k: string]: string };
    /** Set for `<br/>`. */
    isNewLine?: boolean;
    /** Set for `<img src=.../>`. */
    isImage?: boolean;
    src?: string;
    imageWidth?: number;
    imageHeight?: number;
    /** Raw `offset` attribute of an image tag (kept as a string). */
    imageOffset?: string;
    /** Lower-cased `align` attribute of an image tag. */
    imageAlign?: string;
    underline?: boolean;
    strikethrough?: boolean;
    /** Numeric value of `<u=N>` / `<s=N>`; 0 when no `=N` is given. */
    offset?: number;
    italic?: boolean;
    bold?: boolean;
    colorGradient?: { lb: string; rb: string; lt: string; rt: string; };
    face?: { color: string; dilate: number; softness: number; };
    outline?: { color: string; thickness: number; };
    underlay?: { color: string; x: number; y: number; dilate: number; softness: number; };
    glow?: { color: string; offset: number; inner: number; outer: number; power: number; };
  }

  /** One `name=value` pair split off an attribute string, plus the text that follows it. */
  interface IParsedAttribute {
    name: string;
    value: string;
    rest: string;
  }

  /** Copies a single style field from `source` to `target` in a type-safe way. */
  function inheritStyleValue<K extends keyof IHtmlTextParserStack>(
    target: IHtmlTextParserStack,
    source: IHtmlTextParserStack,
    key: K,
  ): void {
    target[key] = source[key];
  }

  /**
   * A utils class for parsing HTML texts. The parsed results will be an object array.
   *
   * Supported tags, as implemented below: `color=`, `size=`, `br/`, `img src=.../`,
   * `cg`, `face`, `outline`, `underlay`, `glow`, `on`, `u`, `s`, `b`, `i`.
   * Several effects may be chained in one tag in that order
   * (e.g. `<face color=#111111 outline thickness=0.2>`).
   *
   * NOTE(review): tag names are matched by prefix only (e.g. anything starting
   * with `s` that is not `size=` is treated as strikethrough). That behaviour is
   * kept as-is because callers may depend on it.
   */
  export class HtmlTextParser {
    private _stack: IHtmlTextParserStack[] = [];
    private _resultObjectArray: IHtmlTextParserResultObj[] = [];

    /**
     * Splits `htmlString` into text segments and attaches the style of the
     * innermost open tag to each segment.
     *
     * @param htmlString Markup to parse.
     * @returns A new array on every call, so results of earlier calls are not
     * overwritten by later ones.
     */
    public parse(htmlString: string): IHtmlTextParserResultObj[] {
      // A fresh array per call: the previous implementation cleared and re-used
      // one shared array, which silently wiped results callers still held.
      this._resultObjectArray = [];
      this._stack.length = 0;
      let startIndex = 0;
      const length = htmlString.length;
      while (startIndex < length) {
        let tagEndIndex = htmlString.indexOf('>', startIndex);
        let tagBeginIndex = -1;
        if (tagEndIndex >= 0) {
          tagBeginIndex = htmlString.lastIndexOf('<', tagEndIndex);
          // The '<' found belongs to text that was already consumed, so the
          // '>' is a stray character: look for the next real tag after it.
          const noTagBegin = tagBeginIndex < (startIndex - 1);
          if (noTagBegin) {
            tagBeginIndex = htmlString.indexOf('<', tagEndIndex + 1);
            tagEndIndex = htmlString.indexOf('>', tagBeginIndex + 1);
          }
        }
        if (tagBeginIndex < 0) {
          // No further tag: the remainder is emitted as one text segment.
          this._stack.pop();
          this._processResult(htmlString.substring(startIndex));
          startIndex = length;
        } else {
          let newStr = htmlString.substring(startIndex, tagBeginIndex);
          const tagStr = htmlString.substring(tagBeginIndex + 1, tagEndIndex);
          // "<>" is not a tag; keep it as literal text.
          if (tagStr === '') newStr = htmlString.substring(startIndex, tagEndIndex + 1);
          this._processResult(newStr);
          if (tagEndIndex === -1) {
            // The tag is never closed with '>': skip past its '<'.
            tagEndIndex = tagBeginIndex;
          } else if (htmlString.charAt(tagBeginIndex + 1) === '/') {
            this._stack.pop();
          } else {
            this._addToStack(tagStr);
          }
          startIndex = tagEndIndex + 1;
        }
      }
      return this._resultObjectArray;
    }

    /**
     * Converts the text between `<` and `>` into a style record.
     *
     * Side effect: `br/` and valid `img` tags push their own (empty-text)
     * segment onto the result array.
     *
     * @param attribute Tag content without the angle brackets.
     * @returns The style for the tag; `{}` for image tags and unknown tags.
     */
    private _attributeToObject(attribute: string): IHtmlTextParserStack {
      attribute = attribute.trim();
      const obj: IHtmlTextParserStack = {};

      // <color=...> / <size=...>, optionally followed by event attributes.
      let header = /^(color|size)(\s)*=/.exec(attribute);
      if (header) {
        attribute = attribute.substring(header[0].length).trim();
        if (attribute === '') {
          return obj;
        }
        const nextSpace = attribute.indexOf(' ');
        if (header[1] === 'color') {
          obj.color = nextSpace > -1 ? attribute.substring(0, nextSpace).trim() : attribute;
        } else {
          obj.size = parseInt(attribute);
        }
        // Anything after the first space is treated as event attributes.
        if (nextSpace > -1) {
          obj.event = this._processEventHandler(attribute.substring(nextSpace + 1).trim());
        }
        return obj;
      }

      // <br/>
      if (/^(br(\s)*\/)/.test(attribute)) {
        obj.isNewLine = true;
        this._resultObjectArray.push({ text: '', style: { isNewLine: true } });
        return obj;
      }

      // <img src=... />
      if (/^(img(\s)*src(\s)*=[^>]+\/)/.test(attribute)) {
        this._parseImageTag(attribute);
        return {};
      }

      // The effect tags below intentionally fall through: whatever text is left
      // after one effect's attributes is offered to the next effect.
      header = /^(cg(\s)*[^>]*)/.exec(attribute);
      if (header) {
        const gradient = { lb: '#ffffff', rb: '#ffffff', lt: '#ffffff', rt: '#ffffff' };
        attribute = this._parseEffectAttributes(
          header[0].substring('cg'.length).trim(),
          colorGradientAttrReg,
          obj,
          (name, value) => {
            switch (name) {
              case 'lb': gradient.lb = value; break;
              case 'rb': gradient.rb = value; break;
              case 'lt': gradient.lt = value; break;
              case 'rt': gradient.rt = value; break;
              default: break;
            }
          },
        );
        obj.colorGradient = gradient;
      }

      header = /^(face(\s)*[^>]*)/.exec(attribute);
      if (header) {
        const face = { color: '#ffffff', dilate: 0.5, softness: 0.01 };
        attribute = this._parseEffectAttributes(
          header[0].substring('face'.length).trim(),
          faceAttrReg,
          obj,
          (name, value) => {
            switch (name) {
              case 'color': face.color = value; break;
              case 'dilate': face.dilate = Number(value); break;
              case 'softness': face.softness = Number(value); break;
              default: break;
            }
          },
        );
        obj.face = face;
      }

      header = /^(outline(\s)*[^>]*)/.exec(attribute);
      if (header) {
        const outline = { color: '#ffffff', thickness: 0.1 };
        attribute = this._parseEffectAttributes(
          header[0].substring('outline'.length).trim(),
          outlineAttrReg,
          obj,
          (name, value) => {
            switch (name) {
              case 'color': outline.color = value; break;
              case 'thickness': outline.thickness = Number(value); break;
              default: break;
            }
          },
        );
        obj.outline = outline;
      }

      header = /^(underlay(\s)*[^>]*)/.exec(attribute);
      if (header) {
        const underlay = { color: '#ffffff', x: 0, y: 0, dilate: 0.5, softness: 0.1 };
        attribute = this._parseEffectAttributes(
          header[0].substring('underlay'.length).trim(),
          underlayAttrReg,
          obj,
          (name, value) => {
            switch (name) {
              case 'color': underlay.color = value; break;
              case 'dilate': underlay.dilate = Number(value); break;
              case 'softness': underlay.softness = Number(value); break;
              case 'x': underlay.x = Number(value); break;
              case 'y': underlay.y = Number(value); break;
              default: break;
            }
          },
        );
        obj.underlay = underlay;
      }

      header = /^(glow(\s)*[^>]*)/.exec(attribute);
      if (header) {
        const glow = { color: '#000000', offset: 0.5, inner: 0.01, outer: 0.01, power: 1 };
        attribute = this._parseEffectAttributes(
          header[0].substring('glow'.length).trim(),
          glowAttrReg,
          obj,
          (name, value) => {
            switch (name) {
              case 'color': glow.color = value; break;
              case 'offset': glow.offset = Number(value); break;
              case 'inner': glow.inner = Number(value); break;
              case 'outer': glow.outer = Number(value); break;
              case 'power': glow.power = Number(value); break;
              default: break;
            }
          },
        );
        obj.glow = glow;
      }

      // <on ...>, <u>, <u=N>, <s>, <s=N>, <b>, <i>
      header = /^(on|u|s|b|i)(\s)*/.exec(attribute);
      if (header) {
        attribute = attribute.substring(header[0].length).trim();
        switch (header[1]) {
          case 'u':
            obj.underline = true;
            obj.offset = attribute.charAt(0) === '=' ? Number(attribute.slice(1)) : 0;
            break;
          case 's':
            obj.strikethrough = true;
            obj.offset = attribute.charAt(0) === '=' ? Number(attribute.slice(1)) : 0;
            break;
          case 'i':
            obj.italic = true;
            break;
          case 'b':
            obj.bold = true;
            break;
          default:
            break;
        }
        if (attribute === '') {
          return obj;
        }
        obj.event = this._processEventHandler(attribute);
      }
      return obj;
    }

    /**
     * Parses the attributes of an `<img .../>` tag and, when `src` was quoted,
     * pushes an empty-text segment carrying the image style.
     */
    private _parseImageTag(attribute: string): void {
      const style: IHtmlTextParserStack = {};
      let isValidImageTag = false;
      let attr = this._readAttribute(attribute, imageAttrReg, 'src');
      while (attr) {
        // The last value of a self-closing tag carries the trailing '/'.
        let value = attr.value.endsWith('/') ? attr.value.slice(0, -1) : attr.value;
        const firstChar = value.charAt(0);
        const isQuoted = firstChar === '"' || firstChar === '\'';
        switch (attr.name) {
          case 'src':
            // Only a quoted src makes the tag valid.
            if (isQuoted) {
              isValidImageTag = true;
              value = value.slice(1, -1);
            }
            style.isImage = true;
            style.src = value;
            break;
          case 'height':
            style.imageHeight = parseInt(value);
            break;
          case 'width':
            style.imageWidth = parseInt(value);
            break;
          case 'align':
            if (isQuoted) {
              value = value.slice(1, -1);
            }
            style.imageAlign = value.toLowerCase();
            break;
          case 'offset':
            style.imageOffset = value;
            break;
          default:
            this._applyEventAttribute(style, attr.name, value);
            break;
        }
        attr = this._readAttribute(attr.rest, imageAttrReg, 'src');
      }
      if (isValidImageTag && style.isImage) {
        this._resultObjectArray.push({ text: '', style });
      }
    }

    /**
     * Consumes consecutive `name=value` pairs matched by `attrReg`.
     * `click` / `param` are stored on `style.event`; every other pair is handed to `assign`.
     *
     * @returns The text left over after the last consumed pair (the input itself
     * when nothing matched), so a following effect in the same tag can be parsed.
     */
    private _parseEffectAttributes(
      attribute: string,
      attrReg: RegExp,
      style: IHtmlTextParserStack,
      assign: (name: string, value: string) => void,
    ): string {
      let attr = this._readAttribute(attribute, attrReg);
      while (attr) {
        if (attr.name === 'click' || attr.name === 'param') {
          this._applyEventAttribute(style, attr.name, attr.value);
        } else {
          assign(attr.name, attr.value);
        }
        attribute = attr.rest;
        attr = this._readAttribute(attribute, attrReg);
      }
      return attribute;
    }

    /**
     * Splits the first attribute matched by `attrReg` off `attribute`.
     * Text before the match is skipped. The value ends at the next space, except
     * for `spaceTolerantName`, whose first quoted section may contain spaces.
     *
     * @returns `null` when `attrReg` does not match.
     */
    private _readAttribute(
      attribute: string,
      attrReg: RegExp,
      spaceTolerantName?: string,
    ): IParsedAttribute | null {
      const header = attrReg.exec(attribute);
      if (!header) {
        return null;
      }
      // The match includes surrounding spaces and '='; keep the letters only.
      const name = header[0].replace(/[^a-zA-Z]/g, '').toLowerCase();
      const remaining = attribute.substring(header.index + header[0].length).trim();
      const searchFrom = name === spaceTolerantName ? this.getRightQuotationIndex(remaining) + 1 : 0;
      const nextSpace = remaining.indexOf(' ', searchFrom);
      if (nextSpace === -1) {
        // Last value of the tag. Returning an empty rest matters:
        // `substring(-1)` would hand the whole value back to be scanned again.
        return { name, value: remaining, rest: '' };
      }
      return {
        name,
        value: remaining.substring(0, nextSpace),
        rest: remaining.substring(nextSpace).trim(),
      };
    }

    /**
     * Stores a `click` or `param` attribute on `style.event`.
     * `param` is only recorded when a `click` was seen before it.
     */
    private _applyEventAttribute(style: IHtmlTextParserStack, name: string, value: string): void {
      if (name === 'click') {
        style.event = this._processEventHandler(`${name}=${value}`);
      } else if (name === 'param' && style.event) {
        // Accept either quote kind, matching what `click` accepts.
        style.event[name] = value.replace(/^["']|["']$/g, '');
      }
    }

    /**
     * Finds the closing quote of the first quoted section in `remainingArgument`.
     *
     * @returns Index of the closing quote, or -1 when there is no quote or the
     * first quote is never closed.
     */
    public getRightQuotationIndex(remainingArgument: string): number {
      const leftSingleQuot = remainingArgument.indexOf('\'');
      const leftDoubleQuot = remainingArgument.indexOf('"');
      if (leftSingleQuot === -1 && leftDoubleQuot === -1) {
        return -1;
      }
      // Whichever quote kind appears first opens the quoted section.
      const useSingleQuot = leftSingleQuot > -1 && (leftDoubleQuot === -1 || leftSingleQuot < leftDoubleQuot);
      const quote = useSingleQuot ? '\'' : '"';
      const leftQuot = useSingleQuot ? leftSingleQuot : leftDoubleQuot;
      return remainingArgument.indexOf(quote, leftQuot + 1);
    }

    /**
     * Parses a sequence of `click="..."` / `param="..."` attributes.
     * Only quoted values (single or double quotes) are recorded; an unquoted
     * value is skipped.
     *
     * @returns Map from attribute name to its trimmed, unquoted value.
     */
    private _processEventHandler(eventString: string): { [k: string]: string } {
      const obj: { [k: string]: string } = {};
      let eventNames = eventRegx.exec(eventString);
      while (eventNames) {
        const matched = eventNames[0];
        let eventValue = '';
        let isValidTag = false;
        let index = 0;
        eventString = eventString.substring(matched.length).trim();
        const quote = eventString.charAt(0);
        if (quote === '"' || quote === '\'') {
          index = eventString.indexOf(quote, 1);
          if (index > -1) {
            eventValue = eventString.substring(1, index).trim();
            isValidTag = true;
          }
          // Step past the closing quote; an unterminated quote leaves index at 0,
          // which ends the loop because the rest no longer starts with a name.
          index++;
        } else {
          // skip the invalid attribute value
          const match = /(\S)+/.exec(eventString);
          index = match ? match[0].length : 0;
        }
        if (isValidTag) {
          // Drop the trailing '=' and any spaces before it.
          obj[matched.substring(0, matched.length - 1).trim()] = eventValue;
        }
        eventString = eventString.substring(index).trim();
        eventNames = eventRegx.exec(eventString);
      }
      return obj;
    }

    /**
     * Pushes the style of an opening tag onto the stack. A nested tag inherits
     * every field of its parent that it does not set to a truthy value itself.
     */
    private _addToStack(attribute: string): void {
      const obj = this._attributeToObject(attribute);
      const previousTagObj = this._stack[this._stack.length - 1];
      if (previousTagObj === undefined) {
        this._stack.push(obj);
        return;
      }
      if (obj.isNewLine || obj.isImage) {
        return;
      }
      // for nested tags
      for (const key of Object.keys(previousTagObj) as Array<keyof IHtmlTextParserStack>) {
        if (!obj[key]) {
          inheritStyleValue(obj, previousTagObj, key);
        }
      }
      this._stack.push(obj);
    }

    /** Emits `value` as a text segment styled by the innermost open tag, if any. */
    private _processResult(value: string): void {
      if (value.length === 0) {
        return;
      }
      const text = this._escapeSpecialSymbol(value);
      const style = this._stack[this._stack.length - 1];
      if (style !== undefined) {
        this._resultObjectArray.push({ text, style });
      } else {
        this._resultObjectArray.push({ text });
      }
    }

    /**
     * Decodes `&lt; &gt; &amp; &quot; &apos; &nbsp;` in a single pass, so the
     * output of one replacement is never decoded again (`&amp;quot;` yields
     * the text `&quot;`, not `"`).
     */
    private _escapeSpecialSymbol(str: string): string {
      return str.replace(htmlEntityReg, (entity) => {
        const decoded = htmlEntityMap[entity];
        return decoded === undefined ? entity : decoded;
      });
    }
  }