lemmatized-entity-parsers.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. import { AppConfig } from 'src/app/app.config';
  2. import { ParserRegister, xmlParser } from '.';
  3. import {
  4. GenericElement, LemmatizedEntitiesList, LemmatizedEntity, LemmatizedEntityInfo, LemmatizedEntityLabel,
  5. LemmatizedEntityRef, LemmatizedEntityType, Relation, XMLElement,
  6. } from '../../models/evt-models';
  7. import { xpath } from '../../utils/dom-utils';
  8. import { replaceNewLines } from '../../utils/xml-utils';
  9. import { AttributeMapParser, AttributeParser, EmptyParser, GenericElemParser, TextParser } from './basic-parsers';
  10. import { createParser, parseChildren, Parser } from './parser-models';
  11. export const lemmatizedEntitiesListsTagNamesMap: { [key: string]: string } = {
  12. lemmas: 'list',
  13. occurrences: 'w[ref], lem[ref]',
  14. };
  15. // error ? FS
  16. export function getLemListType(tagName): LemmatizedEntityType {
  17. return tagName.toLowerCase();
  18. }
  19. export function getLemListsToParseTagNames() {
  20. const neLemListsConfig = AppConfig.evtSettings.edition.lemmatizedEntitiesLists || {};
  21. return Object.keys(neLemListsConfig)
  22. .map((i) => neLemListsConfig[i].enabled ? lemmatizedEntitiesListsTagNamesMap[i] : undefined)
  23. .filter(ne => !!ne);
  24. }
  25. @xmlParser('evt-lemmatized-entities-list-parser', LemmatizedEntitiesListParser)
  26. export class LemmatizedEntitiesListParser extends EmptyParser implements Parser<XMLElement> {
  27. private neLemListsConfig = AppConfig.evtSettings.edition.lemmatizedEntitiesLists || {};
  28. attributeParser = createParser(AttributeParser, this.genericParse);
  29. parse(xml: XMLElement): LemmatizedEntitiesList {
  30. const parsedLemList: LemmatizedEntitiesList = {
  31. type: LemmatizedEntitiesList,
  32. id: xml.getAttribute('xml:id') || xpath(xml),
  33. label: '',
  34. lemmatizedEntityType: getLemListType(xml.tagName),
  35. content: [],
  36. sublists: [],
  37. originalEncoding: xml,
  38. relations: [],
  39. description: [],
  40. attributes: this.attributeParser.parse(xml),
  41. };
  42. const relationParse = createParser(RelationParser, this.genericParse);
  43. xml.childNodes.forEach((child: XMLElement) => {
  44. if (child.nodeType === 1) {
  45. switch (child.tagName.toLowerCase()) {
  46. case 'head':
  47. parsedLemList.label = replaceNewLines(child.textContent);
  48. break;
  49. case 'desc':
  50. parsedLemList.description.push(this.genericParse(child));
  51. break;
  52. case 'relation':
  53. if (this.neLemListsConfig.relations.enabled) {
  54. parsedLemList.relations.push(relationParse.parse(child));
  55. }
  56. break;
  57. case 'listrelation':
  58. if (this.neLemListsConfig.relations.enabled) {
  59. child.querySelectorAll<XMLElement>('relation').forEach(r => parsedLemList.relations.push(relationParse.parse(r)));
  60. }
  61. break;
  62. default:
  63. if (getLemListsToParseTagNames().indexOf(child.tagName) >= 0) {
  64. const subListParser = ParserRegister.get('evt-lemmatized-entities-list-parser');
  65. const parsedSubList = subListParser.parse(child) as LemmatizedEntitiesList;
  66. parsedLemList.sublists.push(parsedSubList);
  67. parsedLemList.content = parsedLemList.content.concat(parsedSubList.content);
  68. parsedLemList.relations = parsedLemList.relations.concat(parsedSubList.relations);
  69. } else {
  70. parsedLemList.content.push(this.genericParse(child) as LemmatizedEntity);
  71. }
  72. }
  73. }
  74. });
  75. parsedLemList.label = parsedLemList.label || xml.getAttribute('type') || `List of ${parsedLemList.lemmatizedEntityType}`;
  76. return parsedLemList;
  77. }
  78. }
  79. @xmlParser('evt-lemmatized-entity-parser', LemmatizedEntityRefParser)
  80. export class LemmatizedEntityRefParser extends EmptyParser implements Parser<XMLElement> {
  81. elementParser = createParser(GenericElemParser, this.genericParse);
  82. attributeParser = createParser(AttributeParser, this.genericParse);
  83. parse(xml: XMLElement): LemmatizedEntityRef | GenericElement {
  84. const ref = xml.getAttribute('ref');
  85. if (!ref) { return this.elementParser.parse(xml); }
  86. const neLemTypeMap: { [key: string]: LemmatizedEntityType } = {
  87. w: 'lem',
  88. lem: 'lem',
  89. item: 'item'
  90. };
  91. return {
  92. type: LemmatizedEntityRef,
  93. entityLemId: getLemEntityID(ref),
  94. entityLemType: neLemTypeMap[xml.tagName.toLowerCase()],
  95. path: xpath(xml),
  96. content: parseChildren(xml, this.genericParse),
  97. attributes: this.attributeParser.parse(xml),
  98. class: xml.tagName.toLowerCase(),
  99. };
  100. }
  101. }
  102. // Generic entity parser
  103. export class EntityParser extends EmptyParser implements Parser<XMLElement> {
  104. // TODO: try to refactor subclasses to use a function parameter to get labels
  105. attributeParsers = createParser(AttributeMapParser, this.genericParse);
  106. parse(xml: XMLElement): LemmatizedEntity {
  107. const elId = xml.getAttribute('xml:id') || xpath(xml);
  108. const label = replaceNewLines(xml.textContent) || 'No info';
  109. const entity: LemmatizedEntity = {
  110. type: LemmatizedEntity,
  111. id: elId,
  112. sortKey: xml.getAttribute('sortKey') || (label ? label[0] : '') || xml.getAttribute('xml:id') || xpath(xml),
  113. originalEncoding: xml,
  114. label,
  115. lemmatizedEntityType: this.getEntityType(xml.tagName),
  116. content: Array.from(xml.children).map((subchild: XMLElement) => this.parseEntityInfo(subchild)),
  117. attributes: this.attributeParsers.parse(xml),
  118. };
  119. return entity;
  120. }
  121. private parseEntityInfo(xml: XMLElement): LemmatizedEntityInfo {
  122. return {
  123. type: LemmatizedEntityInfo,
  124. label: xml.nodeType === 1 ? xml.tagName.toLowerCase() : 'info',
  125. content: [this.genericParse(xml)],
  126. attributes: xml.nodeType === 1 ? this.attributeParsers.parse(xml) : {},
  127. };
  128. }
  129. private getEntityType(tagName): LemmatizedEntityType { return tagName.toLowerCase(); }
  130. }
  131. @xmlParser('item', ItemParser)
  132. export class ItemParser extends EntityParser {
  133. parse(xml: XMLElement): LemmatizedEntity {
  134. return {
  135. ...super.parse(xml),
  136. label: this.getLabel(xml),
  137. };
  138. }
  139. private getLabel(xml: XMLElement) { // TODO: refactor me, also try to use a function parameter for the label for each entity
  140. const itemElement = xml.querySelector<XMLElement>('item');
  141. const wElement = xml.querySelector<XMLElement>('w');
  142. const lemElement = xml.querySelector<XMLElement>('lem');
  143. let label: LemmatizedEntityLabel;
  144. if (itemElement) {
  145. label = replaceNewLines(itemElement.textContent);
  146. } else if (wElement) {
  147. label = wElement ? `${replaceNewLines(wElement.textContent)} ` : '';
  148. } else if (lemElement) {
  149. label = lemElement ? `${replaceNewLines(lemElement.textContent)} ` : '';
  150. }
  151. return label;
  152. }
  153. }
  154. export class EventParser extends EntityParser {
  155. parse(xml: XMLElement): LemmatizedEntity {
  156. return {
  157. ...super.parse(xml),
  158. label: textLabel('label', xml),
  159. };
  160. }
  161. getLabel(xml: XMLElement) {
  162. const eventLabelElement = xml.querySelector<XMLElement>('label');
  163. return (eventLabelElement ? replaceNewLines(eventLabelElement.textContent) : '') || 'No info';
  164. }
  165. }
// @xmlParser('interpGrp', InterpGroupParser)
// export class InterpGroupParser extends EntityParser {
//   parse(xml: XMLElement): LemmatizedEntity { return { ...super.parse(xml), label: this.getLabel(xml) }; }
//   private getLabel(xml: XMLElement) { // TODO: refactor me
//     const role = xml.getAttribute('xml:id');
//     let label: LemmatizedEntityLabel = 'No info';
//     if (role) {
//       label = role.trim();
//     } else {
//       label = replaceNewLines(xml.textContent) || 'No info';
//     }
//     return label;
//   }
// }
  180. export class EntityInfoParser extends EmptyParser implements Parser<XMLElement> {
  181. attributeParsers = createParser(AttributeParser, this.genericParse);
  182. parse(xml: XMLElement): LemmatizedEntityInfo {
  183. return {
  184. type: LemmatizedEntityInfo,
  185. label: xml.nodeType === 1 ? xml.tagName.toLowerCase() : 'info',
  186. content: [this.genericParse(xml)],
  187. attributes: xml.nodeType === 1 ? this.attributeParsers.parse(xml) : {},
  188. };
  189. }
  190. }
  191. export class RelationParser extends EmptyParser implements Parser<XMLElement> {
  192. attributeParsers = createParser(AttributeParser, this.genericParse);
  193. entityInfoParser = createParser(EntityInfoParser, this.genericParse);
  194. textParser = createParser(TextParser, this.genericParse);
  195. parse(xml: XMLElement): Relation {
  196. const descriptionEls = xml.querySelectorAll<XMLElement>('desc');
  197. const attributes = this.attributeParsers.parse(xml);
  198. const { name, type } = attributes;
  199. const active = xml.getAttribute('active') || ''; // TODO: make get attributes return '' as default?
  200. const mutual = xml.getAttribute('mutual') || '';
  201. const passive = xml.getAttribute('passive') || '';
  202. const relation: Relation = {
  203. type: Relation,
  204. name,
  205. activeParts: active.replace(/#/g, '').split(' '), // TODO refactor to a single function
  206. mutualParts: mutual.replace(/#/g, '').split(' '),
  207. passiveParts: passive.replace(/#/g, '').split(' '),
  208. relationType: type,
  209. attributes,
  210. content: Array.from(xml.children).map((subchild: XMLElement) => this.entityInfoParser.parse(subchild)),
  211. description: [],
  212. };
  213. if (descriptionEls && descriptionEls.length > 0) {
  214. descriptionEls.forEach((el) => relation.description.push(this.genericParse(el)));
  215. } else {
  216. relation.description = [this.textParser.parse(xml)];
  217. }
  218. const parentListEl = xml.parentElement.tagName === 'listRelation' ? xml.parentElement : undefined;
  219. if (parentListEl) {
  220. relation.relationType = `${(parentListEl.getAttribute('type') || '')} ${(relation.relationType || '')}`.trim();
  221. }
  222. return relation;
  223. }
  224. }
  225. function getLemEntityID(ref: string) { return ref ? ref.replace(/#/g, '') : ''; }
  226. function textLabel(elemName: string, xml: XMLElement) {
  227. const el = xml.querySelector<XMLElement>(elemName);
  228. return (el ? replaceNewLines(el.textContent) : '') || 'No info';
  229. }