lemmatized-entity-parsers.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. import { AppConfig } from 'src/app/app.config';
  2. import { ParserRegister, xmlParser } from '.';
  3. import {
  4. GenericElement, LemmatizedEntitiesList, LemmatizedEntity, LemmatizedEntityInfo, LemmatizedEntityLabel,
  5. LemmatizedEntityRef, LemmatizedEntityType, Relation, XMLElement,
  6. } from '../../models/evt-models';
  7. import { xpath } from '../../utils/dom-utils';
  8. import { replaceNewLines } from '../../utils/xml-utils';
  9. import { AttributeMapParser, AttributeParser, EmptyParser, GenericElemParser, TextParser } from './basic-parsers';
  10. import { createParser, parseChildren, Parser } from './parser-models';
  11. export const lemmatizedEntitiesListsTagNamesMap: { [key: string]: string } = {
  12. lemmas: 'list',
  13. occurrences: 'w[ref], lem[ref]',
  14. };
  15. // error ? FS
  16. export function getLemListType(tagName): LemmatizedEntityType {
  17. return tagName.toLowerCase();
  18. }
  19. export function getLemListsToParseTagNames() {
  20. const neLemListsConfig = AppConfig.evtSettings.edition.lemmatizedEntitiesLists || {};
  21. return Object.keys(neLemListsConfig)
  22. .map((i) => neLemListsConfig[i].enabled ? lemmatizedEntitiesListsTagNamesMap[i] : undefined)
  23. .filter(ne => !!ne);
  24. }
  25. @xmlParser('evt-lemmatized-entities-list-parser', LemmatizedEntitiesListParser)
  26. export class LemmatizedEntitiesListParser extends EmptyParser implements Parser<XMLElement> {
  27. private neLemListsConfig = AppConfig.evtSettings.edition.lemmatizedEntitiesLists || {};
  28. attributeParser = createParser(AttributeParser, this.genericParse);
  29. parse(xml: XMLElement): LemmatizedEntitiesList {
  30. const parsedLemList: LemmatizedEntitiesList = {
  31. type: LemmatizedEntitiesList,
  32. id: xml.getAttribute('xml:id') || xpath(xml),
  33. label: '',
  34. lemmatizedEntityType: getLemListType(xml.tagName),
  35. content: [],
  36. sublists: [],
  37. originalEncoding: xml,
  38. relations: [],
  39. description: [],
  40. attributes: this.attributeParser.parse(xml),
  41. };
  42. const relationParse = createParser(RelationParser, this.genericParse);
  43. xml.childNodes.forEach((child: XMLElement) => {
  44. if (child.nodeType === 1) {
  45. switch (child.tagName.toLowerCase()) {
  46. case 'head':
  47. parsedLemList.label = replaceNewLines(child.textContent);
  48. break;
  49. case 'desc':
  50. parsedLemList.description.push(this.genericParse(child));
  51. break;
  52. case 'relation':
  53. if (this.neLemListsConfig.relations.enabled) {
  54. parsedLemList.relations.push(relationParse.parse(child));
  55. }
  56. break;
  57. case 'listrelation':
  58. if (this.neLemListsConfig.relations.enabled) {
  59. child.querySelectorAll<XMLElement>('relation').forEach(r => parsedLemList.relations.push(relationParse.parse(r)));
  60. }
  61. break;
  62. default:
  63. if (getLemListsToParseTagNames().indexOf(child.tagName) >= 0) {
  64. const subListParser = ParserRegister.get('evt-lemmatized-entities-list-parser');
  65. const parsedSubList = subListParser.parse(child) as LemmatizedEntitiesList;
  66. parsedLemList.sublists.push(parsedSubList);
  67. parsedLemList.content = parsedLemList.content.concat(parsedSubList.content);
  68. parsedLemList.relations = parsedLemList.relations.concat(parsedSubList.relations);
  69. } else {
  70. parsedLemList.content.push(this.genericParse(child) as LemmatizedEntity);
  71. }
  72. }
  73. }
  74. });
  75. parsedLemList.label = parsedLemList.label || xml.getAttribute('type') || `List of ${parsedLemList.lemmatizedEntityType}`;
  76. return parsedLemList;
  77. }
  78. }
  79. @xmlParser('evt-lemmatized-entity-parser', LemmatizedEntityRefParser)
  80. export class LemmatizedEntityRefParser extends EmptyParser implements Parser<XMLElement> {
  81. elementParser = createParser(GenericElemParser, this.genericParse);
  82. attributeParser = createParser(AttributeParser, this.genericParse);
  83. parse(xml: XMLElement): LemmatizedEntityRef | GenericElement {
  84. const ref = xml.getAttribute('ref');
  85. if (!ref) { return this.elementParser.parse(xml); }
  86. const neLemTypeMap: { [key: string]: LemmatizedEntityType } = {
  87. w: 'w',
  88. lemmas: 'w',
  89. lem: 'lem',
  90. item: 'item'
  91. };
  92. return {
  93. type: LemmatizedEntityRef,
  94. entityLemId: getLemEntityID(ref),
  95. entityLemType: neLemTypeMap[xml.tagName],
  96. path: xpath(xml),
  97. content: parseChildren(xml, this.genericParse),
  98. attributes: this.attributeParser.parse(xml),
  99. class: xml.tagName.toLowerCase(),
  100. };
  101. }
  102. }
  103. // Generic entity parser
  104. export class EntityParser extends EmptyParser implements Parser<XMLElement> {
  105. // TODO: try to refactor subclasses to use a function parameter to get labels
  106. attributeParsers = createParser(AttributeMapParser, this.genericParse);
  107. parse(xml: XMLElement): LemmatizedEntity {
  108. const elId = xml.getAttribute('xml:id') || xpath(xml);
  109. const label = replaceNewLines(xml.textContent) || 'No info';
  110. const entity: LemmatizedEntity = {
  111. type: LemmatizedEntity,
  112. id: elId,
  113. sortKey: xml.getAttribute('sortKey') || (label ? label[0] : '') || xml.getAttribute('xml:id') || xpath(xml),
  114. originalEncoding: xml,
  115. label,
  116. lemmatizedEntityType: this.getEntityType(xml.tagName),
  117. content: Array.from(xml.children).map((subchild: XMLElement) => this.parseEntityInfo(subchild)),
  118. attributes: this.attributeParsers.parse(xml),
  119. };
  120. return entity;
  121. }
  122. private parseEntityInfo(xml: XMLElement): LemmatizedEntityInfo {
  123. return {
  124. type: LemmatizedEntityInfo,
  125. label: xml.nodeType === 1 ? xml.tagName.toLowerCase() : 'info',
  126. content: [this.genericParse(xml)],
  127. attributes: xml.nodeType === 1 ? this.attributeParsers.parse(xml) : {},
  128. };
  129. }
  130. private getEntityType(tagName): LemmatizedEntityType { return tagName.toLowerCase(); }
  131. }
  132. @xmlParser('item', ItemParser)
  133. export class ItemParser extends EntityParser {
  134. parse(xml: XMLElement): LemmatizedEntity {
  135. return {
  136. ...super.parse(xml),
  137. label: this.getLabel(xml),
  138. };
  139. }
  140. private getLabel(xml: XMLElement) { // TODO: refactor me, also try to use a function parameter for the label for each entity
  141. const itemElement = xml.querySelector<XMLElement>('item');
  142. const wElement = xml.querySelector<XMLElement>('w');
  143. const lemElement = xml.querySelector<XMLElement>('lem');
  144. let label: LemmatizedEntityLabel;
  145. if (itemElement) {
  146. label = replaceNewLines(itemElement.textContent);
  147. } else if (wElement) {
  148. label = wElement ? `${replaceNewLines(wElement.textContent)} ` : '';
  149. } else if (lemElement) {
  150. label = lemElement ? `${replaceNewLines(lemElement.textContent)} ` : '';
  151. }
  152. return label;
  153. }
  154. }
  155. export class EventParser extends EntityParser {
  156. parse(xml: XMLElement): LemmatizedEntity {
  157. return {
  158. ...super.parse(xml),
  159. label: textLabel('label', xml),
  160. };
  161. }
  162. getLabel(xml: XMLElement) {
  163. const eventLabelElement = xml.querySelector<XMLElement>('label');
  164. return (eventLabelElement ? replaceNewLines(eventLabelElement.textContent) : '') || 'No info';
  165. }
  166. }
  167. // @xmlParser('interpGrp', InterpGroupParser)
// export class InterpGroupParser extends EntityParser {
  169. // parse(xml: XMLElement): LemmatizedEntity { return { ...super.parse(xml), label: this.getLabel(xml) }; }
  170. // private getLabel(xml: XMLElement) { // TODO: refactor me
  171. // const role = xml.getAttribute('xml:id');
  172. // let label: LemmatizedEntityLabel = 'No info';
  173. // if (role) {
  174. // label = role.trim();
  175. // } else {
  176. // label = replaceNewLines(xml.textContent) || 'No info';
  177. // }
  178. // return label;
  179. // }
  180. // }
  181. export class EntityInfoParser extends EmptyParser implements Parser<XMLElement> {
  182. attributeParsers = createParser(AttributeParser, this.genericParse);
  183. parse(xml: XMLElement): LemmatizedEntityInfo {
  184. return {
  185. type: LemmatizedEntityInfo,
  186. label: xml.nodeType === 1 ? xml.tagName.toLowerCase() : 'info',
  187. content: [this.genericParse(xml)],
  188. attributes: xml.nodeType === 1 ? this.attributeParsers.parse(xml) : {},
  189. };
  190. }
  191. }
  192. export class RelationParser extends EmptyParser implements Parser<XMLElement> {
  193. attributeParsers = createParser(AttributeParser, this.genericParse);
  194. entityInfoParser = createParser(EntityInfoParser, this.genericParse);
  195. textParser = createParser(TextParser, this.genericParse);
  196. parse(xml: XMLElement): Relation {
  197. const descriptionEls = xml.querySelectorAll<XMLElement>('desc');
  198. const attributes = this.attributeParsers.parse(xml);
  199. const { name, type } = attributes;
  200. const active = xml.getAttribute('active') || ''; // TODO: make get attributes return '' as default?
  201. const mutual = xml.getAttribute('mutual') || '';
  202. const passive = xml.getAttribute('passive') || '';
  203. const relation: Relation = {
  204. type: Relation,
  205. name,
  206. activeParts: active.replace(/#/g, '').split(' '), // TODO refactor to a single function
  207. mutualParts: mutual.replace(/#/g, '').split(' '),
  208. passiveParts: passive.replace(/#/g, '').split(' '),
  209. relationType: type,
  210. attributes,
  211. content: Array.from(xml.children).map((subchild: XMLElement) => this.entityInfoParser.parse(subchild)),
  212. description: [],
  213. };
  214. if (descriptionEls && descriptionEls.length > 0) {
  215. descriptionEls.forEach((el) => relation.description.push(this.genericParse(el)));
  216. } else {
  217. relation.description = [this.textParser.parse(xml)];
  218. }
  219. const parentListEl = xml.parentElement.tagName === 'listRelation' ? xml.parentElement : undefined;
  220. if (parentListEl) {
  221. relation.relationType = `${(parentListEl.getAttribute('type') || '')} ${(relation.relationType || '')}`.trim();
  222. }
  223. return relation;
  224. }
  225. }
  226. function getLemEntityID(ref: string) { return ref ? ref.replace(/#/g, '') : ''; }
  227. function textLabel(elemName: string, xml: XMLElement) {
  228. const el = xml.querySelector<XMLElement>(elemName);
  229. return (el ? replaceNewLines(el.textContent) : '') || 'No info';
  230. }