Source code for src.xmlChecks.xmlIdCheck

# SPDX-FileCopyrightText: Copyright © 2026 BBC
#
# SPDX-License-Identifier: BSD-3-Clause

from src.validationLogging.validationCodes import ValidationCode
from src.validationLogging.validationLogger import ValidationLogger
from xml.etree.ElementTree import Element
from src.xmlUtils import get_unqualified_name, \
    xmlIdAttr, unqualifiedIdAttr, make_qname
from .xmlCheck import XmlCheck
from .ttmlUtils import ns_ttml


class requireXmlId(XmlCheck):
    """
    Checks that element has an xml:id attribute
    """

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Report an error if ``input`` carries no xml:id attribute.

        :param input: the element to inspect; only its own attributes
            are examined, not those of its descendants
        :param context: shared validation context (not used by this check)
        :param validation_results: logger that receives the error, if any
        :return: True when the attribute is present, False otherwise
        """
        if xmlIdAttr in input.attrib:
            return True

        element_name = get_unqualified_name(input.tag)
        validation_results.error(
            location='{} xml:id omitted'.format(element_name),
            message='Element missing required xml:id attribute',
            code=ValidationCode.ebuttd_p_xml_id_constraint
        )
        return False
class unqualifiedIdAttributeCheck(XmlCheck):
    """
    Warns when descendant elements carry an unqualified id attribute,
    reporting how many of them also lack an xml:id attribute.
    """

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Count descendants of ``input`` with an unqualified id attribute.

        :param input: element whose descendants are searched
        :param context: shared validation context (not used by this check)
        :param validation_results: logger that receives the warning, if any
        :return: always True; this check only ever warns
        """
        xml_id_holders = set(
            input.findall('.//*[@{}]'.format(xmlIdAttr)))
        unq_id_holders = set(
            input.findall('.//*[@{}]'.format(unqualifiedIdAttr)))

        total_unqualified = len(unq_id_holders)
        lacking_xml_id = len(unq_id_holders - xml_id_holders)

        # lacking_xml_id can never exceed total_unqualified, so testing the
        # total alone covers both conditions for issuing the warning.
        if total_unqualified > 0:
            validation_results.warn(
                location='Parsed document',
                message='{} elements have unqualified id attributes, '
                        'of which {} have no xml:id attribute. '
                        'Check if they should have xml:id attributes!'
                        .format(total_unqualified, lacking_xml_id),
                code=ValidationCode.xml_id_unqualified
            )

        # Never fail on this
        return True
class duplicateXmlIdCheck(XmlCheck):
    """
    Checks that every xml:id value is used by at most one element, and
    publishes the xml:id to element map in the validation context.
    """

    @classmethod
    def _gatherXmlId(cls, e: Element, m: dict[str, list]):
        """
        Append ``e`` to the list stored in ``m`` under its xml:id value.

        Elements with a missing or empty xml:id are ignored.
        """
        key = e.get(xmlIdAttr)
        if not key:
            return
        m.setdefault(key, []).append(e)

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Collect xml:id values from ``input`` and all of its descendants.

        One error is logged per xml:id value shared by several elements;
        a single "good" result is logged when there are none. In both
        cases the full map (xml:id value -> list of elements) is stored
        in ``context['xmlId_to_element_map']``.

        :return: True when no xml:id value is duplicated
        """
        elements_by_id = {}
        for node in input.iter():
            duplicateXmlIdCheck._gatherXmlId(e=node, m=elements_by_id)

        clashes = {
            id_value: holders
            for id_value, holders in elements_by_id.items()
            if len(holders) > 1
        }

        for id_value, holders in clashes.items():
            validation_results.error(
                location=', '.join(holder.tag for holder in holders),
                message='Duplicate xml:id found with value ' + id_value,
                code=ValidationCode.xml_id_unique
            )

        all_unique = not clashes
        if all_unique:
            validation_results.good(
                location='Parsed document',
                message='xml:id values are unique',
                code=ValidationCode.xml_id_unique
            )

        context['xmlId_to_element_map'] = elements_by_id

        return all_unique
# Namespace prefixes will be mapped to document namespace later IDREF_attr_to_applicable_elements = { 'agent': ['ttm:agent'], 'region': ['tt:region'], # region must have a layout element ancestor } # Namespace prefixes will be mapped to document namespace later IDREFS_attr_to_applicable_elements = { 'style': [ 'tt:style' # style must have a styling element ancestor ], 'animate': [ 'tt:animate', # must have an animation element ancestor 'tt:set' # must have an animation element ancestor ], 'ttm:agent': [ 'ttm:agent' # ttm:agent attribute != agent attribute! ], }
def qualify(tag: str, tt_ns: str) -> str:
    """
    Expand a ``tt:`` or ``ttm:`` prefixed name into a qualified name.

    :param tag: attribute or element name, optionally prefixed
    :param tt_ns: namespace to use for the ``tt`` prefix; the ``ttm``
        prefix maps to this namespace with ``#metadata`` appended
    :return: the result of ``make_qname`` for the two known prefixes;
        ``tag`` unchanged when it has no prefix or an unrecognised one
    """
    prefix, separator, local_name = tag.partition(':')
    if not separator:
        # No colon at all: nothing to qualify.
        return tag

    if prefix == 'tt':
        return make_qname(tt_ns, local_name)
    if prefix == 'ttm':
        return make_qname(tt_ns + '#metadata', local_name)

    return tag
def qualifyTags(attr_to_ell_map: dict[str, list[str]],
                tt_ns: str) -> dict[str, list[str]]:
    """
    Apply ``qualify`` to every key and every list entry of a mapping.

    :param attr_to_ell_map: attribute name -> list of element names,
        each optionally carrying a namespace prefix
    :param tt_ns: namespace passed through to ``qualify``
    :return: a new dict with the same shape and all names qualified
    """
    qualified = {}
    for attr_name, element_names in attr_to_ell_map.items():
        qualified_attr = qualify(tag=attr_name, tt_ns=tt_ns)
        qualified[qualified_attr] = [
            qualify(tag=element_name, tt_ns=tt_ns)
            for element_name in element_names
        ]
    return qualified
class IDREFSelementApplicabilityCheck(XmlCheck):
    """
    Checks that IDREFS attributes dereference to an appropriate element.
    """

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Validate reference attributes on ``input`` and its descendants.

        Relies on ``context['xmlId_to_element_map']`` (xml:id value ->
        list of elements); when that entry is absent the check is logged
        as skipped and passes. The TTML namespace is read from
        ``context['root_ns']``, falling back to ``ns_ttml``.

        For every known reference attribute an error is logged when it
        holds no reference, when a single-reference attribute holds more
        than one, or when a reference does not resolve to exactly one
        element whose tag is acceptable for that attribute.

        :return: False if any error was logged, True otherwise
        """
        elements_by_id = context.get('xmlId_to_element_map')
        if elements_by_id is None:
            validation_results.skip(
                location=input.tag,
                message='Skipping IDREFS element applicability checks',
                code=ValidationCode.ttml_idref_element_applicability
            )
            return True

        # Qualify the attribute and element names
        namespace = context.get('root_ns', ns_ttml)
        single_ref_attrs = qualifyTags(
            attr_to_ell_map=IDREF_attr_to_applicable_elements,
            tt_ns=namespace)
        multi_ref_attrs = qualifyTags(
            attr_to_ell_map=IDREFS_attr_to_applicable_elements,
            tt_ns=namespace)
        # Single-reference entries are merged last so that they take
        # precedence if an attribute name appears in both tables.
        acceptable_by_attr = {**multi_ref_attrs, **single_ref_attrs}

        passed = True

        # Walk the element itself and all of its descendants
        for element in input.iter():
            reference_attrs = sorted(
                name for name in element.keys()
                if name in acceptable_by_attr)

            for attr_name in reference_attrs:
                acceptable_tags = acceptable_by_attr[attr_name]
                references = element.get(attr_name, '').split()

                if not references:
                    validation_results.error(
                        location='{} element {} attribute'
                                 .format(element.tag, attr_name),
                        message='Attribute must reference an element',
                        code=ValidationCode.ttml_idref_empty
                    )
                    passed = False
                elif len(references) > 1 and attr_name in single_ref_attrs:
                    validation_results.error(
                        location='{} element {} attribute'
                                 .format(element.tag, attr_name),
                        message='Attribute has {} references, 1 permitted'
                                .format(len(references)),
                        code=ValidationCode.ttml_idref_too_many
                    )
                    passed = False

                for reference in references:
                    candidates = elements_by_id.get(reference)
                    # Only an unambiguous match counts as a target.
                    target = None
                    if candidates is not None and len(candidates) == 1:
                        target = candidates[0]

                    if target is not None and target.tag in acceptable_tags:
                        continue

                    # Possible TODO: include element referenced,
                    # and acceptable list
                    validation_results.error(
                        location='{} element {} attribute reference {}'
                                 .format(element.tag, attr_name, reference),
                        message='Attribute references {} element, '
                                'not in the list of acceptable elements'
                                .format(
                                    target.tag if target is not None
                                    else 'no'),
                        code=ValidationCode
                        .ttml_idref_element_applicability
                    )
                    passed = False

        return passed