Source code for src.xmlChecks.daptmRepresentsCheck

# SPDX-FileCopyrightText: Copyright © 2026 BBC
#
# SPDX-License-Identifier: BSD-3-Clause

from src.validationLogging.validationCodes import ValidationCode
from src.validationLogging.validationLogger import ValidationLogger
from xml.etree.ElementTree import Element
from src.xmlUtils import make_qname
from .daptUtils import isScriptEvent, isText, ns_daptm
from .ttmlUtils import ns_ttml
from .xmlCheck import XmlCheck
from src.registries.contentDescriptorRegistry import \
    content_descriptor_registry_entries, \
    content_descriptor_user_defined_value_prefix


def _tokenise_content_descriptor(descriptor: str) -> list[str]:
    return descriptor.split('.')


def _is_content_descriptor_subtype(subtype: str, parent: str) -> bool:
    """
    Report whether ``subtype`` is the same as, or more specific than,
    ``parent``.

    Both descriptors are tokenised; the result is True when the tokens of
    ``parent`` are exactly the leading tokens of ``subtype``. A descriptor
    is therefore considered a sub-type of itself, and a descriptor with
    fewer tokens than ``parent`` is never a sub-type of it.
    """
    subtype_tokens = _tokenise_content_descriptor(subtype)
    parent_tokens = _tokenise_content_descriptor(parent)
    # Only the leading tokens of the candidate take part in the comparison
    leading_subtype_tokens = subtype_tokens[:len(parent_tokens)]
    return leading_subtype_tokens == parent_tokens


# Registry entries split into tokens once, at import time, so that
# candidate descriptors can be compared against them token-wise.
tokenised_content_descriptor_registry_entries = list(
    map(_tokenise_content_descriptor, content_descriptor_registry_entries)
)


class daptmRepresentsCheck(XmlCheck):
    """
    Checks values of daptm:scriptRepresents and daptm:represents attributes
    """

    def __init__(self) -> None:
        super().__init__()

    def _is_valid_content_descriptor(self, descriptor: str) -> bool:
        """
        Report whether ``descriptor`` is a valid content descriptor.

        The descriptor is split into tokens. The tokens that precede the
        first token beginning with the user defined value prefix must,
        taken together, match a registry entry exactly. Tokens from the
        first user defined token onwards are not checked. A descriptor
        that begins with a user defined token is valid.

        :param descriptor: the content descriptor to check
        :return: True if the descriptor is valid, otherwise False
        """
        descriptor_tokens = _tokenise_content_descriptor(
            descriptor=descriptor)

        # Collect everything up to and excluding the first token
        # beginning with the user defined value prefix
        non_user_defined_tokens = []
        user_defined_token_found = False
        for token in descriptor_tokens:
            if token.startswith(content_descriptor_user_defined_value_prefix):
                user_defined_token_found = True
                break
            non_user_defined_tokens.append(token)

        if not non_user_defined_tokens:
            # Nothing to look up in the registry: only acceptable if that
            # is because the descriptor starts with a user defined token
            return user_defined_token_found

        return non_user_defined_tokens \
            in tokenised_content_descriptor_registry_entries

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Validate daptm:scriptRepresents on ``input`` and every
        daptm:represents attribute on ``input`` and its descendants.

        Errors are reported via ``validation_results`` when:

        * daptm:scriptRepresents is missing from ``input`` or contains a
          value that is not a valid content descriptor;
        * daptm:represents appears on an element other than tt, body,
          div, p or span in the document's TT namespace;
        * a daptm:represents value is not a valid content descriptor;
        * a daptm:represents value is not a sub-type of any valid
          daptm:scriptRepresents value;
        * the computed daptm:represents value is invalid on an element
          identified by ``isScriptEvent`` or ``isText``.

        :param input: the element to check, expected to carry
            daptm:scriptRepresents
        :param context: validation context; 'root_ns' is read, if
            present, as the TT namespace, falling back to ``ns_ttml``
        :param validation_results: logger that receives the errors
        :return: True if no error was reported, otherwise False
        """
        tt_ns = context.get('root_ns', ns_ttml)
        scriptRepresents_attr_tag = make_qname(ns_daptm, 'scriptRepresents')
        represents_attr_tag = make_qname(ns_daptm, 'represents')
        permitted_represents_el_tags = [
            make_qname(namespace=tt_ns, name=el_name)
            for el_name in ['tt', 'body', 'div', 'p', 'span']
        ]
        required_computed_represents_el_tags = [
            make_qname(namespace=tt_ns, name=el_name)
            for el_name in ['div', 'p', 'span']
        ]

        valid = True

        # Get tt/daptm:scriptRepresents value which MUST be present
        scriptRepresents_val = input.get(scriptRepresents_attr_tag)
        scriptRepresents_vals = []
        if scriptRepresents_val is None:
            valid = False
            validation_results.error(
                location='{} element'.format(input.tag),
                message='Required daptm:scriptRepresents attribute is missing',
                code=ValidationCode.dapt_metadata_scriptRepresents
            )
        else:
            # Split on white space, check each value is valid, and keep
            # only the valid ones for the sub-type checks below
            for val in scriptRepresents_val.split():
                if self._is_valid_content_descriptor(descriptor=val):
                    scriptRepresents_vals.append(val)
                else:
                    valid = False
                    validation_results.error(
                        location='{} element daptm:scriptRepresents attribute'
                        .format(input.tag),
                        message='Value {} is not a valid content descriptor'
                        .format(val),
                        code=ValidationCode.dapt_metadata_scriptRepresents
                    )

        # Get elements with represents attribute
        # For each one:
        #   check it is present on an element where it's allowed
        #   check it is a valid value
        #   check it is a sub-type of a value in scriptRepresents
        els = input.findall('.//*[@{}]'.format(represents_attr_tag))
        # './/*' selects descendants only, so the root element has to be
        # added explicitly, otherwise an attribute on it goes unchecked
        if input.get(represents_attr_tag) is not None:
            els.insert(0, input)

        for el in els:
            if el.tag not in permitted_represents_el_tags:
                valid = False
                validation_results.error(
                    location='{} element'.format(el.tag),
                    message='daptm:represents attribute not permitted '
                            'on this element',
                    code=ValidationCode.dapt_metadata_represents
                )

            represents_val = el.get(represents_attr_tag, '')
            if not self._is_valid_content_descriptor(represents_val):
                valid = False
                validation_results.error(
                    location='{} element daptm:represents attribute'
                    .format(el.tag),
                    message='Invalid content descriptor "{}"'
                    .format(represents_val),
                    code=ValidationCode.dapt_metadata_content_descriptor
                )

            is_subtype_of_scriptRepresents = any(
                _is_content_descriptor_subtype(
                    subtype=represents_val,
                    parent=parent)
                for parent in scriptRepresents_vals
            )
            if not is_subtype_of_scriptRepresents:
                valid = False
                validation_results.error(
                    location='{} element daptm:represents attribute'
                    .format(el.tag),
                    message='Content descriptor "{}" is not a subtype '
                            'of scriptRepresents values {}'
                    .format(represents_val, scriptRepresents_vals),
                    code=ValidationCode.dapt_metadata_represents
                )

        # Iterate through the tree to derive the computed represents.
        # For each element that requires a valid computed represents
        # attribute, check the computed represents attribute is valid -
        # this will catch empty computed represents attributes on the
        # relevant elements.
        # '&=' rather than 'and' so the traversal always runs and reports
        # its errors even if earlier checks have already failed.
        valid &= self.recursively_compute_child_represents(
            input=input,
            parent_computed_represents='',
            represents_attr_tag=represents_attr_tag,
            permitted_represents_el_tags=permitted_represents_el_tags,
            required_computed_represents_el_tags=required_computed_represents_el_tags,
            validation_results=validation_results
        )

        return valid

    def recursively_compute_child_represents(
            self,
            input: Element,
            parent_computed_represents: str,
            represents_attr_tag: str,
            permitted_represents_el_tags: list[str],
            required_computed_represents_el_tags: list[str],
            validation_results: ValidationLogger,
            ) -> bool:
        """
        Compute the daptm:represents value for ``input`` and, recursively,
        for its children, checking it where a valid value is required.

        The computed value is the element's own attribute value if the
        attribute is present, otherwise the value computed for its
        parent. An error is reported when the computed value is not a
        valid content descriptor on an element for which ``isScriptEvent``
        or ``isText`` returns true.

        :param input: the element to process
        :param parent_computed_represents: value computed for the parent
            element; pass an empty string for the top of the tree
        :param represents_attr_tag: qualified name of the represents
            attribute
        :param permitted_represents_el_tags: qualified tags of the
            elements to descend into; other children are skipped
        :param required_computed_represents_el_tags: not used by this
            method other than being passed on to the recursive calls
        :param validation_results: logger that receives the errors
        :return: True if no error was reported for ``input`` or any
            processed descendant, otherwise False
        """
        valid = True

        # Own value wins; fall back to the inherited one when absent
        this_computed_represents = input.get(
            represents_attr_tag, parent_computed_represents)

        if (isScriptEvent(el=input) or isText(el=input)) \
           and not self._is_valid_content_descriptor(this_computed_represents):
            valid = False
            validation_results.error(
                location='{} element daptm:represents attribute'
                .format(input.tag),
                message='Computed value "{}" is not valid'
                .format(this_computed_represents),
                code=ValidationCode.dapt_metadata_represents
            )

        for child in input:
            if child.tag not in permitted_represents_el_tags:
                continue
            # '&=' so every child is visited even after a failure
            valid &= self.recursively_compute_child_represents(
                input=child,
                parent_computed_represents=this_computed_represents,
                represents_attr_tag=represents_attr_tag,
                permitted_represents_el_tags=permitted_represents_el_tags,
                required_computed_represents_el_tags=required_computed_represents_el_tags,
                validation_results=validation_results
            )

        return valid