Source code for src.xmlChecks.daptmRepresentsCheck

# SPDX-FileCopyrightText: Copyright © 2026 BBC
#
# SPDX-License-Identifier: BSD-3-Clause

from src.validationLogging.validationCodes import ValidationCode
from src.validationLogging.validationLogger import ValidationLogger
from xml.etree.ElementTree import Element
from src.xmlUtils import make_qname
from .daptUtils import isScriptEvent, isText, ns_daptm
from .ttmlUtils import ns_ttml
from .xmlCheck import XmlCheck
from src.registries.contentDescriptorRegistry import \
    content_descriptor_registry_entries, \
    content_descriptor_user_defined_value_prefix


def _tokenise_content_descriptor(descriptor: str) -> list[str]:
    return descriptor.split('.')


def _is_content_descriptor_subtype(subtype: str, parent: str) -> bool:
    """
    Report whether ``subtype`` is ``parent`` itself or a refinement of it.

    Both descriptors are tokenised; ``subtype`` qualifies when its leading
    tokens are exactly the tokens of ``parent``, so a descriptor counts as
    a sub-type of itself.
    """
    parent_tokens = _tokenise_content_descriptor(parent)
    subtype_tokens = _tokenise_content_descriptor(subtype)
    leading_tokens = subtype_tokens[:len(parent_tokens)]
    return leading_tokens == parent_tokens


# Registry entries pre-split into token lists, built once at import time so
# that candidate token lists can be compared against them directly.
tokenised_content_descriptor_registry_entries = list(
    map(_tokenise_content_descriptor, content_descriptor_registry_entries)
)



[docs]
class daptmRepresentsCheck(XmlCheck):
    """
    Checks values of daptm:scriptRepresents and daptm:represents attributes

    The check verifies that:

    * the root element carries a daptm:scriptRepresents attribute whose
      whitespace-separated values are all valid content descriptors;
    * every daptm:represents attribute (on the root element or any
      descendant) is on a permitted element, holds a valid content
      descriptor and is a sub-type of one of the valid scriptRepresents
      values;
    * the computed (inherited) daptm:represents value is valid on every
      element identified as a script event or text.
    """

    def __init__(self) -> None:
        super().__init__()

    def _is_valid_content_descriptor(self,
                                     descriptor: str) -> bool:
        """
        Return True if ``descriptor`` is a valid content descriptor.

        The tokens preceding the first token that begins with the
        user-defined value prefix must, taken together, exactly match an
        entry in the content descriptor registry. Everything from the
        first user-defined token onwards is accepted without further
        checks. A descriptor whose first token is user-defined is
        therefore valid.
        """
        descriptor_tokens = _tokenise_content_descriptor(
            descriptor=descriptor)

        # Collect everything up to and excluding the first token
        # beginning with the user defined value prefix
        registered_tokens = []
        user_defined_token_found = False
        for token in descriptor_tokens:
            if token.startswith(content_descriptor_user_defined_value_prefix):
                user_defined_token_found = True
                break
            registered_tokens.append(token)

        if registered_tokens:
            return registered_tokens \
                in tokenised_content_descriptor_registry_entries

        # No registered tokens at all: only acceptable when that is
        # because the very first token was user-defined.
        return user_defined_token_found

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Validate daptm:scriptRepresents and daptm:represents usage.

        :param input: root element of the document being checked
        :param context: shared check context; ``root_ns`` is read from it
            (defaulting to the TTML namespace) to build element names
        :param validation_results: logger that receives an error for
            every problem found
        :return: True if no problems were found, otherwise False
        """
        tt_ns = context.get('root_ns', ns_ttml)
        scriptRepresents_attr_tag = make_qname(ns_daptm, 'scriptRepresents')
        represents_attr_tag = make_qname(ns_daptm, 'represents')
        permitted_represents_el_tags = [
            make_qname(namespace=tt_ns, name=el_name)
            for el_name in ['tt', 'body', 'div', 'p', 'span']
        ]
        required_computed_represents_el_tags = [
            make_qname(namespace=tt_ns, name=el_name)
            for el_name in ['div', 'p', 'span']
        ]
        valid = True

        # Get tt/daptm:scriptRepresents value which MUST be present
        scriptRepresents_val = input.get(scriptRepresents_attr_tag)
        scriptRepresents_vals = []
        if scriptRepresents_val is None:
            valid = False
            validation_results.error(
                location='{} element'.format(input.tag),
                message='Required daptm:scriptRepresents attribute is missing',
                code=ValidationCode.dapt_metadata_scriptRepresents
            )
        else:
            # Split on white space, check each value is valid, and keep
            # only the valid ones for the sub-type checks below
            for val in scriptRepresents_val.split():
                if self._is_valid_content_descriptor(descriptor=val):
                    scriptRepresents_vals.append(val)
                else:
                    valid = False
                    validation_results.error(
                        location='{} element daptm:scriptRepresents attribute'
                                 .format(input.tag),
                        message='Value {} is not a valid content descriptor'
                                .format(val),
                        code=ValidationCode.dapt_metadata_scriptRepresents
                    )

        # Get elements with represents attribute
        # For each one:
        #   check it is present on an element where it's allowed
        #   check it is a valid value
        #   check it is a sub-type of a value in scriptRepresents
        els = input.findall('.//*[@{}]'.format(represents_attr_tag))
        # The path above only matches descendants, so add the root
        # element explicitly when it carries the attribute; otherwise
        # its value would never be checked here.
        if represents_attr_tag in input.attrib:
            els.insert(0, input)

        for el in els:
            if el.tag not in permitted_represents_el_tags:
                valid = False
                validation_results.error(
                    location='{} element'.format(el.tag),
                    message='daptm:represents attribute not permitted '
                            'on this element',
                    code=ValidationCode.dapt_metadata_represents
                )

            represents_val = el.get(represents_attr_tag, '')
            if not self._is_valid_content_descriptor(represents_val):
                valid = False
                validation_results.error(
                    location='{} element daptm:represents attribute'
                             .format(el.tag),
                    message='Invalid content descriptor "{}"'
                            .format(represents_val),
                    code=ValidationCode.dapt_metadata_content_descriptor
                )

            is_subtype_of_scriptRepresents = any(
                _is_content_descriptor_subtype(
                    subtype=represents_val,
                    parent=parent)
                for parent in scriptRepresents_vals
            )

            if not is_subtype_of_scriptRepresents:
                valid = False
                validation_results.error(
                    location='{} element daptm:represents attribute'
                             .format(el.tag),
                    message='Content descriptor "{}" is not a subtype '
                            'of scriptRepresents values {}'
                            .format(represents_val, scriptRepresents_vals),
                    code=ValidationCode.dapt_metadata_represents
                )

        # Iterate through the tree to derive the computed represents.
        # For each element that requires a valid computed represents attribute:
        # check the computed represents attribute is valid - this will
        # catch empty computed represents attributes on the relevant
        # elements
        valid &= self.recursively_compute_child_represents(
            input=input,
            parent_computed_represents='',
            represents_attr_tag=represents_attr_tag,
            permitted_represents_el_tags=permitted_represents_el_tags,
            required_computed_represents_el_tags=required_computed_represents_el_tags,
            validation_results=validation_results
        )

        return valid

    def recursively_compute_child_represents(
            self,
            input: Element,
            parent_computed_represents: str,
            represents_attr_tag: str,
            permitted_represents_el_tags: list[str],
            required_computed_represents_el_tags: list[str],
            validation_results: ValidationLogger,
            ) -> bool:
        """
        Check the computed daptm:represents value of ``input`` and of its
        descendants.

        The computed value of an element is its own daptm:represents
        attribute if specified, otherwise the computed value of its
        parent. An error is logged for each element that ``isScriptEvent``
        or ``isText`` accepts and whose computed value is not a valid
        content descriptor.

        :param input: element to check
        :param parent_computed_represents: computed value of the parent
            element (the empty string for the root)
        :param represents_attr_tag: qualified name of daptm:represents
        :param permitted_represents_el_tags: tags of the child elements
            that are descended into
        :param required_computed_represents_el_tags: accepted and passed
            down the recursion but not otherwise used here
        :param validation_results: logger that receives the errors
        :return: True if no problems were found in this subtree
        """
        valid = True

        # A specified attribute (even an empty one) overrides the
        # inherited value.
        this_computed_represents = input.get(
            represents_attr_tag, parent_computed_represents)

        if (isScriptEvent(el=input) or isText(el=input)) \
           and not self._is_valid_content_descriptor(this_computed_represents):
            valid = False
            validation_results.error(
                location='{} element daptm:represents attribute'
                         .format(input.tag),
                message='Computed value "{}" is not valid'
                        .format(this_computed_represents),
                code=ValidationCode.dapt_metadata_represents
            )

        for child in input:
            if child.tag not in permitted_represents_el_tags:
                continue
            # &= always evaluates the call, so every subtree is visited
            # and logged even after an earlier failure.
            valid &= self.recursively_compute_child_represents(
                input=child,
                parent_computed_represents=this_computed_represents,
                represents_attr_tag=represents_attr_tag,
                permitted_represents_el_tags=permitted_represents_el_tags,
                required_computed_represents_el_tags=required_computed_represents_el_tags,
                validation_results=validation_results
            )

        return valid