# SPDX-FileCopyrightText: Copyright © 2026 BBC
#
# SPDX-License-Identifier: BSD-3-Clause
from src.validationLogging.validationCodes import ValidationCode
from src.validationLogging.validationLogger import ValidationLogger
from xml.etree.ElementTree import Element
from src.xmlUtils import get_unqualified_name, \
xmlIdAttr, unqualifiedIdAttr, make_qname
from .xmlCheck import XmlCheck
from .ttmlUtils import ns_ttml
class requireXmlId(XmlCheck):
    """
    Checks that an element carries an xml:id attribute
    """

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Report an error if input has no xml:id attribute

        :param input: the element to inspect; only its own attributes
            are examined, not those of its descendants
        :param context: validation context, not used by this check
        :param validation_results: logger that receives the error, if any
        :return: True if the attribute is present, otherwise False
        """
        if xmlIdAttr in input.keys():
            return True

        element_name = get_unqualified_name(input.tag)
        validation_results.error(
            location='{} xml:id omitted'.format(element_name),
            message='Element missing required xml:id attribute',
            code=ValidationCode.ebuttd_p_xml_id_constraint
        )
        return False
class unqualifiedIdAttributeCheck(XmlCheck):
    """
    Warns when elements carry an unqualified id attribute

    Reports how many elements have an unqualified id attribute and how
    many of those have no xml:id attribute, so that the author can check
    whether xml:id was intended. This check never fails validation.
    """

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Count unqualified id attributes on input and all its descendants

        :param input: element at the top of the subtree to inspect
        :param context: validation context, not used by this check
        :param validation_results: logger that receives at most one
            warning summarising the counts
        :return: always True
        """
        num_elements_with_unq_id = 0
        num_elements_with_unq_id_and_no_xml_id = 0

        # iter() visits input itself as well as every descendant, whereas
        # a './/*' path search only matches descendants and would miss an
        # unqualified id placed on input.
        for el in input.iter():
            if unqualifiedIdAttr not in el.attrib:
                continue
            num_elements_with_unq_id += 1
            if xmlIdAttr not in el.attrib:
                num_elements_with_unq_id_and_no_xml_id += 1

        # The "no xml:id" count can never exceed the total, so testing
        # the total alone is sufficient.
        if num_elements_with_unq_id > 0:
            validation_results.warn(
                location='Parsed document',
                message='{} elements have unqualified id attributes, '
                        'of which {} have no xml:id attribute. '
                        'Check if they should have xml:id attributes!'
                        .format(
                            num_elements_with_unq_id,
                            num_elements_with_unq_id_and_no_xml_id
                        ),
                code=ValidationCode.xml_id_unqualified
            )

        # Never fail on this
        return True
class duplicateXmlIdCheck(XmlCheck):
    """
    Checks that every xml:id value is used by at most one element

    As a side effect the mapping from xml:id value to the list of
    elements carrying it is stored in the context under the key
    'xmlId_to_element_map'.
    """

    @classmethod
    def _gatherXmlId(cls, e: Element, m: dict[str, list]):
        """
        Record e in m under its xml:id value

        Elements whose xml:id is absent or empty are ignored.

        :param e: the element to record
        :param m: mapping from xml:id value to the elements carrying it,
            updated in place
        """
        xml_id = e.get(xmlIdAttr)
        if not xml_id:
            return
        m.setdefault(xml_id, []).append(e)

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Report an error for each xml:id value carried by several elements

        :param input: element at the top of the subtree to inspect; it is
            examined together with all of its descendants
        :param context: validation context; receives the xml:id to
            elements mapping under 'xmlId_to_element_map'
        :param validation_results: logger that receives one error per
            duplicated value, or a single success entry if there are none
        :return: True if no xml:id value is duplicated, otherwise False
        """
        elements_by_id = {}
        for element in input.iter():
            duplicateXmlIdCheck._gatherXmlId(e=element, m=elements_by_id)

        # Keep only the values that more than one element claims
        duplicates = {
            id_value: holders
            for id_value, holders in elements_by_id.items()
            if len(holders) > 1
        }

        for id_value, holders in duplicates.items():
            validation_results.error(
                location=', '.join(holder.tag for holder in holders),
                message='Duplicate xml:id found with value ' + id_value,
                code=ValidationCode.xml_id_unique
            )

        all_unique = not duplicates
        if all_unique:
            validation_results.good(
                location='Parsed document',
                message='xml:id values are unique',
                code=ValidationCode.xml_id_unique
            )

        # Stored regardless of the outcome so that later checks can
        # dereference ids
        context['xmlId_to_element_map'] = elements_by_id
        return all_unique
# Attributes that take a single reference, mapped to the names of the
# elements that the reference may point to.
# Namespace prefixes will be mapped to document namespace later
IDREF_attr_to_applicable_elements = {
    'agent': [
        'ttm:agent',
    ],
    'region': [
        # region must have a layout element ancestor
        'tt:region',
    ],
}

# Attributes that take a whitespace-separated list of references, mapped
# to the names of the elements that each reference may point to.
# Namespace prefixes will be mapped to document namespace later
IDREFS_attr_to_applicable_elements = {
    'style': [
        # style must have a styling element ancestor
        'tt:style',
    ],
    'animate': [
        # both must have an animation element ancestor
        'tt:animate',
        'tt:set',
    ],
    'ttm:agent': [
        # ttm:agent attribute != agent attribute!
        'ttm:agent',
    ],
}
def qualify(tag: str, tt_ns: str) -> str:
    """
    Map a 'tt:' or 'ttm:' prefixed name onto the namespace tt_ns

    Names without a colon, and names whose prefix is neither 'tt' nor
    'ttm', are returned unchanged.

    :param tag: element or attribute name, optionally prefixed
    :param tt_ns: namespace used for the 'tt' prefix; the 'ttm' prefix
        uses the same value with '#metadata' appended
    :return: the result of make_qname for recognised prefixes, otherwise
        tag itself
    """
    prefix, colon, local_name = tag.partition(':')
    if not colon:
        return tag
    if prefix == 'tt':
        return make_qname(tt_ns, local_name)
    if prefix == 'ttm':
        return make_qname(tt_ns + '#metadata', local_name)
    return tag
class IDREFSelementApplicabilityCheck(XmlCheck):
    """
    Checks that IDREF and IDREFS attributes dereference to an
    appropriate element.
    """

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Validate every reference attribute on input and its descendants

        The check needs the mapping stored in the context under
        'xmlId_to_element_map'; if it is absent the check is skipped and
        reports success.

        :param input: element at the top of the subtree to inspect
        :param context: validation context; 'xmlId_to_element_map' is
            read, and 'root_ns' is used as the document namespace if
            present, falling back to ns_ttml
        :param validation_results: logger that receives a skip entry, or
            one error per problem found
        :return: False if any error was reported, otherwise True
        """
        elements_by_id = context.get('xmlId_to_element_map')
        if elements_by_id is None:
            validation_results.skip(
                location=input.tag,
                message='Skipping IDREFS element applicability checks',
                code=ValidationCode.ttml_idref_element_applicability
            )
            return True

        # Qualify the attribute and element names.
        # NOTE(review): qualifyTags is not defined in the visible part of
        # this module; presumably it applies qualify() to the keys and
        # values of the given map - confirm.
        document_ns = context.get('root_ns', ns_ttml)
        single_ref_attrs = qualifyTags(
            attr_to_ell_map=IDREF_attr_to_applicable_elements,
            tt_ns=document_ns)
        multi_ref_attrs = qualifyTags(
            attr_to_ell_map=IDREFS_attr_to_applicable_elements,
            tt_ns=document_ns)
        reference_attrs = \
            set(single_ref_attrs.keys()) | set(multi_ref_attrs.keys())

        all_ok = True
        for el in input.iter():
            # Sorted so that results are reported in a stable order
            for attr in sorted(set(el.keys()) & reference_attrs):
                takes_single_ref = attr in single_ref_attrs
                if takes_single_ref:
                    acceptable_tags = single_ref_attrs[attr]
                else:
                    acceptable_tags = multi_ref_attrs[attr]

                references = el.get(attr, '').split()
                if not references:
                    validation_results.error(
                        location='{} element {} attribute'
                                 .format(el.tag, attr),
                        message='Attribute must reference an element',
                        code=ValidationCode.ttml_idref_empty
                    )
                    all_ok = False
                elif takes_single_ref and len(references) > 1:
                    validation_results.error(
                        location='{} element {} attribute'
                                 .format(el.tag, attr),
                        message='Attribute has {} references, 1 permitted'
                                .format(len(references)),
                        code=ValidationCode.ttml_idref_too_many
                    )
                    all_ok = False

                # Each reference is still checked individually, even when
                # too many were supplied.
                for reference in references:
                    # An id that is unknown, or that is carried by more
                    # than one element, is treated as resolving to nothing
                    candidates = elements_by_id.get(reference)
                    target = None
                    if candidates is not None and len(candidates) == 1:
                        target = candidates[0]

                    if target is not None and target.tag in acceptable_tags:
                        continue

                    # Possible TODO: include element referenced,
                    # and acceptable list
                    target_name = target.tag if target is not None else 'no'
                    validation_results.error(
                        location='{} element {} attribute reference {}'
                                 .format(el.tag, attr, reference),
                        message='Attribute references {} element, '
                                'not in the list of acceptable elements'
                                .format(target_name),
                        code=ValidationCode
                        .ttml_idref_element_applicability
                    )
                    all_ok = False

        return all_ok