# SPDX-FileCopyrightText: Copyright © 2026 BBC
#
# SPDX-License-Identifier: BSD-3-Clause
from math import floor
from src.validationLogging.validationCodes import ValidationCode
from src.validationLogging.validationLogger import ValidationLogger
from xml.etree.ElementTree import Element
from src.xmlUtils import get_unqualified_name, make_qname, \
xmlIdAttr
from .xmlCheck import XmlCheck
from .ttmlUtils import ns_ttml
from src.timeExpression import TimeExpressionHandler
from src.styleAttribs import two_percent_vals_regex
from operator import itemgetter
import traceback
# Names of the timing attributes whose values are validated on each element.
timing_attr_keys = ['begin', 'end', 'dur']
# [docs]  (documentation-viewer link artifact, not part of the module)
class bbcTimingCheck(XmlCheck):
    """
    Checks timings in document

    Things we need to check:

    * Time expressions are well formed in begin, end, dur attributes:

      * clock-time hh:mm:ss or hh:mm:ss.sss

    * dur attributes are reported as errors because they are not
      permitted in EBU-TT-D

    Extra things to check:

    * What's the earliest begin and latest end time, for info
    * In case this is in a segment, do the times overlap the segment
      interval?
    * Are there enough distinct subtitles in the first 23 minutes?
    * Are the gaps between subtitles long enough or zero?
    * Do any p elements overlap both temporally and spatially?

    NOTE(review): earlier documentation of this class also listed a
    timeContainer check (error unless the value is "par", warn if present
    and "par"); no such check is implemented in this class - confirm
    whether it lives elsewhere.
    """

    # A non-zero gap between subtitles shorter than this (seconds) is an
    # error.
    _min_short_gap = 0.8
    # A gap at least _min_short_gap but shorter than this (seconds) is a
    # warning.
    _desired_min_gap = 1.5
    # Minimum number of p elements that must begin before the early begin
    # threshold.
    _min_count_early_begins = 2
    _early_begin_threshold = 23 * 60  # First 23 minutes, in seconds

    def __init__(self,
                 epoch: float = 0.0,
                 segment_dur: float | None = None,
                 segment_relative_timing: bool = False):
        """
        :param epoch: offset in seconds; added to the early begin
            threshold, and used as the start of the segment interval
            unless segment_relative_timing is True
        :param segment_dur: duration of the segment in seconds, or None
            if the document is not being checked against a segment
        :param segment_relative_timing: if True, the segment interval is
            taken to start at 0 rather than at epoch
        """
        super().__init__()
        self._epoch = epoch
        self._segment_dur = segment_dur
        self._segment_relative_timing = segment_relative_timing

    def _collect_timed_elements(
            self,
            te: TimeExpressionHandler,
            el: Element,
            epoch_s: float,
            parent_end: float | None,
            begin_defined: bool,
            end_defined: bool,
            time_el_map: dict[float, list[tuple[Element, float | None]]],
            validation_results: ValidationLogger,
            ) -> tuple[bool, float, float | None]:
        """
        Recursively compute the begin and end times of el and of its div,
        p and span descendants, recording each of them in time_el_map.

        :param te: handler used to validate time expressions and convert
            them to seconds
        :param el: the element to process
        :param epoch_s: time in seconds that el's begin and end attributes
            are relative to
        :param parent_end: end time in seconds inherited from the parent,
            or None if no end has been defined
        :param begin_defined: True if an ancestor has a begin attribute
        :param end_defined: True if an ancestor has an end attribute
        :param time_el_map: updated in place; maps each computed begin
            time to a list of (element, computed end) tuples
        :param validation_results: logger that errors are reported to
        :return: tuple of (valid, computed begin, computed end), where the
            computed end is None if it is not defined
        """
        valid = True
        attr_names = el.keys()
        location = '{} element xml:id {}'.format(
            el.tag,
            el.get(xmlIdAttr, 'omitted'))

        for timing_attr in timing_attr_keys:
            if timing_attr in attr_names \
                    and not te.isNonFrameClockTime(el.get(timing_attr, '')):
                valid = False
                validation_results.error(
                    location=location,
                    message='{}={} is not a valid non-frame clock time'
                            .format(
                                timing_attr,
                                el.get(timing_attr)),
                    code=ValidationCode.ebuttd_timing_attribute_constraint
                )

        if 'begin' in attr_names:
            begin_defined = True
            this_begin = te.seconds(el.get('begin', ''))
        else:
            this_begin = 0
        this_epoch_s = epoch_s + this_begin

        if 'end' in attr_names:
            end_defined = True
            this_end = epoch_s + te.seconds(el.get('end', ''))
        else:
            this_end = parent_end
        # An element cannot remain active after its parent has ended
        if parent_end is not None and this_end is not None:
            this_end = min(parent_end, this_end)

        # Note: dur attribute prohibited in EBU-TT-D
        if 'dur' in attr_names:
            valid = False
            validation_results.error(
                location=location,
                message='dur attribute present, '
                        'not permitted in EBU-TT-D - '
                        'ignoring in time computations.',
                code=ValidationCode.ebuttd_timing_attribute_constraint
            )

        child_begins = []
        child_ends = []
        for child_el in el:
            # br and metadata elements cannot have begin attributes
            if get_unqualified_name(child_el.tag) \
                    not in ['div', 'p', 'span']:
                continue
            (child_valid, child_begin, child_end) = \
                self._collect_timed_elements(
                    te=te,
                    el=child_el,
                    epoch_s=this_epoch_s,
                    parent_end=this_end,
                    begin_defined=begin_defined,
                    end_defined=end_defined,
                    time_el_map=time_el_map,
                    validation_results=validation_results,
                )
            valid &= child_valid
            child_begins.append(child_begin)
            child_ends.append(child_end)

        # When neither this element nor any ancestor specifies a begin,
        # the element effectively begins with its earliest child.
        if not begin_defined and child_begins:
            this_epoch_s = min(child_begins)

        # Likewise, when no end is specified, the element ends with its
        # latest-ending child, which is not necessarily the last child in
        # document order. If any child has an undefined end then this
        # element's end is undefined too.
        if not end_defined and child_ends:
            this_end = None if None in child_ends else max(child_ends)

        time_el_map.setdefault(this_epoch_s, []).append((el, this_end))

        return (valid, this_epoch_s, this_end)

    def _makeTimeExpressionHandler(
            self,
            tt: Element,
            tt_ns: str,
            ) -> TimeExpressionHandler:
        """
        Build a TimeExpressionHandler from the frameRate,
        frameRateMultiplier and tickRate attributes of the tt element.

        :param tt: the element to read the attributes from
        :param tt_ns: namespace from which the parameter namespace is
            derived by appending '#parameter'
        :return: the configured TimeExpressionHandler
        """
        ttp_ns = tt_ns + '#parameter'

        def param_key(name: str) -> str:
            # Prefer the attribute qualified by the parameter namespace,
            # falling back to the unqualified attribute name.
            qualified = make_qname(ttp_ns, name)
            return qualified if qualified in tt.keys() else name

        return TimeExpressionHandler(
            framerate=tt.get(param_key('frameRate')),
            framerate_multiplier=tt.get(param_key('frameRateMultiplier')),
            tickrate=tt.get(param_key('tickRate'))
        )

    def _checkEnoughSubsAtBeginning(
            self,
            time_el_map: dict[float, list[tuple[Element, float | None]]],
            validation_results: ValidationLogger,
            ) -> bool:
        """
        Check that at least _min_count_early_begins p elements begin
        before the early begin threshold.

        :param time_el_map: map from begin time in seconds to a list of
            (element, end) tuples
        :param validation_results: logger that an error is reported to if
            too few p elements are found
        :return: True if enough p elements begin early, otherwise False
        """
        # NOTE(review): epoch is added here even when
        # segment_relative_timing is True, unlike in
        # _checkSubsOverlapSegment - confirm this is intended.
        early_begin_threshold = self._early_begin_threshold + self._epoch

        count_early_begins = sum(
            1
            for begin, el_list in time_el_map.items()
            if begin < early_begin_threshold
            for el, _ in el_list
            if get_unqualified_name(el.tag) == 'p')

        if count_early_begins >= self._min_count_early_begins:
            return True

        hours = floor(early_begin_threshold / 3600)
        minutes = floor((early_begin_threshold - hours * 3600) / 60)
        seconds = early_begin_threshold % 60
        validation_results.error(
            location='p elements beginning before {:02}:{:02}:{:06.3f}'
                     .format(
                         hours,
                         minutes,
                         seconds),
            message='{} subtitle(s) found, minimum {} required'
                    .format(
                        count_early_begins,
                        self._min_count_early_begins),
            code=ValidationCode.bbc_timing_minimum_subtitles
        )
        return False

    def _spatiallyOverlap(
            self,
            css_a: dict[str, str],
            css_b: dict[str, str]) -> bool:
        """
        Report whether the rectangles described by two sets of region
        styles intersect.

        :param css_a: styles of the first region; 'origin' defaults to
            '0% 0%' and 'extent' to '100% 100%' if absent
        :param css_b: styles of the second region, as for css_a
        :return: True if the rectangles intersect; rectangles that only
            touch along an edge do not intersect
        :raises ValueError: if an origin or extent cannot be decoded
        """
        # Inner function to extract the key edges
        # (left, right, top, bottom)
        def get_edges(css: dict[str, str]
                      ) -> tuple[float, float, float, float]:
            origin_match = two_percent_vals_regex.match(
                css.get('origin', '0% 0%'))
            extent_match = two_percent_vals_regex.match(
                css.get('extent', '100% 100%'))
            if origin_match is None or extent_match is None:
                raise ValueError(
                    'Cannot decode either origin {} or extent {} or both'
                    .format(css.get('origin'), css.get('extent')))

            left = float(origin_match.group('x'))
            top = float(origin_match.group('y'))
            right = left + float(extent_match.group('x'))
            bottom = top + float(extent_match.group('y'))
            return (left, right, top, bottom)

        la, ra, ta, ba = get_edges(css=css_a)
        lb, rb, tb, bb = get_edges(css=css_b)

        return ra > lb and la < rb and ta < bb and ba > tb

    def _getOverlappingRegions(
            self,
            region_id_to_css_map: dict[str, dict[str, str]],
            ) -> dict[str, list[str]]:
        """
        Find, for each region, the other regions that it spatially
        overlaps.

        :param region_id_to_css_map: map from region id to that region's
            styles
        :return: map from region id to the list of ids of the other
            regions it overlaps; regions that overlap no other region are
            omitted
        """
        rv = {}
        for region_id_a, css_a in region_id_to_css_map.items():
            # A region is never compared with itself: otherwise every
            # region with a non-zero extent would be listed as
            # overlapping, and filtering on this map would achieve
            # nothing.
            overlap_region_ids = [
                region_id_b
                for region_id_b, css_b in region_id_to_css_map.items()
                if region_id_b != region_id_a
                and self._spatiallyOverlap(css_a=css_a, css_b=css_b)
            ]
            if overlap_region_ids:
                rv[region_id_a] = overlap_region_ids

        return rv

    def _regionsOverlap(
            self,
            r_id1: str,
            r_id2: str,
            region_overlaps: dict[str, list[str]]
            ) -> bool:
        """
        Report whether two different regions are recorded as overlapping.

        :param r_id1: id of the first region
        :param r_id2: id of the second region
        :param region_overlaps: map from region id to the ids of the
            regions it overlaps
        :return: True if the ids differ and the first of the two ids found
            in region_overlaps lists the other one, otherwise False
        """
        if r_id1 == r_id2:
            return False

        for this_id, other_id in ((r_id1, r_id2), (r_id2, r_id1)):
            if this_id in region_overlaps:
                return other_id in region_overlaps[this_id]

        return False

    def _checkForOverlappingRegions(
            self,
            time_el_map: dict[float, list[tuple[Element, float | None]]],
            el_region_id_map: dict[Element, str],
            region_id_to_css_map: dict[str, dict[str, str]],
            validation_results: ValidationLogger,
            ) -> bool:
        """
        Check that no two p elements are active at the same time in
        different regions that spatially overlap.

        :param time_el_map: map from begin time in seconds to a list of
            (element, end) tuples, where an end of None is undefined
        :param el_region_id_map: map from element to the id of its region
        :param region_id_to_css_map: map from region id to that region's
            styles
        :param validation_results: logger that an error is reported to for
            every overlapping pair of elements
        :return: True if no overlapping pair was found, otherwise False
        """
        # Identify any regions that might overlap
        region_overlaps = self._getOverlappingRegions(
            region_id_to_css_map=region_id_to_css_map)

        # Inner function to reduce duplication later
        def validateOverlap(el: Element, oel: Element) -> bool:
            el_region = el_region_id_map.get(el)
            oel_region = el_region_id_map.get(oel)
            if not self._regionsOverlap(
                    r_id1=el_region,
                    r_id2=oel_region,
                    region_overlaps=region_overlaps):
                return True

            validation_results.error(
                location='<{}> xml:id={} region={} and '
                         '<{}> xml:id={} region={}'
                         .format(
                             el.tag,
                             el.get(xmlIdAttr, 'omitted'),
                             el_region,
                             oel.tag,
                             oel.get(xmlIdAttr, 'omitted'),
                             oel_region
                         ),
                message='Elements overlap spatially '
                        'and temporally',
                code=ValidationCode.ebuttd_overlapping_region_constraint
            )
            return False

        valid = True

        # Find the subset of elements that are associated
        # with any of those regions
        potential_overlap_elements = {
            k: v for k, v in el_region_id_map.items() if v in region_overlaps
        }

        # Keep only the p elements that are selected into a region that
        # overlaps some other region
        filtered_time_el_map = {
            begin: [
                (el, end) for el, end in el_list
                if el in potential_overlap_elements
                and get_unqualified_name(el.tag) == 'p'
            ]
            for begin, el_list in time_el_map.items()
        }

        sorted_begins = sorted(filtered_time_el_map)
        num_begins = len(sorted_begins)

        # Go through the filtered time element map finding
        # all pairs of elements that temporally overlap and
        # have regions that spatially overlap
        for begin_index, begin in enumerate(sorted_begins):
            el_end_list = filtered_time_el_map[begin]
            for el_index, (el, end) in enumerate(el_end_list):
                # First check other elements with same begin,
                # which by definition overlap temporally
                for oel, _ in el_end_list[el_index + 1:]:
                    valid &= validateOverlap(el, oel)

                # Then check for elements with later begins
                for other_index in range(begin_index + 1, num_begins):
                    other_begin = sorted_begins[other_index]
                    # An undefined end means el never ends, so it overlaps
                    # every element that begins later
                    if end is not None and end <= other_begin:
                        # Since it's an ordered list, we can skip
                        # all the later begin times: they also
                        # won't be earlier than end
                        break

                    for oel, _ in filtered_time_el_map[other_begin]:
                        valid &= validateOverlap(el, oel)

        return valid

    def _checkForShortGaps(
            self,
            time_el_map: dict[float, list[tuple[Element, float | None]]],
            validation_results: ValidationLogger,
            ) -> bool:
        """
        Check the gap between the latest end of the p and span elements
        at each begin time and the next begin time.

        A non-zero gap shorter than _min_short_gap is an error; a gap of
        at least _min_short_gap but shorter than _desired_min_gap is a
        warning.

        :param time_el_map: map from begin time in seconds to a list of
            (element, end) tuples, where an end of None is undefined
        :param validation_results: logger that errors and warnings are
            reported to
        :return: False if any gap was reported as an error, otherwise True
        """
        valid = True
        begin_end_list = []
        for begin, el_list in time_el_map.items():
            end_list = [end for el, end in el_list
                        if get_unqualified_name(el.tag) in ['span', 'p']]
            if not end_list:
                # Only containers begin at this time: there is no
                # subtitle here to measure a gap from or to
                continue
            max_end = None if None in end_list else max(end_list)
            begin_end_list.append((begin, max_end))

        begin_end_list.sort(key=itemgetter(0))

        for (_, this_end), (next_begin, _) in \
                zip(begin_end_list, begin_end_list[1:]):
            if this_end is None:
                # An undefined end cannot be followed by a gap
                continue

            gap_to_next = next_begin - this_end
            if 0 < gap_to_next < self._min_short_gap:
                valid = False
                validation_results.error(
                    location='Gap from {}s to {}s'.format(
                        this_end,
                        next_begin
                    ),
                    message='Non-zero gap between subtitles is '
                            'shorter than {}s'
                            .format(self._min_short_gap),
                    code=ValidationCode.bbc_timing_gaps
                )
            elif self._min_short_gap <= gap_to_next \
                    < self._desired_min_gap:
                validation_results.warn(
                    location='Gap from {}s to {}s'.format(
                        this_end,
                        next_begin
                    ),
                    message='Short gap between subtitles should be '
                            'at least {}s'
                            .format(self._desired_min_gap),
                    code=ValidationCode.bbc_timing_gaps
                )

        return valid

    def _checkSubsOverlapSegment(
            self,
            doc_begin: float,
            doc_end: float | None,
            validation_results: ValidationLogger) -> bool:
        """
        Check that the document's content overlaps the segment interval.

        Nothing is checked or logged if no segment duration was given.

        :param doc_begin: begin time of the content in seconds
        :param doc_end: end time of the content in seconds, or None if
            undefined
        :param validation_results: logger that the outcome is reported to
        :return: False if the content lies wholly outside the segment
            interval, otherwise True
        """
        if self._segment_dur is None:
            return True

        epoch = 0 if self._segment_relative_timing else self._epoch
        max_end = epoch + self._segment_dur

        # The segment interval is half-open, [epoch..max_end), so content
        # beginning exactly at max_end is outside it.
        begins_too_late = doc_begin >= max_end
        ends_too_early = doc_end is not None and doc_end <= epoch
        if begins_too_late or ends_too_early:
            validation_results.error(
                location='Timed content',
                message='Document content is timed outside the segment '
                        'interval [{}s..{}s)'.format(epoch, max_end),
                code=ValidationCode.bbc_timing_segment_overlap
            )
            return False

        validation_results.good(
            location='Timed content',
            message='Document content overlaps the segment '
                    'interval [{}s..{}s)'.format(epoch, max_end),
            code=ValidationCode.bbc_timing_segment_overlap
        )
        return True

    def run(
            self,
            input: Element,
            context: dict,
            validation_results: ValidationLogger) -> bool:
        """
        Run the timing checks on the body of a document.

        :param input: the root element of the document
        :param context: reads 'root_ns' (defaulting to the TTML
            namespace), 'elements_to_region_id_map' and
            'region_id_to_css_map'
        :param validation_results: logger that all results are reported to
        :return: True if no check failed, or if there is no body element;
            False if any check failed or an exception was raised while
            computing times
        """
        tt_ns = context.get('root_ns', ns_ttml)
        valid = True

        time_expression_handler = self._makeTimeExpressionHandler(
            tt=input,
            tt_ns=tt_ns
        )

        time_el_map = {}

        body_el = input.find('./' + make_qname(namespace=tt_ns, name='body'))
        if body_el is None:
            # Fall back to a body element in any namespace
            body_el = input.find('./{*}body')
        if body_el is None:
            validation_results.skip(
                location='{} element'.format(input.tag),
                message='No body element found, skipping timing tests',
                code=ValidationCode.ttml_document_timing
            )
            return valid

        try:
            (te_valid, doc_begin, doc_end) = self._collect_timed_elements(
                te=time_expression_handler,
                el=body_el,
                epoch_s=0,
                parent_end=None,
                begin_defined=False,
                end_defined=False,
                time_el_map=time_el_map,
                validation_results=validation_results)
            valid &= te_valid

            valid &= self._checkForShortGaps(
                time_el_map=time_el_map,
                validation_results=validation_results
            )

            # The early subtitle count only makes sense if the segment is
            # at least as long as the period being searched
            search_period_fits = self._segment_dur is None \
                or self._early_begin_threshold <= self._segment_dur
            if search_period_fits:
                valid &= self._checkEnoughSubsAtBeginning(
                    time_el_map=time_el_map,
                    validation_results=validation_results
                )
            else:
                validation_results.info(
                    location='Document',
                    message='Not checking for enough early subtitles '
                            'because segment duration is shorter than '
                            'search period.',
                    code=ValidationCode.bbc_timing_minimum_subtitles
                )

            valid &= self._checkSubsOverlapSegment(
                doc_begin=doc_begin,
                doc_end=doc_end,
                validation_results=validation_results
            )

            el_to_region_id_map = context.get('elements_to_region_id_map')
            region_id_to_css_map = context.get('region_id_to_css_map')
            if el_to_region_id_map is None or region_id_to_css_map is None:
                validation_results.skip(
                    location='Document',
                    message='Skipping check for overlapping regions '
                            'because region reference checks appear not '
                            'to have completed.',
                    code=ValidationCode.ebuttd_overlapping_region_constraint
                )
            else:
                valid &= self._checkForOverlappingRegions(
                    time_el_map=time_el_map,
                    el_region_id_map=el_to_region_id_map,
                    region_id_to_css_map=region_id_to_css_map,
                    validation_results=validation_results
                )

            validation_results.info(
                location='Document',
                message='First text appears at {}s, end of doc is {}'.format(
                    doc_begin,
                    'undefined' if doc_end is None else '{}s'.format(doc_end)
                ),
                code=ValidationCode.ttml_document_timing
            )
        except Exception as e:
            # Report any failure as a validation error rather than letting
            # it propagate to the caller
            valid = False
            validation_results.error(
                location='body element or descendants',
                message='Exception encountered while trying to compute times:'
                        ' {}, trace: {}'
                        .format(
                            str(e),
                            ''.join(traceback.format_exception(e))),
                code=ValidationCode.ttml_document_timing
            )

        return valid