libdcp
smpte_subtitle_asset.cc
Go to the documentation of this file.
1 /*
2  Copyright (C) 2012-2021 Carl Hetherington <cth@carlh.net>
3 
4  This file is part of libdcp.
5 
6  libdcp is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  libdcp is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with libdcp. If not, see <http://www.gnu.org/licenses/>.
18 
19  In addition, as a special exception, the copyright holders give
20  permission to link the code of portions of this program with the
21  OpenSSL library under certain conditions as described in each
22  individual source file, and distribute linked combinations
23  including the two.
24 
25  You must obey the GNU General Public License in all respects
26  for all of the code used other than OpenSSL. If you modify
27  file(s) with this exception, you may extend this exception to your
28  version of the file(s), but you are not obligated to do so. If you
29  do not wish to do so, delete this exception statement from your
30  version. If you delete this exception statement from all source
31  files in the program, then also delete it here.
32 */
33 
34 
40 #include "compose.hpp"
41 #include "crypto_context.h"
42 #include "dcp_assert.h"
43 #include "exceptions.h"
44 #include "raw_convert.h"
45 #include "smpte_load_font_node.h"
46 #include "smpte_subtitle_asset.h"
47 #include "subtitle_image.h"
48 #include "util.h"
49 #include "warnings.h"
50 #include "xml.h"
51 LIBDCP_DISABLE_WARNINGS
52 #include <asdcp/AS_DCP.h>
53 #include <asdcp/KM_util.h>
54 #include <asdcp/KM_log.h>
55 #include <libxml++/libxml++.h>
56 LIBDCP_ENABLE_WARNINGS
57 #include <boost/algorithm/string.hpp>
58 
59 
60 using std::string;
61 using std::list;
62 using std::vector;
63 using std::map;
64 using std::shared_ptr;
65 using std::dynamic_pointer_cast;
66 using std::make_shared;
67 using boost::split;
68 using boost::is_any_of;
69 using boost::shared_array;
70 using boost::optional;
71 using boost::starts_with;
72 using namespace dcp;
73 
74 
75 static string const subtitle_smpte_ns = "http://www.smpte-ra.org/schemas/428-7/2010/DCST";
76 
77 
78 SMPTESubtitleAsset::SMPTESubtitleAsset ()
79  : MXF (Standard::SMPTE)
80  , _edit_rate (24, 1)
81  , _time_code_rate (24)
82  , _xml_id (make_uuid())
83 {
84 
85 }
86 
87 
88 SMPTESubtitleAsset::SMPTESubtitleAsset (boost::filesystem::path file)
89  : SubtitleAsset (file)
90 {
91  auto xml = make_shared<cxml::Document>("SubtitleReel");
92 
93  auto reader = make_shared<ASDCP::TimedText::MXFReader>();
94  auto r = Kumu::RESULT_OK;
95  {
97  r = reader->OpenRead (_file->string().c_str ());
98  }
99  if (!ASDCP_FAILURE(r)) {
100  /* MXF-wrapped */
101  ASDCP::WriterInfo info;
102  reader->FillWriterInfo (info);
103  _id = read_writer_info (info);
104  if (!_key_id) {
105  /* Not encrypted; read it in now */
106  string xml_string;
107  reader->ReadTimedTextResource (xml_string);
108  _raw_xml = xml_string;
109  xml->read_string (xml_string);
110  parse_xml (xml);
111  read_mxf_descriptor (reader);
112  read_mxf_resources (reader, make_shared<DecryptionContext>(optional<Key>(), Standard::SMPTE));
113  } else {
114  read_mxf_descriptor (reader);
115  }
116  } else {
117  /* Plain XML */
118  try {
119  _raw_xml = dcp::file_to_string (file);
120  xml = make_shared<cxml::Document>("SubtitleReel");
121  xml->read_file (file);
122  parse_xml (xml);
123  } catch (cxml::Error& e) {
124  boost::throw_exception (
125  ReadError (
126  String::compose (
127  "Failed to read subtitle file %1; MXF failed with %2, XML failed with %3",
128  file, static_cast<int>(r), e.what()
129  )
130  )
131  );
132  }
133 
134  /* Try to read PNG files from the same folder that the XML is in; the wisdom of this is
135  debatable, at best...
136  */
137  for (auto i: _subtitles) {
138  auto im = dynamic_pointer_cast<SubtitleImage>(i);
139  if (im && im->png_image().size() == 0) {
140  /* Even more dubious; allow <id>.png or urn:uuid:<id>.png */
141  auto p = file.parent_path() / String::compose("%1.png", im->id());
142  if (boost::filesystem::is_regular_file(p)) {
143  im->read_png_file (p);
144  } else if (starts_with (im->id(), "urn:uuid:")) {
145  p = file.parent_path() / String::compose("%1.png", remove_urn_uuid(im->id()));
146  if (boost::filesystem::is_regular_file(p)) {
147  im->read_png_file (p);
148  }
149  }
150  }
151  }
152  _standard = Standard::SMPTE;
153  }
154 
155  /* Check that all required image data have been found */
156  for (auto i: _subtitles) {
157  auto im = dynamic_pointer_cast<SubtitleImage>(i);
158  if (im && im->png_image().size() == 0) {
159  throw MissingSubtitleImageError (im->id());
160  }
161  }
162 }
163 
164 
165 void
166 SMPTESubtitleAsset::parse_xml (shared_ptr<cxml::Document> xml)
167 {
168  _xml_id = remove_urn_uuid(xml->string_child("Id"));
169  _load_font_nodes = type_children<dcp::SMPTELoadFontNode> (xml, "LoadFont");
170 
171  _content_title_text = xml->string_child ("ContentTitleText");
172  _annotation_text = xml->optional_string_child ("AnnotationText");
173  _issue_date = LocalTime (xml->string_child ("IssueDate"));
174  _reel_number = xml->optional_number_child<int> ("ReelNumber");
175  _language = xml->optional_string_child ("Language");
176 
177  /* This is supposed to be two numbers, but a single number has been seen in the wild */
178  auto const er = xml->string_child ("EditRate");
179  vector<string> er_parts;
180  split (er_parts, er, is_any_of (" "));
181  if (er_parts.size() == 1) {
182  _edit_rate = Fraction (raw_convert<int> (er_parts[0]), 1);
183  } else if (er_parts.size() == 2) {
184  _edit_rate = Fraction (raw_convert<int> (er_parts[0]), raw_convert<int> (er_parts[1]));
185  } else {
186  throw XMLError ("malformed EditRate " + er);
187  }
188 
189  _time_code_rate = xml->number_child<int> ("TimeCodeRate");
190  if (xml->optional_string_child ("StartTime")) {
191  _start_time = Time (xml->string_child("StartTime"), _time_code_rate);
192  }
193 
194  /* Now we need to drop down to xmlpp */
195 
196  vector<ParseState> ps;
197  for (auto i: xml->node()->get_children()) {
198  auto const e = dynamic_cast<xmlpp::Element const *>(i);
199  if (e && e->get_name() == "SubtitleList") {
200  parse_subtitles (e, ps, _time_code_rate, Standard::SMPTE);
201  }
202  }
203 
204  /* Guess intrinsic duration */
205  _intrinsic_duration = latest_subtitle_out().as_editable_units_ceil(_edit_rate.numerator / _edit_rate.denominator);
206 }
207 
208 
209 void
210 SMPTESubtitleAsset::read_mxf_resources (shared_ptr<ASDCP::TimedText::MXFReader> reader, shared_ptr<DecryptionContext> dec)
211 {
212  ASDCP::TimedText::TimedTextDescriptor descriptor;
213  reader->FillTimedTextDescriptor (descriptor);
214 
215  /* Load fonts and images */
216 
217  for (
218  auto i = descriptor.ResourceList.begin();
219  i != descriptor.ResourceList.end();
220  ++i) {
221 
222  ASDCP::TimedText::FrameBuffer buffer;
223  buffer.Capacity (10 * 1024 * 1024);
224  reader->ReadAncillaryResource (i->ResourceID, buffer, dec->context(), dec->hmac());
225 
226  char id[64];
227  Kumu::bin2UUIDhex (i->ResourceID, ASDCP::UUIDlen, id, sizeof(id));
228 
229  shared_array<uint8_t> data (new uint8_t[buffer.Size()]);
230  memcpy (data.get(), buffer.RoData(), buffer.Size());
231 
232  switch (i->Type) {
233  case ASDCP::TimedText::MT_OPENTYPE:
234  {
235  auto j = _load_font_nodes.begin();
236  while (j != _load_font_nodes.end() && (*j)->urn != id) {
237  ++j;
238  }
239 
240  if (j != _load_font_nodes.end ()) {
241  _fonts.push_back (Font ((*j)->id, (*j)->urn, ArrayData (data, buffer.Size ())));
242  }
243  break;
244  }
245  case ASDCP::TimedText::MT_PNG:
246  {
247  auto j = _subtitles.begin();
248  while (j != _subtitles.end() && ((!dynamic_pointer_cast<SubtitleImage>(*j)) || dynamic_pointer_cast<SubtitleImage>(*j)->id() != id)) {
249  ++j;
250  }
251 
252  if (j != _subtitles.end()) {
253  dynamic_pointer_cast<SubtitleImage>(*j)->set_png_image (ArrayData(data, buffer.Size()));
254  }
255  break;
256  }
257  default:
258  break;
259  }
260  }
261 }
262 
263 
264 void
265 SMPTESubtitleAsset::read_mxf_descriptor (shared_ptr<ASDCP::TimedText::MXFReader> reader)
266 {
267  ASDCP::TimedText::TimedTextDescriptor descriptor;
268  reader->FillTimedTextDescriptor (descriptor);
269 
270  _intrinsic_duration = descriptor.ContainerDuration;
271  /* The thing which is called AssetID in the descriptor is also known as the
272  * ResourceID of the MXF. We store that, at present just for verification
273  * purposes.
274  */
275  char id[64];
276  Kumu::bin2UUIDhex (descriptor.AssetID, ASDCP::UUIDlen, id, sizeof(id));
277  _resource_id = id;
278 }
279 
280 
281 void
283 {
284  /* See if we already have a key; if we do, and we have a file, we'll already
285  have read that file.
286  */
287  auto const had_key = static_cast<bool>(_key);
288 
289  MXF::set_key (key);
290 
291  if (!_key_id || !_file || had_key) {
292  /* Either we don't have any data to read, it wasn't
293  encrypted, or we've already read it, so we don't
294  need to do anything else.
295  */
296  return;
297  }
298 
299  /* Our data was encrypted; now we can decrypt it */
300 
301  auto reader = make_shared<ASDCP::TimedText::MXFReader>();
302  auto r = reader->OpenRead (_file->string().c_str ());
303  if (ASDCP_FAILURE (r)) {
304  boost::throw_exception (
305  ReadError (
306  String::compose ("Could not read encrypted subtitle MXF (%1)", static_cast<int> (r))
307  )
308  );
309  }
310 
311  auto dec = make_shared<DecryptionContext>(key, Standard::SMPTE);
312  string xml_string;
313  reader->ReadTimedTextResource (xml_string, dec->context(), dec->hmac());
314  _raw_xml = xml_string;
315  auto xml = make_shared<cxml::Document>("SubtitleReel");
316  xml->read_string (xml_string);
317  parse_xml (xml);
318  read_mxf_resources (reader, dec);
319 }
320 
321 
322 vector<shared_ptr<LoadFontNode>>
323 SMPTESubtitleAsset::load_font_nodes () const
324 {
325  vector<shared_ptr<LoadFontNode>> lf;
326  copy (_load_font_nodes.begin(), _load_font_nodes.end(), back_inserter(lf));
327  return lf;
328 }
329 
330 
331 bool
332 SMPTESubtitleAsset::valid_mxf (boost::filesystem::path file)
333 {
334  ASDCP::TimedText::MXFReader reader;
335  Kumu::DefaultLogSink().UnsetFilterFlag(Kumu::LOG_ALLOW_ALL);
336  auto r = reader.OpenRead (file.string().c_str ());
337  Kumu::DefaultLogSink().SetFilterFlag(Kumu::LOG_ALLOW_ALL);
338  return !ASDCP_FAILURE (r);
339 }
340 
341 
342 string
343 SMPTESubtitleAsset::xml_as_string () const
344 {
345  xmlpp::Document doc;
346  auto root = doc.create_root_node ("SubtitleReel");
347  root->set_namespace_declaration (subtitle_smpte_ns);
348  root->set_namespace_declaration ("http://www.w3.org/2001/XMLSchema", "xs");
349 
350  DCP_ASSERT (_xml_id);
351  root->add_child("Id")->add_child_text("urn:uuid:" + *_xml_id);
352  root->add_child("ContentTitleText")->add_child_text(_content_title_text);
353  if (_annotation_text) {
354  root->add_child("AnnotationText")->add_child_text(_annotation_text.get());
355  }
356  root->add_child("IssueDate")->add_child_text(_issue_date.as_string(true));
357  if (_reel_number) {
358  root->add_child("ReelNumber")->add_child_text(raw_convert<string>(_reel_number.get()));
359  }
360  if (_language) {
361  root->add_child("Language")->add_child_text(_language.get());
362  }
363  root->add_child("EditRate")->add_child_text(_edit_rate.as_string());
364  root->add_child("TimeCodeRate")->add_child_text(raw_convert<string>(_time_code_rate));
365  if (_start_time) {
366  root->add_child("StartTime")->add_child_text(_start_time.get().as_string(Standard::SMPTE));
367  }
368 
369  for (auto i: _load_font_nodes) {
370  auto load_font = root->add_child("LoadFont");
371  load_font->add_child_text ("urn:uuid:" + i->urn);
372  load_font->set_attribute ("ID", i->id);
373  }
374 
375  subtitles_as_xml (root->add_child("SubtitleList"), _time_code_rate, Standard::SMPTE);
376 
377  return doc.write_to_string ("UTF-8");
378 }
379 
380 
381 void
382 SMPTESubtitleAsset::write (boost::filesystem::path p) const
383 {
384  EncryptionContext enc (key(), Standard::SMPTE);
385 
386  ASDCP::WriterInfo writer_info;
387  fill_writer_info (&writer_info, _id);
388 
389  ASDCP::TimedText::TimedTextDescriptor descriptor;
390  descriptor.EditRate = ASDCP::Rational (_edit_rate.numerator, _edit_rate.denominator);
391  descriptor.EncodingName = "UTF-8";
392 
393  /* Font references */
394 
395  for (auto i: _load_font_nodes) {
396  auto j = _fonts.begin();
397  while (j != _fonts.end() && j->load_id != i->id) {
398  ++j;
399  }
400  if (j != _fonts.end ()) {
401  ASDCP::TimedText::TimedTextResourceDescriptor res;
402  unsigned int c;
403  Kumu::hex2bin (i->urn.c_str(), res.ResourceID, Kumu::UUID_Length, &c);
404  DCP_ASSERT (c == Kumu::UUID_Length);
405  res.Type = ASDCP::TimedText::MT_OPENTYPE;
406  descriptor.ResourceList.push_back (res);
407  }
408  }
409 
410  /* Image subtitle references */
411 
412  for (auto i: _subtitles) {
413  auto si = dynamic_pointer_cast<SubtitleImage>(i);
414  if (si) {
415  ASDCP::TimedText::TimedTextResourceDescriptor res;
416  unsigned int c;
417  Kumu::hex2bin (si->id().c_str(), res.ResourceID, Kumu::UUID_Length, &c);
418  DCP_ASSERT (c == Kumu::UUID_Length);
419  res.Type = ASDCP::TimedText::MT_PNG;
420  descriptor.ResourceList.push_back (res);
421  }
422  }
423 
424  descriptor.NamespaceName = subtitle_smpte_ns;
425  unsigned int c;
426  DCP_ASSERT (_xml_id);
427  Kumu::hex2bin (_xml_id->c_str(), descriptor.AssetID, ASDCP::UUIDlen, &c);
428  DCP_ASSERT (c == Kumu::UUID_Length);
429  descriptor.ContainerDuration = _intrinsic_duration;
430 
431  ASDCP::TimedText::MXFWriter writer;
432  /* This header size is a guess. Empirically it seems that each subtitle reference is 90 bytes, and we need some extra.
433  The default size is not enough for some feature-length PNG sub projects (see DCP-o-matic #1561).
434  */
435  ASDCP::Result_t r = writer.OpenWrite (p.string().c_str(), writer_info, descriptor, _subtitles.size() * 90 + 16384);
436  if (ASDCP_FAILURE (r)) {
437  boost::throw_exception (FileError ("could not open subtitle MXF for writing", p.string(), r));
438  }
439 
440  _raw_xml = xml_as_string ();
441 
442  r = writer.WriteTimedTextResource (*_raw_xml, enc.context(), enc.hmac());
443  if (ASDCP_FAILURE (r)) {
444  boost::throw_exception (MXFFileError ("could not write XML to timed text resource", p.string(), r));
445  }
446 
447  /* Font payload */
448 
449  for (auto i: _load_font_nodes) {
450  auto j = _fonts.begin();
451  while (j != _fonts.end() && j->load_id != i->id) {
452  ++j;
453  }
454  if (j != _fonts.end ()) {
455  ASDCP::TimedText::FrameBuffer buffer;
456  ArrayData data_copy(j->data);
457  buffer.SetData (data_copy.data(), data_copy.size());
458  buffer.Size (j->data.size());
459  r = writer.WriteAncillaryResource (buffer, enc.context(), enc.hmac());
460  if (ASDCP_FAILURE(r)) {
461  boost::throw_exception (MXFFileError ("could not write font to timed text resource", p.string(), r));
462  }
463  }
464  }
465 
466  /* Image subtitle payload */
467 
468  for (auto i: _subtitles) {
469  auto si = dynamic_pointer_cast<SubtitleImage>(i);
470  if (si) {
471  ASDCP::TimedText::FrameBuffer buffer;
472  buffer.SetData (si->png_image().data(), si->png_image().size());
473  buffer.Size (si->png_image().size());
474  r = writer.WriteAncillaryResource (buffer, enc.context(), enc.hmac());
475  if (ASDCP_FAILURE(r)) {
476  boost::throw_exception (MXFFileError ("could not write PNG data to timed text resource", p.string(), r));
477  }
478  }
479  }
480 
481  writer.Finalize ();
482 
483  _file = p;
484 }
485 
486 bool
487 SMPTESubtitleAsset::equals (shared_ptr<const Asset> other_asset, EqualityOptions options, NoteHandler note) const
488 {
489  if (!SubtitleAsset::equals (other_asset, options, note)) {
490  return false;
491  }
492 
493  auto other = dynamic_pointer_cast<const SMPTESubtitleAsset>(other_asset);
494  if (!other) {
495  note (NoteType::ERROR, "Subtitles are in different standards");
496  return false;
497  }
498 
499  auto i = _load_font_nodes.begin();
500  auto j = other->_load_font_nodes.begin();
501 
502  while (i != _load_font_nodes.end ()) {
503  if (j == other->_load_font_nodes.end ()) {
504  note (NoteType::ERROR, "<LoadFont> nodes differ");
505  return false;
506  }
507 
508  if ((*i)->id != (*j)->id) {
509  note (NoteType::ERROR, "<LoadFont> nodes differ");
510  return false;
511  }
512 
513  ++i;
514  ++j;
515  }
516 
517  if (_content_title_text != other->_content_title_text) {
518  note (NoteType::ERROR, "Subtitle content title texts differ");
519  return false;
520  }
521 
522  if (_language != other->_language) {
523  note (NoteType::ERROR, String::compose("Subtitle languages differ (`%1' vs `%2')", _language.get_value_or("[none]"), other->_language.get_value_or("[none]")));
524  return false;
525  }
526 
527  if (_annotation_text != other->_annotation_text) {
528  note (NoteType::ERROR, "Subtitle annotation texts differ");
529  return false;
530  }
531 
532  if (_issue_date != other->_issue_date) {
533  if (options.issue_dates_can_differ) {
534  note (NoteType::NOTE, "Subtitle issue dates differ");
535  } else {
536  note (NoteType::ERROR, "Subtitle issue dates differ");
537  return false;
538  }
539  }
540 
541  if (_reel_number != other->_reel_number) {
542  note (NoteType::ERROR, "Subtitle reel numbers differ");
543  return false;
544  }
545 
546  if (_edit_rate != other->_edit_rate) {
547  note (NoteType::ERROR, "Subtitle edit rates differ");
548  return false;
549  }
550 
551  if (_time_code_rate != other->_time_code_rate) {
552  note (NoteType::ERROR, "Subtitle time code rates differ");
553  return false;
554  }
555 
556  if (_start_time != other->_start_time) {
557  note (NoteType::ERROR, "Subtitle start times differ");
558  return false;
559  }
560 
561  return true;
562 }
563 
564 
565 void
566 SMPTESubtitleAsset::add_font (string load_id, dcp::ArrayData data)
567 {
568  string const uuid = make_uuid ();
569  _fonts.push_back (Font(load_id, uuid, data));
570  _load_font_nodes.push_back (make_shared<SMPTELoadFontNode>(load_id, uuid));
571 }
572 
573 
574 void
575 SMPTESubtitleAsset::add (shared_ptr<Subtitle> s)
576 {
577  SubtitleAsset::add (s);
578  _intrinsic_duration = latest_subtitle_out().as_editable_units_ceil(_edit_rate.numerator / _edit_rate.denominator);
579 }
580 
Class to hold an arbitrary block of data.
Definition: array_data.h:55
int size() const override
Definition: array_data.h:85
boost::optional< boost::filesystem::path > file() const
Definition: asset.h:97
boost::optional< boost::filesystem::path > _file
Definition: asset.h:122
An exception related to a file.
Definition: exceptions.h:56
A fraction (i.e. a thing with an integer numerator and an integer denominator).
Definition: types.h:214
A key for decrypting/encrypting assets.
Definition: key.h:59
A representation of a local time (down to the second), including its offset from GMT (equivalent to x...
Definition: local_time.h:64
std::string as_string(bool with_millisecond=false) const
Definition: local_time.cc:186
An exception related to an MXF file.
Definition: exceptions.h:82
Parent for classes which represent MXF files.
Definition: mxf.h:74
virtual void set_key(Key)
Definition: mxf.cc:112
boost::optional< Key > _key
Definition: mxf.h:157
std::string read_writer_info(ASDCP::WriterInfo const &)
Definition: mxf.cc:124
void fill_writer_info(ASDCP::WriterInfo *w, std::string id) const
Definition: mxf.cc:82
boost::optional< std::string > _key_id
Definition: mxf.h:155
boost::optional< Key > key() const
Definition: mxf.h:104
Any error that occurs when reading data from a DCP.
Definition: exceptions.h:106
void write(boost::filesystem::path path) const override
void set_key(Key key) override
boost::optional< std::string > _xml_id
boost::optional< std::string > _language
boost::optional< std::string > _resource_id
A parent for classes representing a file containing subtitles.
std::vector< std::shared_ptr< Subtitle > > _subtitles
std::vector< Font > _fonts
void subtitles_as_xml(xmlpp::Element *root, int time_code_rate, Standard standard) const
boost::optional< std::string > _raw_xml
A representation of time within a DCP.
Definition: dcp_time.h:73
int64_t as_editable_units_ceil(int tcr_) const
Definition: dcp_time.cc:350
An XML error.
Definition: exceptions.h:164
DCP_ASSERT macro.
Exceptions thrown by libdcp.
Namespace for everything in libdcp.
Definition: array_data.h:50
Methods for conversion to/from string.
SMPTELoadFontNode class.
SMPTESubtitleAsset class.
A class to describe what "equality" means for a particular test.
Definition: types.h:249
bool issue_dates_can_differ
Definition: types.h:266
SubtitleImage class.
Utility methods and classes.
Helpers for XML reading with libcxml.