libdcp
language_tag.cc
Go to the documentation of this file.
1 /*
2  Copyright (C) 2020-2021 Carl Hetherington <cth@carlh.net>
3 
4  This file is part of libdcp.
5 
6  libdcp is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  libdcp is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with libdcp. If not, see <http://www.gnu.org/licenses/>.
18 
19  In addition, as a special exception, the copyright holders give
20  permission to link the code of portions of this program with the
21  OpenSSL library under certain conditions as described in each
22  individual source file, and distribute linked combinations
23  including the two.
24 
25  You must obey the GNU General Public License in all respects
26  for all of the code used other than OpenSSL. If you modify
27  file(s) with this exception, you may extend this exception to your
28  version of the file(s), but you are not obligated to do so. If you
29  do not wish to do so, delete this exception statement from your
30  version. If you delete this exception statement from all source
31  files in the program, then also delete it here.
32 */
33 
34 
#include "compose.hpp"
#include "dcp_assert.h"
#include "exceptions.h"
#include "file.h"
#include "language_tag.h"
#include <boost/algorithm/string.hpp>
#include <algorithm>
#include <string>
#include <utility>
47 
48 
49 using std::make_pair;
50 using std::ostream;
51 using std::pair;
52 using std::string;
53 using std::vector;
54 using boost::algorithm::trim;
55 using boost::optional;
56 using namespace dcp;
57 
58 
59 static vector<LanguageTag::SubtagData> language_list;
60 static vector<LanguageTag::SubtagData> variant_list;
61 static vector<LanguageTag::SubtagData> region_list;
62 static vector<LanguageTag::SubtagData> script_list;
63 static vector<LanguageTag::SubtagData> extlang_list;
64 
65 static vector<pair<string, string>> dcnc_list;
66 
67 
68 static
69 optional<LanguageTag::SubtagData>
70 find_in_list (vector<LanguageTag::SubtagData> const& list, string subtag)
71 {
72  for (auto const& i: list) {
73  if (boost::iequals(i.subtag, subtag)) {
74  return i;
75  }
76  }
77 
78  return {};
79 }
80 
81 
82 LanguageTag::Subtag::Subtag (string subtag, SubtagType type)
83  : _subtag (subtag)
84 {
85  if (!get_subtag_data(type, subtag)) {
86  throw LanguageTagError(String::compose("Unknown %1 string %2", subtag_type_name(type), subtag));
87  }
88 }
89 
90 
91 LanguageTag::LanguageTag (string tag)
92 {
93  vector<string> parts;
94  boost::split (parts, tag, boost::is_any_of("-"));
95  if (parts.empty()) {
96  throw LanguageTagError (String::compose("Could not parse language tag %1", tag));
97  }
98 
99  vector<string>::size_type p = 0;
100  _language = LanguageSubtag (parts[p]);
101  ++p;
102 
103  if (p == parts.size()) {
104  return;
105  }
106 
107  try {
108  _script = ScriptSubtag (parts[p]);
109  ++p;
110  } catch (...) {}
111 
112  if (p == parts.size()) {
113  return;
114  }
115 
116  try {
117  _region = RegionSubtag (parts[p]);
118  ++p;
119  } catch (...) {}
120 
121  if (p == parts.size()) {
122  return;
123  }
124 
125  try {
126  while (true) {
127  _variants.push_back (VariantSubtag(parts[p]));
128  ++p;
129  if (p == parts.size()) {
130  return;
131  }
132  }
133  } catch (...) {}
134 
135  try {
136  while (true) {
137  _extlangs.push_back (ExtlangSubtag(parts[p]));
138  ++p;
139  if (p == parts.size()) {
140  return;
141  }
142  }
143  } catch (...) {}
144 
145  if (p < parts.size()) {
146  throw LanguageTagError (String::compose("Unrecognised subtag %1", parts[p]));
147  }
148 }
149 
150 
151 string
152 LanguageTag::to_string () const
153 {
154  if (!_language) {
155  throw LanguageTagError("No language set up");
156  }
157 
158  auto s = _language->subtag();
159 
160  if (_script) {
161  s += "-" + _script->subtag();
162  }
163 
164  if (_region) {
165  s += "-" + _region->subtag();
166  }
167 
168  for (auto i: _variants) {
169  s += "-" + i.subtag();
170  }
171 
172  for (auto i: _extlangs) {
173  s += "-" + i.subtag();
174  }
175 
176  return s;
177 }
178 
179 
180 void
181 LanguageTag::set_language (LanguageSubtag language)
182 {
183  _language = language;
184 }
185 
186 
187 void
188 LanguageTag::set_script (ScriptSubtag script)
189 {
190  _script = script;
191 }
192 
193 
194 void
195 LanguageTag::set_region (RegionSubtag region)
196 {
197  _region = region;
198 }
199 
200 
201 void
202 LanguageTag::add_variant (VariantSubtag variant)
203 {
204  if (find(_variants.begin(), _variants.end(), variant) != _variants.end()) {
205  throw LanguageTagError (String::compose("Duplicate Variant subtag %1", variant.subtag()));
206  }
207 
208  _variants.push_back (variant);
209 }
210 
211 
212 template <class T>
213 void
214 check_for_duplicates (vector<T> const& subtags, dcp::LanguageTag::SubtagType type)
215 {
216  vector<T> sorted = subtags;
217  sort (sorted.begin(), sorted.end());
218  optional<T> last;
219  for (auto const& i: sorted) {
220  if (last && i == *last) {
221  throw LanguageTagError (String::compose("Duplicate %1 subtag %2", dcp::LanguageTag::subtag_type_name(type), i.subtag()));
222  }
223  last = i;
224  }
225 }
226 
227 
228 void
229 LanguageTag::set_variants (vector<VariantSubtag> variants)
230 {
231  check_for_duplicates (variants, SubtagType::VARIANT);
232  _variants = variants;
233 }
234 
235 
236 void
237 LanguageTag::add_extlang (ExtlangSubtag extlang)
238 {
239  if (find(_extlangs.begin(), _extlangs.end(), extlang) != _extlangs.end()) {
240  throw LanguageTagError (String::compose("Duplicate Extlang subtag %1", extlang.subtag()));
241  }
242 
243  _extlangs.push_back (extlang);
244 }
245 
246 
247 void
248 LanguageTag::set_extlangs (vector<ExtlangSubtag> extlangs)
249 {
250  check_for_duplicates (extlangs, SubtagType::EXTLANG);
251  _extlangs = extlangs;
252 }
253 
254 
255 string
256 LanguageTag::description () const
257 {
258  if (!_language) {
259  throw LanguageTagError("No language set up");
260  }
261 
262  string d;
263 
264  for (auto const& i: _variants) {
265  optional<SubtagData> variant = get_subtag_data (SubtagType::VARIANT, i.subtag());
266  DCP_ASSERT (variant);
267  d += variant->description + " dialect of ";
268  }
269 
270  auto language = get_subtag_data (SubtagType::LANGUAGE, _language->subtag());
271  DCP_ASSERT (language);
272  d += language->description;
273 
274  if (_script) {
275  auto script = get_subtag_data (SubtagType::SCRIPT, _script->subtag());
276  DCP_ASSERT (script);
277  d += " written using the " + script->description + " script";
278  }
279 
280  if (_region) {
281  auto region = get_subtag_data (SubtagType::REGION, _region->subtag());
282  DCP_ASSERT (region);
283  d += " for " + region->description;
284  }
285 
286  for (auto const& i: _extlangs) {
287  auto extlang = get_subtag_data (SubtagType::EXTLANG, i.subtag());
288  DCP_ASSERT (extlang);
289  d += ", " + extlang->description;
290  }
291 
292  return d;
293 }
294 
295 
296 vector<LanguageTag::SubtagData> const &
297 LanguageTag::get_all (SubtagType type)
298 {
299  switch (type) {
300  case SubtagType::LANGUAGE:
301  return language_list;
302  case SubtagType::SCRIPT:
303  return script_list;
304  case SubtagType::REGION:
305  return region_list;
306  case SubtagType::VARIANT:
307  return variant_list;
308  case SubtagType::EXTLANG:
309  return extlang_list;
310  }
311 
312  return language_list;
313 }
314 
315 
316 string
317 LanguageTag::subtag_type_name (SubtagType type)
318 {
319  switch (type) {
320  case SubtagType::LANGUAGE:
321  return "Language";
322  case SubtagType::SCRIPT:
323  return "Script";
324  case SubtagType::REGION:
325  return "Region";
326  case SubtagType::VARIANT:
327  return "Variant";
328  case SubtagType::EXTLANG:
329  return "Extended";
330  }
331 
332  return {};
333 }
334 
335 
336 bool
337 dcp::operator== (dcp::LanguageTag const& a, dcp::LanguageTag const& b)
338 {
339  return a.to_string() == b.to_string();
340 }
341 
342 
343 bool
344 dcp::operator!= (dcp::LanguageTag const& a, dcp::LanguageTag const& b)
345 {
346  return a.to_string() != b.to_string();
347 }
348 
349 
350 bool
351 dcp::operator< (dcp::LanguageTag const& a, dcp::LanguageTag const& b)
352 {
353  return a.to_string() < b.to_string();
354 }
355 
356 
357 ostream&
358 dcp::operator<< (ostream& os, dcp::LanguageTag const& tag)
359 {
360  os << tag.to_string();
361  return os;
362 }
363 
364 
365 vector<pair<LanguageTag::SubtagType, LanguageTag::SubtagData>>
366 LanguageTag::subtags () const
367 {
368  vector<pair<SubtagType, SubtagData>> s;
369 
370  if (_language) {
371  s.push_back (make_pair(SubtagType::LANGUAGE, *get_subtag_data(SubtagType::LANGUAGE, _language->subtag())));
372  }
373 
374  if (_script) {
375  s.push_back (make_pair(SubtagType::SCRIPT, *get_subtag_data(SubtagType::SCRIPT, _script->subtag())));
376  }
377 
378  if (_region) {
379  s.push_back (make_pair(SubtagType::REGION, *get_subtag_data(SubtagType::REGION, _region->subtag())));
380  }
381 
382  for (auto const& i: _variants) {
383  s.push_back (make_pair(SubtagType::VARIANT, *get_subtag_data(SubtagType::VARIANT, i.subtag())));
384  }
385 
386  for (auto const& i: _extlangs) {
387  s.push_back (make_pair(SubtagType::EXTLANG, *get_subtag_data(SubtagType::EXTLANG, i.subtag())));
388  }
389 
390  return s;
391 }
392 
393 
394 optional<LanguageTag::SubtagData>
395 LanguageTag::get_subtag_data (LanguageTag::SubtagType type, string subtag)
396 {
397  switch (type) {
398  case SubtagType::LANGUAGE:
399  return find_in_list(language_list, subtag);
400  case SubtagType::SCRIPT:
401  return find_in_list(script_list, subtag);
402  case SubtagType::REGION:
403  return find_in_list(region_list, subtag);
404  case SubtagType::VARIANT:
405  return find_in_list(variant_list, subtag);
406  case SubtagType::EXTLANG:
407  return find_in_list(extlang_list, subtag);
408  }
409 
410  return {};
411 }
412 
413 
414 optional<string>
415 LanguageTag::get_subtag_description (LanguageTag::SubtagType type, string subtag)
416 {
417  auto data = get_subtag_data (type, subtag);
418  if (!data) {
419  return {};
420  }
421 
422  return data->description;
423 }
424 
425 
426 void
427 load_language_tag_list (boost::filesystem::path tags_directory, string name, std::function<void (std::string, std::string)> add)
428 {
429  File f(tags_directory / name, "r");
430  if (!f) {
431  throw FileError ("Could not open tags file", tags_directory / name, errno);
432  }
433  char buffer[512];
434 
435  while (!f.eof()) {
436  char* r = f.gets(buffer, sizeof(buffer));
437  if (r == 0) {
438  break;
439  }
440  string a = buffer;
441  trim (a);
442  r = f.gets(buffer, sizeof(buffer));
443  if (r == 0) {
444  throw FileError ("Bad tags file", tags_directory / name, -1);
445  }
446  string b = buffer;
447  trim (b);
448  add (a, b);
449  }
450 }
451 
452 
453 void
454 dcp::load_language_tag_lists (boost::filesystem::path tags_directory)
455 {
456  auto add_subtag = [](vector<LanguageTag::SubtagData>& list, string a, string b) {
457  list.push_back (LanguageTag::SubtagData(a, b));
458  };
459 
460  load_language_tag_list (tags_directory, "language", [&add_subtag](string a, string b) { add_subtag(language_list, a, b); });
461  load_language_tag_list (tags_directory, "variant", [&add_subtag](string a, string b) { add_subtag(variant_list, a, b); });
462  load_language_tag_list (tags_directory, "region", [&add_subtag](string a, string b) { add_subtag(region_list, a, b); });
463  load_language_tag_list (tags_directory, "script", [&add_subtag](string a, string b) { add_subtag(script_list, a, b); });
464  load_language_tag_list (tags_directory, "extlang", [&add_subtag](string a, string b) { add_subtag(extlang_list, a, b); });
465 
466  load_language_tag_list (tags_directory, "dcnc", [](string a, string b) { dcnc_list.push_back(make_pair(a, b)); });
467 }
468 
469 
470 vector<pair<string, string>> dcp::dcnc_tags ()
471 {
472  return dcnc_list;
473 }
474 
475 
An exception related to a file.
Definition: exceptions.h:56
Definition: file.h:49
DCP_ASSERT macro.
Exceptions thrown by libdcp.
LanguageTag class.
Namespace for everything in libdcp.
Definition: array_data.h:50