libdcp
language_tag.cc
Go to the documentation of this file.
1 /*
2  Copyright (C) 2020-2021 Carl Hetherington <cth@carlh.net>
3 
4  This file is part of libdcp.
5 
6  libdcp is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  libdcp is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with libdcp. If not, see <http://www.gnu.org/licenses/>.
18 
19  In addition, as a special exception, the copyright holders give
20  permission to link the code of portions of this program with the
21  OpenSSL library under certain conditions as described in each
22  individual source file, and distribute linked combinations
23  including the two.
24 
25  You must obey the GNU General Public License in all respects
26  for all of the code used other than OpenSSL. If you modify
27  file(s) with this exception, you may extend this exception to your
28  version of the file(s), but you are not obligated to do so. If you
29  do not wish to do so, delete this exception statement from your
30  version. If you delete this exception statement from all source
31  files in the program, then also delete it here.
32 */
33 
34 
40 #include "compose.hpp"
41 #include "dcp_assert.h"
42 #include "exceptions.h"
43 #include "language_tag.h"
44 #include <boost/algorithm/string.hpp>
45 #include <string>
46 
47 
48 using std::make_pair;
49 using std::ostream;
50 using std::pair;
51 using std::string;
52 using std::vector;
53 using boost::optional;
54 using boost::algorithm::trim;
55 using namespace dcp;
56 
57 
58 static vector<LanguageTag::SubtagData> language_list;
59 static vector<LanguageTag::SubtagData> variant_list;
60 static vector<LanguageTag::SubtagData> region_list;
61 static vector<LanguageTag::SubtagData> script_list;
62 static vector<LanguageTag::SubtagData> extlang_list;
63 
64 static vector<pair<string, string>> dcnc_list;
65 
66 
67 static
68 optional<LanguageTag::SubtagData>
69 find_in_list (vector<LanguageTag::SubtagData> const& list, string subtag)
70 {
71  for (auto const& i: list) {
72  if (boost::iequals(i.subtag, subtag)) {
73  return i;
74  }
75  }
76 
77  return {};
78 }
79 
80 
81 LanguageTag::Subtag::Subtag (string subtag, SubtagType type)
82  : _subtag (subtag)
83 {
84  if (!get_subtag_data(type, subtag)) {
85  throw LanguageTagError(String::compose("Unknown %1 string %2", subtag_type_name(type), subtag));
86  }
87 }
88 
89 
90 LanguageTag::LanguageTag (string tag)
91 {
92  vector<string> parts;
93  boost::split (parts, tag, boost::is_any_of("-"));
94  if (parts.empty()) {
95  throw LanguageTagError (String::compose("Could not parse language tag %1", tag));
96  }
97 
98  vector<string>::size_type p = 0;
99  _language = LanguageSubtag (parts[p]);
100  ++p;
101 
102  if (p == parts.size()) {
103  return;
104  }
105 
106  try {
107  _script = ScriptSubtag (parts[p]);
108  ++p;
109  } catch (...) {}
110 
111  if (p == parts.size()) {
112  return;
113  }
114 
115  try {
116  _region = RegionSubtag (parts[p]);
117  ++p;
118  } catch (...) {}
119 
120  if (p == parts.size()) {
121  return;
122  }
123 
124  try {
125  while (true) {
126  _variants.push_back (VariantSubtag(parts[p]));
127  ++p;
128  if (p == parts.size()) {
129  return;
130  }
131  }
132  } catch (...) {}
133 
134  try {
135  while (true) {
136  _extlangs.push_back (ExtlangSubtag(parts[p]));
137  ++p;
138  if (p == parts.size()) {
139  return;
140  }
141  }
142  } catch (...) {}
143 
144  if (p < parts.size()) {
145  throw LanguageTagError (String::compose("Unrecognised subtag %1", parts[p]));
146  }
147 }
148 
149 
150 string
151 LanguageTag::to_string () const
152 {
153  if (!_language) {
154  throw LanguageTagError("No language set up");
155  }
156 
157  auto s = _language->subtag();
158 
159  if (_script) {
160  s += "-" + _script->subtag();
161  }
162 
163  if (_region) {
164  s += "-" + _region->subtag();
165  }
166 
167  for (auto i: _variants) {
168  s += "-" + i.subtag();
169  }
170 
171  for (auto i: _extlangs) {
172  s += "-" + i.subtag();
173  }
174 
175  return s;
176 }
177 
178 
179 void
180 LanguageTag::set_language (LanguageSubtag language)
181 {
182  _language = language;
183 }
184 
185 
186 void
187 LanguageTag::set_script (ScriptSubtag script)
188 {
189  _script = script;
190 }
191 
192 
193 void
194 LanguageTag::set_region (RegionSubtag region)
195 {
196  _region = region;
197 }
198 
199 
200 void
201 LanguageTag::add_variant (VariantSubtag variant)
202 {
203  if (find(_variants.begin(), _variants.end(), variant) != _variants.end()) {
204  throw LanguageTagError (String::compose("Duplicate Variant subtag %1", variant.subtag()));
205  }
206 
207  _variants.push_back (variant);
208 }
209 
210 
211 template <class T>
212 void
213 check_for_duplicates (vector<T> const& subtags, dcp::LanguageTag::SubtagType type)
214 {
215  vector<T> sorted = subtags;
216  sort (sorted.begin(), sorted.end());
217  optional<T> last;
218  for (auto const& i: sorted) {
219  if (last && i == *last) {
220  throw LanguageTagError (String::compose("Duplicate %1 subtag %2", dcp::LanguageTag::subtag_type_name(type), i.subtag()));
221  }
222  last = i;
223  }
224 }
225 
226 
227 void
228 LanguageTag::set_variants (vector<VariantSubtag> variants)
229 {
230  check_for_duplicates (variants, SubtagType::VARIANT);
231  _variants = variants;
232 }
233 
234 
235 void
236 LanguageTag::add_extlang (ExtlangSubtag extlang)
237 {
238  if (find(_extlangs.begin(), _extlangs.end(), extlang) != _extlangs.end()) {
239  throw LanguageTagError (String::compose("Duplicate Extlang subtag %1", extlang.subtag()));
240  }
241 
242  _extlangs.push_back (extlang);
243 }
244 
245 
246 void
247 LanguageTag::set_extlangs (vector<ExtlangSubtag> extlangs)
248 {
249  check_for_duplicates (extlangs, SubtagType::EXTLANG);
250  _extlangs = extlangs;
251 }
252 
253 
254 string
255 LanguageTag::description () const
256 {
257  if (!_language) {
258  throw LanguageTagError("No language set up");
259  }
260 
261  string d;
262 
263  for (auto const& i: _variants) {
264  optional<SubtagData> variant = get_subtag_data (SubtagType::VARIANT, i.subtag());
265  DCP_ASSERT (variant);
266  d += variant->description + " dialect of ";
267  }
268 
269  auto language = get_subtag_data (SubtagType::LANGUAGE, _language->subtag());
270  DCP_ASSERT (language);
271  d += language->description;
272 
273  if (_script) {
274  auto script = get_subtag_data (SubtagType::SCRIPT, _script->subtag());
275  DCP_ASSERT (script);
276  d += " written using the " + script->description + " script";
277  }
278 
279  if (_region) {
280  auto region = get_subtag_data (SubtagType::REGION, _region->subtag());
281  DCP_ASSERT (region);
282  d += " for " + region->description;
283  }
284 
285  for (auto const& i: _extlangs) {
286  auto extlang = get_subtag_data (SubtagType::EXTLANG, i.subtag());
287  DCP_ASSERT (extlang);
288  d += ", " + extlang->description;
289  }
290 
291  return d;
292 }
293 
294 
295 vector<LanguageTag::SubtagData> const &
296 LanguageTag::get_all (SubtagType type)
297 {
298  switch (type) {
299  case SubtagType::LANGUAGE:
300  return language_list;
301  case SubtagType::SCRIPT:
302  return script_list;
303  case SubtagType::REGION:
304  return region_list;
305  case SubtagType::VARIANT:
306  return variant_list;
307  case SubtagType::EXTLANG:
308  return extlang_list;
309  }
310 
311  return language_list;
312 }
313 
314 
315 string
316 LanguageTag::subtag_type_name (SubtagType type)
317 {
318  switch (type) {
319  case SubtagType::LANGUAGE:
320  return "Language";
321  case SubtagType::SCRIPT:
322  return "Script";
323  case SubtagType::REGION:
324  return "Region";
325  case SubtagType::VARIANT:
326  return "Variant";
327  case SubtagType::EXTLANG:
328  return "Extended";
329  }
330 
331  return {};
332 }
333 
334 bool
335 dcp::LanguageTag::VariantSubtag::operator== (VariantSubtag const & other) const
336 {
337  return subtag() == other.subtag();
338 }
339 
340 
341 bool
342 dcp::LanguageTag::VariantSubtag::operator< (VariantSubtag const & other) const
343 {
344  return subtag() < other.subtag();
345 }
346 
347 
348 bool
349 dcp::LanguageTag::ExtlangSubtag::operator== (ExtlangSubtag const & other) const
350 {
351  return subtag() == other.subtag();
352 }
353 
354 
355 bool
356 dcp::LanguageTag::ExtlangSubtag::operator< (ExtlangSubtag const & other) const
357 {
358  return subtag() < other.subtag();
359 }
360 
361 
362 bool
363 dcp::operator== (dcp::LanguageTag const& a, dcp::LanguageTag const& b)
364 {
365  return a.to_string() == b.to_string();
366 }
367 
368 
369 bool
370 dcp::operator!= (dcp::LanguageTag const& a, dcp::LanguageTag const& b)
371 {
372  return a.to_string() != b.to_string();
373 }
374 
375 
376 bool
377 dcp::operator< (dcp::LanguageTag const& a, dcp::LanguageTag const& b)
378 {
379  return a.to_string() < b.to_string();
380 }
381 
382 
383 ostream&
384 dcp::operator<< (ostream& os, dcp::LanguageTag const& tag)
385 {
386  os << tag.to_string();
387  return os;
388 }
389 
390 
391 vector<pair<LanguageTag::SubtagType, LanguageTag::SubtagData>>
392 LanguageTag::subtags () const
393 {
394  vector<pair<SubtagType, SubtagData>> s;
395 
396  if (_language) {
397  s.push_back (make_pair(SubtagType::LANGUAGE, *get_subtag_data(SubtagType::LANGUAGE, _language->subtag())));
398  }
399 
400  if (_script) {
401  s.push_back (make_pair(SubtagType::SCRIPT, *get_subtag_data(SubtagType::SCRIPT, _script->subtag())));
402  }
403 
404  if (_region) {
405  s.push_back (make_pair(SubtagType::REGION, *get_subtag_data(SubtagType::REGION, _region->subtag())));
406  }
407 
408  for (auto const& i: _variants) {
409  s.push_back (make_pair(SubtagType::VARIANT, *get_subtag_data(SubtagType::VARIANT, i.subtag())));
410  }
411 
412  for (auto const& i: _extlangs) {
413  s.push_back (make_pair(SubtagType::EXTLANG, *get_subtag_data(SubtagType::EXTLANG, i.subtag())));
414  }
415 
416  return s;
417 }
418 
419 
420 optional<LanguageTag::SubtagData>
421 LanguageTag::get_subtag_data (LanguageTag::SubtagType type, string subtag)
422 {
423  switch (type) {
424  case SubtagType::LANGUAGE:
425  return find_in_list(language_list, subtag);
426  case SubtagType::SCRIPT:
427  return find_in_list(script_list, subtag);
428  case SubtagType::REGION:
429  return find_in_list(region_list, subtag);
430  case SubtagType::VARIANT:
431  return find_in_list(variant_list, subtag);
432  case SubtagType::EXTLANG:
433  return find_in_list(extlang_list, subtag);
434  }
435 
436  return {};
437 }
438 
439 
440 optional<string>
441 LanguageTag::get_subtag_description (LanguageTag::SubtagType type, string subtag)
442 {
443  auto data = get_subtag_data (type, subtag);
444  if (!data) {
445  return {};
446  }
447 
448  return data->description;
449 }
450 
451 
452 void
453 load_language_tag_list (boost::filesystem::path tags_directory, string name, std::function<void (std::string, std::string)> add)
454 {
455  auto f = fopen_boost (tags_directory / name, "r");
456  if (!f) {
457  throw FileError ("Could not open tags file", tags_directory / name, errno);
458  }
459  char buffer[512];
460 
461  int i = 0;
462  while (!feof(f)) {
463  char* r = fgets (buffer, sizeof(buffer), f);
464  if (r == 0) {
465  break;
466  }
467  string a = buffer;
468  trim (a);
469  r = fgets (buffer, sizeof(buffer), f);
470  if (r == 0) {
471  fclose (f);
472  throw FileError ("Bad tags file", tags_directory / name, -1);
473  }
474  string b = buffer;
475  trim (b);
476  add (a, b);
477  ++i;
478  }
479 
480  fclose (f);
481 }
482 
483 
484 void
485 dcp::load_language_tag_lists (boost::filesystem::path tags_directory)
486 {
487  auto add_subtag = [](vector<LanguageTag::SubtagData>& list, string a, string b) {
488  list.push_back (LanguageTag::SubtagData(a, b));
489  };
490 
491  load_language_tag_list (tags_directory, "language", [&add_subtag](string a, string b) { add_subtag(language_list, a, b); });
492  load_language_tag_list (tags_directory, "variant", [&add_subtag](string a, string b) { add_subtag(variant_list, a, b); });
493  load_language_tag_list (tags_directory, "region", [&add_subtag](string a, string b) { add_subtag(region_list, a, b); });
494  load_language_tag_list (tags_directory, "script", [&add_subtag](string a, string b) { add_subtag(script_list, a, b); });
495  load_language_tag_list (tags_directory, "extlang", [&add_subtag](string a, string b) { add_subtag(extlang_list, a, b); });
496 
497  load_language_tag_list (tags_directory, "dcnc", [](string a, string b) { dcnc_list.push_back(make_pair(a, b)); });
498 }
499 
500 
501 vector<pair<string, string>> dcp::dcnc_tags ()
502 {
503  return dcnc_list;
504 }
505 
506 
An exception related to a file.
Definition: exceptions.h:56
DCP_ASSERT macro.
Exceptions thrown by libdcp.
Namespace for everything in libdcp.
Definition: array_data.h:50
FILE * fopen_boost(boost::filesystem::path, std::string)
Definition: util.cc:232