Project

General

Profile

Feature #4509 » 0001-eit-Scrape-genre-from-text-in-OTA-EIT.-4509.patch

Em Smith, 2017-12-16 16:11

View differences:

data/conf/epggrab/eit/scrape/Bulsatcom_39E
1
{
2
    "season_num": [
3
        "сезон ([0-9]+)",
4
        "[, ] сезон ([0-9]+)",
5
        "сез.? ([0-9]+)",
6
        "[, ] с. ([0-9]+)",
7
        "с. ([0-9]+), еп[.]",
8
        "с. ([0-9]+)",
9
        "еп. [0-9]+,.*, ([0-9]+), ?сез"
10
    ],
11
    "episode_num": [
12
        "([0-9]+) серия",
13
        "еп. ([0-9]+)",
14
        "[, ] ([0-9]+) еп[.]",
15
        "([0-9]+) еп.[,]",
16
        "епизод ([0-9]+)",
17
        "Епизод ([0-9]+)",
18
        "[, ] ([0-9]+) епизод",
19
        "([0-9]+) епизод"
20
    ],
21
    "airdate": [
22
        ", ([0-9][0-9][0-9][0-9])"
23
    ],
24
    "genre" : [ {
25
        "Romance": ["(драма, романтичен)"],
26
        "Documentary": ["(документален)"]
27
    }
28
    ]
29
}
data/conf/epggrab/eit/scrape/uk
24 24
  ],
25 25
  "is_new" : [
26 26
      "^(New: )"
27
  ],
28
  "genre" : [ {
29
      "Movie / Drama": ["(Movie|Film)"],
30
      "Detective / Thriller" : ["(Murder mystery|thriller|sleuth|detective|Miss Marple|Poirot|Agatha Christie|^Columbo)"],
31
      "Adventure / Western / War" : ["(Action adventure|wartime)", "^(Action|Western)"],
32
      "Science fiction / Fantasy / Horror" : ["^(Sci-fi)", "^(Horror)", "(superhero fantasy)"],
33
      "Comedy" : ["(Comedy-drama| comedy|action adventure|^Comedy)"],
34
      "Romance" : ["(Romcom)"],
35
      "Adult movie / Drama" : ["(18[+])", "(Adults only)", "(Very strong language|Extreme violence)"],
36
      "News / Current affairs" : ["(BBC News|ITV News|Sky News)"],
37
      "News / Weather report": ["(Followed by [Ww]eather|weather forecast|Shipping Forecast)"],
38
      "Documentary" : ["(Documentary series)"],
39
      "Talk show" : ["(chats about)"],
40
      "Sports" : ["^(Snooker)"],
41
      "Football / Soccer" : ["^(Football|Match of the)", "(NFL|Premier League)"],
42
      "Team sports (excluding football)" : ["^(Rugby)"],
43
      "Equestrian" : ["Racing.*(Ascot|Cheltenham)"],
44
      "Children's / Youth programs" : ["(Family animation|Children's comedy)"],
45
      "Cartoons / Puppets" : ["(Family animation)"],
46
      "Music / Ballet / Dance" : ["(Dancing)"],
47
      "Nature / Animals / Environment" : ["(Attenborough)"],
48
      "Social / Political issues / Economics" : ["( politics)", "(Mayor's )?Question Time", "House of (Lords|Commons)", "Welsh Assembly|in Parliament" ],
49
      "Advertisement / Shopping" : ["(Auction|Teleshopping)"],
50
      "Cooking" : ["(cooks up|whips up)"]
51
  }
27 52
  ]
28 53
}
src/epg.c
2713 2713
  return (*a == '\0' && *b == '\0'); // end of string(both)
2714 2714
}
2715 2715

2716
static uint8_t _epg_genre_find_by_name ( const char *name, const char *lang )
2716
uint8_t epg_genre_find_by_name ( const char *name, const char *lang )
2717 2717
{
2718 2718
  uint8_t a, b;
2719 2719
  const char *s;
......
2812 2812
int epg_genre_list_add_by_str ( epg_genre_list_t *list, const char *str, const char *lang )
2813 2813
{
2814 2814
  epg_genre_t g;
2815
  g.code = _epg_genre_find_by_name(str, lang);
2815
  g.code = epg_genre_find_by_name(str, lang);
2816 2816
  return epg_genre_list_add(list, &g);
2817 2817
}
2818 2818

src/epg.h
97 97
/* Search */
98 98
int epg_genre_list_contains
99 99
  ( epg_genre_list_t *list, epg_genre_t *genre, int partial );
100

100
uint8_t epg_genre_find_by_name ( const char *name, const char *lang );
101 101
/* List all available genres */
102 102
htsmsg_t *epg_genres_list_all ( int major_only, int major_prefix, const char *lang );
103 103

src/epggrab.h
273 273
  char                   *scrape_config;  ///< Config to use or blank/NULL for default.
274 274
  int                     scrape_episode; ///< Scrape season/episode from EIT summary
275 275
  int                     scrape_subtitle;///< Scrape subtitle from EIT summary
276
  int                     scrape_genre; ///< Scrape genre from EIT text fields
276 277
};
277 278

278 279
/*
src/epggrab/module.c
312 312
      .off    = offsetof(epggrab_module_ota_scraper_t, scrape_subtitle),
313 313
      .group  = 2,
314 314
    },
315
    {
316
      .type   = PT_BOOL,
317
      .id     = "scrape_genre",
318
      .name   = N_("Scrape Genre"),
319
      .desc   = N_("Enable/disable scraping genre from the programme text fields. "
320
                   "Some broadcasters do not send genre information or "
321
                   "send inadequate genre information. "
322
                   "This allows scraping of genre "
323
                   "from within the broadcast text fields if supported by the "
324
                   "configuration file. "
325
                   "This is less accurate than information a broadcaster could provide "
326
                   "but is useful when the information is not provided or is poor. "
327
                   "Broadcasters that provide DVB genre information do not require "
328
                   "this option to be enabled but may gain additional genres by "
329
                   "enabling it. For example, UK users benefit from enabling this."
330
                   ),
331
      .off    = offsetof(epggrab_module_ota_scraper_t, scrape_genre),
332
      .group  = 2,
333
    },
315 334
    {}
316 335
  }
317 336
};
src/epggrab/module/eit.c
46 46
#define EIT_SPEC_NZ_FREEVIEW        2
47 47
#define EIT_SPEC_UK_CABLE_VIRGIN    3
48 48

49
typedef struct eit_genre_regex
50
{
51
  uint8_t genre;              ///< Genre code from epg.c
52
  eit_pattern_list_t p_genre; ///< Regex across fields to match this genre.
53
} eit_genre_regex_t;
49 54

50 55
/* Provider configuration */
51 56
typedef struct eit_module_t
......
56 61
  eit_pattern_list_t p_airdate;        ///< Original air date parser
57 62
  eit_pattern_list_t p_scrape_subtitle;///< Scrape subtitle from summary data
58 63
  eit_pattern_list_t p_is_new;         ///< Is programme new to air
64
  int num_eit_genre_regex;
65
  eit_genre_regex_t *eit_genres;
59 66
} eit_module_t;
60 67

61 68
/* ************************************************************************
......
486 493
  return changed;
487 494
}
488 495

496
/* Genre is handle differently to others in that we build
497
 * up lists of genres in the event and then afterwards if the
498
 * list exists we then see if the entire list has changed.
499
 */
500
static void
501
_eit_scrape_genre(const char *str,
502
                  eit_module_t *eit_mod,
503
                  eit_event_t *ev)
504
{
505
  char buffer[2048];
506
  int i = 0;
507

508
  if (!str || !*str) return;
509
  if (!eit_mod->num_eit_genre_regex) return;
510

511
  for (; i < eit_mod->num_eit_genre_regex; ++i) {
512
    eit_genre_regex_t *egr = &eit_mod->eit_genres[i];
513
    if (eit_pattern_apply_list(buffer, sizeof(buffer), str, &egr->p_genre)) {
514
      /* Free'd by caller */
515
      if (!ev->genre) ev->genre = calloc(1, sizeof(epg_genre_list_t));
516
      epg_genre_list_add_by_eit(ev->genre, egr->genre);
517
    }
518
  }
519
}
520

489 521

490 522
/* ************************************************************************
491 523
 * EIT Event
......
683 715
                                     eit_mod, &en, &copyright_year, &is_new);
684 716
  }
685 717

718
  if (eit_mod->scrape_genre) {
719
    /* Genre scraping builds up a list in ev.genre so has no
720
     * "scraped" value here to check.
721
     */
722
    if (ev.title)
723
      _eit_scrape_genre(lang_str_get(ev.title, ev.default_charset),
724
                        eit_mod, &ev);
725
    if (ev.desc)
726
      _eit_scrape_genre(lang_str_get(ev.desc, ev.default_charset),
727
                        eit_mod, &ev);
728

729
    if (ev.summary)
730
      _eit_scrape_genre(lang_str_get(ev.summary, ev.default_charset),
731
                        eit_mod, &ev);
732
  }
733

686 734
  /* Update Episode */
687 735
  if (ee) {
688 736
    *save |= epg_broadcast_set_episode(ebc, ee, &changes2);
......
1080 1128
  eit_pattern_free_list(&mod->p_airdate);
1081 1129
  eit_pattern_free_list(&mod->p_scrape_subtitle);
1082 1130
  eit_pattern_free_list(&mod->p_is_new);
1131
  mod->num_eit_genre_regex = 0;
1132
  free(mod->eit_genres);
1133
}
1134

1135
/// Convert a message containing an array of genre names to regex matches
1136
/// in to internal format for EPG mapping.
1137
/// For example: [ { "Documentary" : ["(Documentary series)"] }] becomes
1138
/// epg_genre 23 --> regex
1139
static void _eit_scrape_load_one_genre_regex(htsmsg_t *m, eit_module_t *mod)
1140
{
1141
  htsmsg_field_t *f;
1142
  if (!m)
1143
    return;
1144
  HTSMSG_FOREACH(f, m) {
1145
    htsmsg_t *value = htsmsg_get_list_by_field(f);
1146
    if (value && f->hmf_name && *f->hmf_name) {
1147
      const uint8_t genre_int = epg_genre_find_by_name(f->hmf_name, NULL);
1148
      if (genre_int) {
1149
        ++mod->num_eit_genre_regex;
1150
        mod->eit_genres = realloc(mod->eit_genres,
1151
                                  mod->num_eit_genre_regex * sizeof(eit_genre_regex_t));
1152
        eit_genre_regex_t *egr = &mod->eit_genres[mod->num_eit_genre_regex - 1];
1153
        egr->genre = genre_int;
1154
        eit_pattern_compile_list(&egr->p_genre, value);
1155
        tvhinfo(LS_TBL_EIT, "module %s - Scrape \"%s\" to genre 0x%x", mod->id, f->hmf_name, genre_int);
1156
      }
1157
    }
1158
  }
1159
}
1160

1161

1162
/// Walk every field of the list message and pass its map value (NULL when
/// the field is not a map) to _eit_scrape_load_one_genre_regex(), which
/// ignores NULL. A NULL message is a no-op.
static void _eit_scrape_load_genre_regex(htsmsg_t *m, eit_module_t *mod)
{
  htsmsg_field_t *entry;

  if (m) {
    HTSMSG_FOREACH(entry, m) {
      htsmsg_t *genre_map = htsmsg_get_map_by_field(entry);
      _eit_scrape_load_one_genre_regex(genre_map, mod);
    }
  }
}
1084 1172

1085 1173
static int _eit_scrape_load_one ( htsmsg_t *m, eit_module_t* mod )
......
1095 1183
    eit_pattern_compile_list(&mod->p_scrape_subtitle, htsmsg_get_list(m, "scrape_subtitle"));
1096 1184
  }
1097 1185

1186
  if (mod->scrape_genre) {
1187
    _eit_scrape_load_genre_regex(htsmsg_get_list(m, "genre"), mod);
1188
  }
1098 1189
  return 1;
1099 1190
}
1100 1191

1101 1192
static void _eit_module_load_config(eit_module_t *mod)
1102 1193
{
1103
  if (!mod->scrape_episode && !mod->scrape_subtitle) {
1194
  if (!mod->scrape_episode && !mod->scrape_subtitle && !mod->scrape_genre) {
1104 1195
    tvhinfo(LS_TBL_EIT, "module %s - scraper disabled by config", mod->id);
1105 1196
    return;
1106 1197
  }
src/webui/static/app/tvheadend.js
166 166
  "15" : "couch_and_lamp",
167 167
  "16" : "red_heart",
168 168
  "18" : "no_one_under_eighteen_symbol",
169
  "21" : "sun_behind_cloud",
169 170
  "24" : "speaking_head_in_silhouette",
170 171
  "33" : "speaking_head_in_silhouette",
171 172
  "43" : "soccer_ball",
172
- 
(24-24/25)