Project

General

Profile

Feature #4509 » 0001-eit-Scrape-genre-from-text-in-OTA-EIT.-4509.patch

Em Smith, 2017-10-19 19:28

View differences:

src/epggrab.h
273 273
  char                   *scrape_config;  ///< Config to use or blank/NULL for default.
274 274
  int                     scrape_episode; ///< Scrape season/episode from EIT summary
275 275
  int                     scrape_subtitle;///< Scrape subtitle from EIT summary
276
  int                     scrape_genre; ///< Scrape genre from EIT text fields
276 277
};
277 278

278 279
/*
src/epggrab/module.c
312 312
      .off    = offsetof(epggrab_module_ota_scraper_t, scrape_subtitle),
313 313
      .group  = 2,
314 314
    },
315
    {
316
      .type   = PT_BOOL,
317
      .id     = "scrape_genre",
318
      .name   = N_("Scrape Genre"),
319
      .desc   = N_("Enable/disable scraping genre from the programme text fields. "
320
                   "Some broadcasters do not send genre information. "
321
                   "This allows scraping of genre "
322
                   "from within the broadcast text fields if supported by the "
323
                   "configuration file. "
324
                   "This is less accurate than information a broadcaster could provide "
325
                   "but is useful when the information is not provided. "
326
                   "Broadcasters that provide DVB genre information do not require "
327
                   "this option to be enabled."
328
                   ),
329
      .off    = offsetof(epggrab_module_ota_scraper_t, scrape_genre),
330
      .group  = 2,
331
    },
315 332
    {}
316 333
  }
317 334
};
src/epggrab/module/eit.c
46 46
#define EIT_SPEC_NZ_FREEVIEW        2
47 47
#define EIT_SPEC_UK_CABLE_VIRGIN    3
48 48

49
/* One genre scraping rule: a genre code paired with the pattern list that
 * selects it. eit_module_t keeps an array of these (eit_genres).
 */
typedef struct eit_genre_regex
50
{
51
  uint8_t genre;              ///< Genre code from epg.c
52
  eit_pattern_list_t p_genre; ///< Regex across fields to match this genre.
53
} eit_genre_regex_t;
49 54

50 55
/* Provider configuration */
51 56
typedef struct eit_module_t
......
55 60
  eit_pattern_list_t p_enum;
56 61
  eit_pattern_list_t p_airdate;        ///< Original air date parser
57 62
  eit_pattern_list_t p_scrape_subtitle;///< Scrape subtitle from summary data
63
  int num_eit_genre_regex;
64
  eit_genre_regex_t *eit_genres;
58 65
} eit_module_t;
59 66

60 67
/* ************************************************************************
......
483 490
  return changed;
484 491
}
485 492

493
/* Genre is handle differently to others in that we build
494
 * up lists of genres in the event and then afterwards if the
495
 * list exists we then see if the entire list has changed.
496
 */
497
static void
498
_eit_scrape_genre(const char *str,
499
                  eit_module_t *eit_mod,
500
                  eit_event_t *ev)
501
{
502
  if (!str || !*str) return;
503
  if (!eit_mod->num_eit_genre_regex) return;
504

505
  char buffer[2048];
506
  int i = 0;
507
  for (; i < eit_mod->num_eit_genre_regex; ++i) {
508
    eit_genre_regex_t *egr = &eit_mod->eit_genres[i];
509
    if (eit_pattern_apply_list(buffer, sizeof(buffer), str, &egr->p_genre)) {
510
      if (!ev->genre) ev->genre = calloc(1, sizeof(epg_genre_list_t));
511
      epg_genre_list_add_by_eit(ev->genre, egr->genre);
512
    }
513
  }
514
}
515

486 516

487 517
/* ************************************************************************
488 518
 * EIT Event
......
678 708
                                     eit_mod, &en, &first_aired);
679 709
  }
680 710

711
  if (eit_mod->scrape_genre) {
712
    /* Genre scraping builds up a list in ev.genre so has no
713
     * "scraped" value here to check.
714
     */
715
    if (ev.title)
716
      _eit_scrape_genre(lang_str_get(ev.title, ev.default_charset),
717
                        eit_mod, &ev);
718
    if (ev.desc)
719
      _eit_scrape_genre(lang_str_get(ev.desc, ev.default_charset),
720
                        eit_mod, &ev);
721

722
    if (ev.summary)
723
      _eit_scrape_genre(lang_str_get(ev.summary, ev.default_charset),
724
                        eit_mod, &ev);
725
  }
726

681 727
  /* Update Episode */
682 728
  if (ee) {
683 729
    *save |= epg_broadcast_set_episode(ebc, ee, &changes2);
......
1062 1108
  eit_pattern_free_list(&mod->p_enum);
1063 1109
  eit_pattern_free_list(&mod->p_airdate);
1064 1110
  eit_pattern_free_list(&mod->p_scrape_subtitle);
1111
  mod->num_eit_genre_regex = 0;
1112
  free(mod->eit_genres);
1113
}
1114

1115
/* Load the genre scraping rules from a scraper configuration message.
 *
 * Every field of `m` named "genre_<hex>" whose value is a list appends one
 * entry to mod->eit_genres: <hex> is the genre code (0x00-0xff) and the list
 * is compiled into the pattern list for that genre. Fields whose code cannot
 * be parsed or is out of range are ignored. If the table cannot be grown the
 * entries loaded so far are kept and loading stops.
 *
 * NOTE(review): growing the table with realloc() may move existing entries;
 * if eit_pattern_list_t is an intrusive list head that stores pointers into
 * itself, already compiled entries would need fixing up - confirm.
 * NOTE(review): p_genre is handed to eit_pattern_compile_list() without
 * being initialised here; presumably that function initialises it - confirm.
 */
static void _eit_scrape_load_genre_regex(htsmsg_t *m, eit_module_t *mod)
{
  htsmsg_field_t *f;
  HTSMSG_FOREACH(f, m) {
    htsmsg_t *value;
    unsigned int code;  /* %x requires an unsigned int destination */
    eit_genre_regex_t *base, *grown, *egr;

    if (strncmp(f->hmf_name, "genre_", 6)) continue;
    if (!(value = htsmsg_get_list_by_field(f))) continue;

    /* One item parsed means we have the hex category code. */
    if (sscanf(f->hmf_name, "genre_%x", &code) != 1) continue;
    if (code > 255) continue;

    /* The clear path in this file frees eit_genres and zeroes the count
     * without resetting the pointer, so with a zero count the stored
     * pointer is treated as stale and a fresh table is started. */
    base = mod->num_eit_genre_regex > 0 ? mod->eit_genres : NULL;

    /* Grow through a temporary and only bump the count once the new slot
     * really exists, so a failed allocation leaves the table consistent. */
    grown = realloc(base,
                    (size_t)(mod->num_eit_genre_regex + 1) * sizeof(*grown));
    if (!grown) return;
    mod->eit_genres = grown;

    egr = &grown[mod->num_eit_genre_regex++];
    egr->genre = (uint8_t)code;
    eit_pattern_compile_list(&egr->p_genre, value);
  }
}
1066 1136

1067 1137
static int _eit_scrape_load_one ( htsmsg_t *m, eit_module_t* mod )
......
1076 1146
    eit_pattern_compile_list(&mod->p_scrape_subtitle, htsmsg_get_list(m, "scrape_subtitle"));
1077 1147
  }
1078 1148

1149
  if (mod->scrape_genre) {
1150
    _eit_scrape_load_genre_regex(m, mod);
1151
  }
1079 1152
  return 1;
1080 1153
}
1081 1154

(23-23/25)