Commercial detection with silences

From MythTV Official Wiki
Revision as of 13:11, 22 February 2013 by Hippo (talk | contribs) (Only set detected marks.)

Jump to: navigation, search


Author Hippo
Description A python program based on Mythcommflag-wrapper (thank you Cowbut) that can be used on UK FreeviewHD channels and probably others.
Supports Version25.png  


I tried out the scripts in Mythcommflag-wrapper and they worked well on the Freeview channels I receive but not on the FreeviewHD channels. The reason is that the audio on FreeviewHD is an AAC stream and not an MP3 stream. Fixing that would require decoding from AAC and encoding back to MP3 before letting the script analyse the MP3 stream. So I wrote a little C program to analyse an uncompressed audio stream and a Python program to wrap it up and turn the output into a commercial skip list.

To use this

  • Compile the two C programs and put them somewhere the Python program can find them. (e.g. /usr/local/bin)
  • Copy the Python program to somewhere the backend can find it.
  • Follow the instructions on Mythcommflag-wrapper except the job setting should be 'mausc-wrapper.py %JOBID%'

The python program uses avconv to decode the program file to an AU stream. If you don't have avconv replace it with ffmpeg or mythffmpeg (avconv is the new name for ffmpeg). It upconverts the audio to 6 channels so that it works even when the audio switches around. If you know you only ever get stereo you can replace the 6 with 2 to save a bit of CPU power. It might have to go up in future. Up-converting is better because it's low power and always works whereas down-converting may fail depending on your version of avconv/ffmpeg.

This can do near-realtime commflagging by enabling the backend setting to start commflagging when the recording starts. (mythtv-setup/General/Page9-JobQueueGlobal). The programs only write the marks for a break once <max-break-setting> seconds have passed since the start of that break was detected, so the marks appear after the commercial break has ended. If you are displaying the programme and get too close to the end you will be in the commercials before they are flagged. C'est la vie.

It's low CPU in that it only decodes the audio stream and since it follows the end of the recording it shouldn't thrash the memory or disk. avconv takes about 2% to decode ITV1-HD on a 1.6GHz Atom Asus motherboard. catagrower takes about 1% and could be a lot better if made less portable.

Script.png catagrower.c

/* Copyright 2012 Crackers Phipps. */
/* Public domain. */
/* Compile with
   gcc -std=c99 -O catagrower.c -o catagrower
*/
/* This program will stop when the file has not grown for this many seconds. */
#define TIMEOUT 60

/* MythTV files are often large. */
#define _FILE_OFFSET_BITS 64

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

/* Print the command-line help for catagrower to stderr.
   name: program name to show in the usage line (normally argv[0]). */
static void usage(const char *name) {
  fprintf(stderr,
          "Usage: %s <file>\n"
          "<file>: file to be monitored.\n"
          "The contents of the file will be copied to stdout.\n"
          "Copying will stop when the file has stopped growing.\n",
          name);
}

/* Size in bytes of the buffer used to copy the file to stdout. */
#define BUFFSIZE 4096

/* Write all len bytes of buf to fd, retrying after short writes.
   Returns 0 on success, -1 as soon as write() reports an error. */
static int write_all(int fd, const char *buf, size_t len) {
  while (len > 0) {
    ssize_t done = write(fd, buf, len);
    if (done < 0) {
      return -1;
    }
    buf += done;
    len -= (size_t)done;
  }
  return 0;
}

/* Copy the file named by argv[1] to stdout and keep following it as it
   grows.  Copying stops once no new data has appeared for TIMEOUT
   consecutive one-second polls.
   Exit status: 0 on normal completion, 1 on bad usage, 2 if the file
   cannot be opened, 3 on a read or write error. */
int main(int argc, char **argv) {

  /* Check usage. */
  if (2 != argc) {
    usage(argv[0]);
    exit(1);
  }

  /* Open the file to be followed. */
  int fd = open(argv[1], O_RDONLY);
  if (-1 == fd) {
    fprintf(stderr, "Could not open %s for reading.\n", argv[1]);
    usage(argv[0]);
    exit(2);
  }

  int timer = TIMEOUT;
  char buffer[BUFFSIZE];
  while (timer > 0) {
    ssize_t bytes;
    /* Drain everything currently available; any data resets the timer. */
    while ((bytes = read(fd, buffer, BUFFSIZE)) > 0) {
      if (0 != write_all(STDOUT_FILENO, buffer, (size_t)bytes)) {
        perror("write");
        close(fd);
        exit(3);
      }
      timer = TIMEOUT;
    }
    /* A negative count is an error, not end of data: stop instead of
       passing it to write() as a length and retrying forever. */
    if (bytes < 0) {
      perror("read");
      close(fd);
      exit(3);
    }
    /* End of file for now: wait a second and see whether it has grown. */
    sleep(1);
    timer--;
  }
  close(fd);
  return 0;
}


Script.png mausc.c

/* Copyright 2013 Tinsel Phipps. */
/* Public domain. Links with libsndfile which is GPL. */
/* Compile with
   gcc -std=c99 -O mausc.c -o mausc -lsndfile -lm
   You may need the libsndfile-dev package installed.
*/
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sndfile.h>

/* Print the command-line help for mausc to stderr.
   name: program name to show in the usage and example lines
   (normally argv[0]). */
static void usage(const char *name) {
  /* Fixed help lines that sit between the usage line and the example. */
  static const char *const details[] = {
    "<threshold>: silence threshold in dB.\n",
    "<min>: minimum time for silence detection in seconds.\n",
    "<max>: maximum length of breaks in seconds.\n",
    "<rate>: frame rate of video.\n",
    "An AU format file should be fed into this program.\n",
  };

  fprintf(stderr, "Usage: %s <threshold> <min> <max> <rate>\n", name);
  for (size_t i = 0; i < sizeof details / sizeof details[0]; i++) {
    fputs(details[i], stderr);
  }
  fprintf(stderr, "Example: %s -70 0.15 400 25 < audio.au\n", name);
}

int main(int argc, char **argv) {

  /* Check usage. */
  if (5 != argc) {
    usage(argv[0]);
    exit(1);
  }

  /* Load options. */
  float threshold, min, max, rate;
  if (1 != sscanf(argv[1], "%f", &threshold)) {
    fprintf(stderr, "Could not parse threshold option into a number.\n");
    usage(argv[0]);
    exit(2);
  }
  if (1 != sscanf(argv[2], "%f", &min)) {
    fprintf(stderr, "Could not parse min option into a number.\n");
    usage(argv[0]);
    exit(2);
  }
  if (1 != sscanf(argv[3], "%f", &max)) {
    fprintf(stderr, "Could not parse max option into a number.\n");
    usage(argv[0]);
    exit(2);
  }
  if (1 != sscanf(argv[4], "%f", &rate)) {
    fprintf(stderr, "Could not parse rate option into a number.\n");
    usage(argv[0]);
    exit(2);
  }

  /* Scale threshold to integer range that libsndfile will use. */
  threshold = INT_MAX * pow(10, threshold / 20);
  /* Scale times to frames. */
  min = min * rate;
  max = max * rate;

  /* Check the input is an audiofile. */
  SNDFILE *input;
  SF_INFO metadata;
  input = sf_open_fd(STDIN_FILENO, SFM_READ, &metadata, SF_FALSE);
  if (NULL == input) {
    sf_perror(NULL);
    return sf_error(NULL);
  }

  /* Allocate data buffer to contain audio data from one video frame. */
  size_t frameSamples = metadata.channels * metadata.samplerate / rate;

  int *samples;
  samples = malloc(frameSamples * sizeof(int));
  if (NULL == samples) {
    perror(NULL);
    return errno;
  }

  /* Process the file one frame at a time and process cuts along the way. */
  int frames = 0;
  int silent = 0;
  int last_silent = 0;
  int gapend = 0;
  int gapstart = 0;
  int first_gapstart = 0;
  while (frameSamples == sf_read_int(input, samples, frameSamples)) {
    frames++;
    int maxabs = 0;
    for (unsigned i = 0; i < frameSamples; i++) {
      samples[i] = abs(samples[i]);
      maxabs = (maxabs > samples[i]) ? maxabs : samples[i];
    }
    last_silent = silent;
    silent = (maxabs < threshold);
    /* Remember first transition to silence. */
    if (silent && !gapstart) {
      gapstart = frames;
    }
    /* Store last transition out of silence. */
    if (!silent && last_silent) {
      /* Make sure it is long enough. */
      if (frames > gapstart + min) {
        gapend = frames;
        if (!first_gapstart) {
          first_gapstart = gapstart;
        }
      }
      gapstart = 0;
    }
    /* Create a skip when max frames have passed. */
    if (first_gapstart && gapend && frames > first_gapstart + max) {
      printf("%d %d\n", first_gapstart, gapend);
      fflush(stdout);
      gapstart = 0;
      gapend = 0;
      first_gapstart = 0;
    }
  }
  /* At end of file can have an unprocessed gap. */
  if (first_gapstart) {
    if (first_gapstart == gapstart) {
      gapend = frames;
    }
    printf("%d %d\n", first_gapstart, gapend);
  }
  return sf_close(input);
}


Script.png mausc-wrapper

#!/usr/bin/env python
# Build a skiplist from silence in the audio track.
# Based on http://www.mythtv.org/wiki/Transcode_wrapper_stub
from MythTV import MythDB, Job, Recorded, findfile, MythLog
from os import path
from subprocess import Popen, PIPE
from optparse import OptionParser

def runjob(jobid=None, chanid=None, starttime=None):
    """Build a commercial skip list for one recording from audio silences.

    The recording is identified either by jobid (chanid and starttime are
    then read from the job) or directly by chanid and starttime.

    Any existing markup is cleared, then the recording file is piped
    through catagrower, avconv and mausc.  Each "<start> <end>" line that
    mausc prints is stored as a commercial start/end mark pair as soon as
    it arrives.

    When run as a job the job status and comment are updated; otherwise
    progress is printed.  Exits the process with status 1 if the
    recording or its file cannot be found, or if the helper programs
    cannot be started.
    """
    # Tunable settings (would like to retrieve per channel from the database)
    thresh = -70 # Silence threshold in dB.
    minquiet = 0.15 # Minimum time for silence detection in seconds.
    maxbreak = 400 # Maximum length of adverts breaks.
    rate = 25 # Frame rate of video. (should be automatic)

    db = MythDB()
    job = None
    if jobid:
        job = Job(jobid, db=db)
        chanid = job.chanid
        starttime = job.starttime

    def fail(message):
        # Report a fatal error through the job if there is one, otherwise
        # on stdout, then exit with status 1.
        if job is not None:
            job.update({'status':job.ERRORED,
                        'comment':'ERROR: ' + message})
        else:
            print(message)
        # SystemExit is raised directly because the exit() builtin is only
        # present when the site module has been loaded.
        raise SystemExit(1)

    try:
        rec = Recorded((chanid, starttime), db=db)
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must not be reported
        # as a missing recording.
        fail('Could not find recording.')

    # Get program handle in standard format.
    starttime = rec.starttime
    chanid = rec.chanid

    sg = findfile(rec.basename, rec.storagegroup, db=db)
    if sg is None:
        fail('Local access to recording not found.')

    infile = path.join(sg.dirname, rec.basename)

    # Purge any existing skip list.
    rec.markup.clean()
    rec.commflagged = 0
    rec.update()

    started = []
    try:
        # Write out the file contents and keep going till recording is finished.
        p1 = Popen(["catagrower", infile],
                   stdout = PIPE)
        started.append(p1)
        # Pipe through avconv to extract uncompressed audio stream.
        p2 = Popen(["avconv", "-v", "8", "-i", "pipe:0", "-f", "au", "-ac", "6", "-"],
                   stdin = p1.stdout, stdout = PIPE)
        started.append(p2)
        # Drop our copy of the read end so p1 gets SIGPIPE if p2 exits.
        p1.stdout.close()
        # Pipe to mausc which will spit out a list of breaks.
        p3 = Popen(["mausc", str(thresh), str(minquiet), str(maxbreak), str(rate)],
                   stdin = p2.stdout, stdout = PIPE)
        started.append(p3)
        # Likewise let p2 get SIGPIPE if p3 exits.
        p2.stdout.close()
    except OSError:
        # One of the programs could not be started; stop the ones that were.
        for proc in started:
            try:
                proc.kill()
            except OSError:
                pass
            proc.wait()
        fail('Could not start the audio analysis programs.')

    # Store breaks in the database.
    breaks = 0
    while 1:
        line = p3.stdout.readline()
        if not line:
            break
        fields = line.split()
        if len(fields) != 2:
            # Not a "<start> <end>" pair; ignore it.
            continue
        try:
            start = int(fields[0])
            end = int(fields[1])
        except ValueError:
            continue
        rec.markup.append(start, rec.markup.MARK_COMM_START, None)
        rec.markup.append(end, rec.markup.MARK_COMM_END, None)
        rec.commflagged = 1
        # Update after every break so marks are visible while still recording.
        rec.update()
        breaks = breaks + 1
        if job is None:
            print('Got a break at frame %s' % start)

    # Reap the helper processes so they are not left behind as zombies.
    p3.stdout.close()
    for proc in (p3, p2, p1):
        proc.wait()

    if job is not None:
        job.update({'status':job.FINISHED,
                    'comment':'Audio commflag detected %s breaks.' % breaks
                    })
    else:
        print('Audio commflag detected %s breaks.' % breaks)

def main():
    """Parse the command line and run the commflag job.

    Accepts either a single positional jobid, or both --chanid and
    --starttime for manual operation.  With neither, prints an error and
    the option help, then exits with status 1.
    """
    parser = OptionParser(usage="usage: %prog [options] [jobid]")
    parser.add_option('--chanid', action='store', type='int',
                      dest='chanid', help='Use chanid for manual operation')
    parser.add_option('--starttime', action='store', type='string',
                      dest='stime', help='Use starttime for manual operation')
    # Let MythLog register its own options on the parser.
    MythLog.loadOptParse(parser)
    opts, args = parser.parse_args()

    # A single positional argument is taken to be the job id.
    if len(args) == 1:
        runjob(jobid=args[0])
        return

    # Manual operation needs both the channel id and the start time.
    if opts.chanid and opts.stime:
        runjob(chanid=opts.chanid, starttime=opts.stime)
        return

    print('Script must be provided either jobid, or chanid and starttime.')
    parser.print_help()
    exit(1)

# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()