Commercial detection with silences

From MythTV Official Wiki
Revision as of 16:43, 5 July 2012 by Wagnerrp (talk | contribs) (Add error reporting when run as job. Use bindings to handle `recordedmarkup` modification. Have logger apply additional arguments to OptionParser to allow MythTV's verbosity arguments to be used.)

Jump to: navigation, search

Author Hippo
Description A python program based on Mythcommflag-wrapper (thank you Cowbut) that can be used on UK FreeviewHD channels and probably others.
Supports Version25.png  

I tried out the scripts in Mythcommflag-wrapper and they worked well on the Freeview channels I receive but not on the FreeviewHD channels. The reason is that the audio on FreeviewHD is an AAC stream and not an MP3 stream. Fixing that would require decoding from AAC and encoding back to MP3 before letting the script analyse the MP3 stream. So I wrote a little C program to analyse an uncompressed audio stream and a Python program to wrap it up and turn the output into a commercial skip list.

To use this

  • Compile the C program and put it somewhere the Python program can find it.
  • Copy the Python program to somewhere the backend can find it.
  • Follow the instructions on Mythcommflag-wrapper.

There is some SQL in here that should be done with the MythTV Python bindings but I couldn't figure out how to do it. I didn't really want to call up mythutil just to edit the skiplist.

The wrapper script uses avconv to decode the program file to an AU stream. If you don't have avconv replace it with ffmpeg (avconv is the new name for ffmpeg).

This does not do realtime commflagging, but it might be able to if the C program paced the input stream (it knows the data rate). The Python program would also have to be changed so that it does not buffer the output of the C program.

Script.png mausc.c

/* Copyright 2012 Tinsel Phipps. */
/* Public domain. Links with libsndfile which is GPL. */
/* Compile with
   gcc -std=c99 -Wall -Wextra -Werror -O mausc.c -o mausc -lsndfile -lm */
#include <stdlib.h>
#include <math.h>
#include <sndfile.h>
#include <errno.h>
#include <unistd.h>
#include <limits.h>

/*
 * Print a usage summary and an example invocation to stderr.
 *
 * name: program name substituted into the usage and example lines.
 *
 * NOTE(review): fprintf needs <stdio.h>, which is not among the #include
 * lines visible in this listing, and this function's closing brace is not
 * visible either. Both look like lines lost from the listing; confirm
 * against the original source.
 */
static void usage(const char *name) {
  fprintf(stderr, "Usage: %s <threshold> <min> <rate>\n", name);
  fprintf(stderr, "<threshold>: silence threshold in dB.\n");
  fprintf(stderr, "<min>: minimum time for silence detection in seconds.\n");
  fprintf(stderr, "<rate>: frame rate of video.\n");
  fprintf(stderr, "An AU format file should be fed into this program.\n");
  fprintf(stderr, "Example: %s -70, 0.15, 25 <\n", name);

/*
 * Read an audio stream from stdin and print "<start> <end>" pairs of video
 * frame numbers to stdout, one pair per detected silent run.
 *
 * argv[1]: silence threshold in dB.
 * argv[2]: minimum silence length in seconds.
 * argv[3]: video frame rate.
 *
 * Returns sf_error(NULL) if stdin cannot be opened as a sound file, errno
 * if the sample buffer cannot be allocated, otherwise the result of
 * sf_close().
 *
 * NOTE(review): several lines appear to be missing from this listing:
 * closing braces, the handling inside the argument-error branches, and any
 * increment of `frames`. Confirm against the original source before
 * building.
 */
int main(int argc, char **argv) {

  /* Check usage. */
  if (4 != argc) {

  /* Load options. */
  float threshold, min, rate;
  if (1 != sscanf(argv[1], "%f", &threshold)) {
    fprintf(stderr, "Could not parse threshold option into a number.\n");
  if (1 != sscanf(argv[2], "%f", &min)) {
    fprintf(stderr, "Could not parse min option into a number.\n");
  if (1 != sscanf(argv[3], "%f", &rate)) {
    fprintf(stderr, "Could not parse rate option into a number.\n");

  /* Scale threshold to integer range that libsndfile will use. */
  /* 10^(dB / 20) turns the dB figure into an amplitude ratio; it is scaled
     by INT_MAX because the samples are read as int via sf_read_int below. */
  threshold = INT_MAX * pow(10, threshold / 20);
  /* Scale min time to frames. */
  min = min * rate;

  /* Check the input is an audiofile. */
  SNDFILE *input;
  /* NOTE(review): metadata is passed to sf_open_fd without being
     initialised. The libsndfile documentation appears to require the
     format field to be zero when opening for read -- confirm and zero the
     struct if so. */
  SF_INFO metadata;
  input = sf_open_fd(STDIN_FILENO, SFM_READ, &metadata, SF_FALSE);
  if (NULL == input) {
    return sf_error(NULL);

  /* Allocate data buffer to contain audio data from one video frame. */
  /* Sample count across all channels for one video frame; the floating
     point quotient is truncated when stored in size_t. */
  size_t frameSamples = metadata.channels * metadata.samplerate / rate;

  int *samples;
  samples = malloc(frameSamples * sizeof(int));
  if (NULL == samples) {
    return errno;

  /* Indices of frame numbers of quiet periods. */
  int start = 0;
  int end = 0;
  /* Process the file one frame at a time and print out cuts along the way. */
  int frames = 0;
  /* A short read ends the loop, so a trailing partial frame of audio is
     not analysed. */
  while (frameSamples == sf_read_int(input, samples, frameSamples)) {
    /* Peak absolute sample value within this video frame. */
    int maxabs = 0;
    for (unsigned i = 0; i < frameSamples; i++) {
      /* NOTE(review): abs(INT_MIN) is undefined behaviour in C; a full
         scale negative sample would hit it. */
      samples[i] = abs(samples[i]);
      maxabs = (maxabs > samples[i]) ? maxabs : samples[i];
    /* Quiet frame: extend the current silent run to this frame. */
    if (maxabs < threshold) {
      end = frames;
    } else {
      /* Loud frame: report the run just ended if it was longer than the
         minimum, then restart the run at this frame. */
      if (end - start > min) {
        printf("%d %d\n", start, end);
      start = frames;
  return sf_close(input);


#!/usr/bin/env python
# Build a skiplist from silence in the audio track.
# Based on
from MythTV import MythDB, Job, Recorded, findfile, MythLog
from os import path
from subprocess import Popen, PIPE
from optparse import OptionParser

# Flag commercial breaks in one recording from silences in its audio track.
#
# jobid:     job id; when given, chanid and starttime are taken from the Job.
# chanid:    channel id of the recording (used when no jobid is given).
# starttime: start time of the recording (used when no jobid is given).
#
# The recording is decoded to an AU stream with avconv and piped through
# mausc; the silent intervals mausc prints are then coalesced into breaks
# that are appended to rec.markup.
#
# NOTE(review): this listing is missing lines -- the try/except around the
# Recorded lookup, the opening of the job.update({... calls, the exit paths
# of the error branches and the statement under "Commit to database." are
# not visible. Confirm against the original source.
def runjob(jobid=None, chanid=None, starttime=None):
    # Tunable settings (would like to retrieve per channel from the database)
    threshold = -70 # Silence threshold in dB.
    minsilence = 0.15 # Minimum time for silence detection in seconds.
    maxbreak = 400 # Maximum length of adverts breaks.
    framerate = 25 # Frame rate of video. (should be automatic)

    db = MythDB()
    if jobid:
        # Running as a job: the job record identifies the recording.
        job = Job(jobid, db=db)
        chanid = job.chanid
        starttime = job.starttime

        # NOTE(review): the lines that introduce this lookup and its failure
        # handling (presumably try/except) are not visible here.
        rec = Recorded((chanid, starttime), db=db)
        if jobid:
                        'comment':'ERROR: Could not find recording.'})
            print 'Could not find recording.'

    # Get program handle in standard format.
    starttime = rec.starttime
    chanid = rec.chanid

    # Locate the recording's file via its storage group.
    sg = findfile(rec.basename, rec.storagegroup, db=db)
    if sg is None:
        if jobid:
                        'comment':'ERROR: Local access to recording not found.'})
            print 'Local access to recording not found.'

    infile = path.join(sg.dirname, rec.basename)

    # Purge any existing skip list.
    rec.commflagged = 0

    # Extract uncompressed audio stream from recording.
    p1 = Popen(["avconv", "-v", "fatal", "-i", infile, "-f", "au", "-"],
               stdout = PIPE)
    # Pipe to mausc which will spit out a list of silent intervals in frames.
    p2 = Popen(["mausc", str(threshold), str(minsilence), str(framerate)],
               stdin = p1.stdout, stdout = PIPE)
    # stderr is not piped, so `error` is always None here.
    output, error = p2.communicate()

    # Convert maxbreak from seconds to frames.
    maxbreak = maxbreak * framerate

    # Coalesce short silences into larger breaks to skip.
    # breakstart/breakend use 1 as the "nothing seen yet" sentinel.
    breaks = 0
    breakstart = 1
    breakend = 1
    for line in output.splitlines():
        # Each line is "<start frame> <end frame>" as printed by mausc.
        start, end = line.split()
        # Uncomment the next two lines to not cut before the first break.
        #if 1 == breakstart:
        #    breakstart = int(start)
        # This silence ends too far from the current break's start to belong
        # to it: flush the current break and start a new one here.
        if int(end) - breakstart > maxbreak:
            if 1 != breakend:
                rec.markup.append(breakstart, rec.markup.MARK_COMM_START, None)
                rec.markup.append(breakend, rec.markup.MARK_COMM_END, None)
                breaks = breaks + 1
            breakstart = int(start)
        # NOTE(review): `end` is still the string from split(), so breakend
        # is a str while breakstart is an int; the `1 != breakend` test above
        # is therefore always true once any line has been processed.
        breakend = end

    if 1 != breakstart: # Add the last break if not flushed.
        rec.markup.append(breakstart, rec.markup.MARK_COMM_START, None)
        rec.markup.append(breakend, rec.markup.MARK_COMM_END, None)
        breaks = breaks + 1

    # Commit to database.

    if jobid:
                    'comment':'Audio commflag detected %s breaks.' % breaks

# Command-line entry point: parse the options and dispatch to runjob().
#
# Accepts either a single positional jobid, or --chanid together with
# --starttime for manual operation; otherwise prints a usage hint.
#
# NOTE(review): the body of the `len(args) == 1` branch and the `else:`
# before the final print are not visible in this listing. Confirm against
# the original source.
def main():
    parser = OptionParser(usage="usage: %prog [options] [jobid]")

    parser.add_option('--chanid', action='store', type='int',
                      dest='chanid', help='Use chanid for manual operation')
    parser.add_option('--starttime', action='store', type='string',
                      dest='stime', help='Use starttime for manual operation')
    opts, args = parser.parse_args()

    # Exactly one positional argument is treated as the jobid form.
    if len(args) == 1:
    elif opts.chanid and opts.stime:
        # Manual operation on an explicitly named recording.
        runjob(chanid=opts.chanid, starttime=opts.stime)
        print 'Script must be provided either jobid, or chanid and starttime.'

if __name__ == '__main__':