Commercial detection with silences

From MythTV Official Wiki
Revision as of 11:09, 5 July 2012 by Hippo (talk | contribs) (Created page with "{{Script info |author=Hippo |short=A replacement for mythcommflag that works by detecting short silent periods around commercials. |long=A python program based on [[Mythcommfl...")

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

Author Hippo
Description A python program based on Mythcommflag-wrapper (thank you Cowbut) that can be used on UK FreeviewHD channels and probably others.
Supports Version25.png  

I tried out the scripts in Mythcommflag-wrapper and they worked well on the Freeview channels I receive but not on the FreeviewHD channels. The reason is that the audio on FreeviewHD is an AAC stream and not an MP3 stream. Fixing that would require decoding from AAC and encoding back to MP3 before letting the script analyse the MP3 stream. So I wrote a little C program to analyze an uncompressed audio stream and a Python program to wrap it up and turn the output into a commercial skip list.

To use this

  • Compile the C program and put it somewhere the Python program can find it.
  • Copy the Python program to somewhere the backend can find it.
  • Follow the instructions on Mythcommflag-wrapper.

Script.png mausc.c

/* Copyright 2012 Tinsel Phipps. */
/* Public domain. Links with libsndfile which is GPL. */
/* Compile with
   gcc -std=c99 -Wall -Wextra -Werror -O mausc.c -o mausc -lsndfile -lm */
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <sndfile.h>

/*
 * Print a short usage message to stderr.
 *
 * name: program name to show in the synopsis and example (normally argv[0]).
 */
static void usage(const char *name) {
  fprintf(stderr, "Usage: %s <threshold> <min> <rate>\n", name);
  fprintf(stderr, "<threshold>: silence threshold in dB.\n");
  fprintf(stderr, "<min>: minimum time for silence detection in seconds.\n");
  fprintf(stderr, "<rate>: frame rate of video.\n");
  fprintf(stderr, "An AU format file should be fed into this program.\n");
  /* Arguments are space separated and the audio arrives on stdin. */
  fprintf(stderr, "Example: %s -70 0.15 25 < audio.au\n", name);
}

int main(int argc, char **argv) {

  /* Check usage. */
  if (4 != argc) {

  /* Load options. */
  float threshold, min, rate;
  if (1 != sscanf(argv[1], "%f", &threshold)) {
    fprintf(stderr, "Could not parse threshold option into a number.\n");
  if (1 != sscanf(argv[2], "%f", &min)) {
    fprintf(stderr, "Could not parse min option into a number.\n");
  if (1 != sscanf(argv[3], "%f", &rate)) {
    fprintf(stderr, "Could not parse rate option into a number.\n");

  /* Scale threshold to integer range that libsndfile will use. */
  threshold = INT_MAX * pow(10, threshold / 20);
  /* Scale min time to frames. */
  min = min * rate;

  /* Check the input is an audiofile. */
  SNDFILE *input;
  SF_INFO metadata;
  input = sf_open_fd(STDIN_FILENO, SFM_READ, &metadata, SF_FALSE);
  if (NULL == input) {
    return sf_error(NULL);

  /* Allocate data buffer to contain audio data from one video frame. */
  size_t frameSamples = metadata.channels * metadata.samplerate / rate;

  int *samples;
  samples = malloc(frameSamples * sizeof(int));
  if (NULL == samples) {
    return errno;

  /* Indices of frame numbers of quiet periods. */
  int start = 0;
  int end = 0;
  /* Process the file one frame at a time and print out cuts along the way. */
  int frames = 0;
  while (frameSamples == sf_read_int(input, samples, frameSamples)) {
    int maxabs = 0;
    for (unsigned i = 0; i < frameSamples; i++) {
      samples[i] = abs(samples[i]);
      maxabs = (maxabs > samples[i]) ? maxabs : samples[i];
    if (maxabs < threshold) {
      end = frames;
    } else {
      if (end - start > min) {
        printf("%d %d\n", start, end);
      start = frames;
  return sf_close(input);


#!/usr/bin/env python
# Build a skiplist from silence in the audio track.
# Based on the Mythcommflag-wrapper script.
from MythTV import MythDB, Job, Recorded, findfile
from os import path
from subprocess import Popen, PIPE
from optparse import OptionParser

def addskip(cr, rec, chanid, starttime, startframe, endframe):
    """Insert one commercial break into recordedmarkup and flag the recording.

    Two rows are written through ``cr``: a MARK_COMM_START mark at
    ``startframe`` and a MARK_COMM_END mark at ``endframe``.  ``rec`` supplies
    the mark type constants and has its ``commflagged`` attribute set to 1.

    The values are passed to ``execute`` as parameters rather than being
    interpolated into the SQL text, so quoting is left to the database driver.
    Frame numbers are coerced to ``int`` because callers may pass them as the
    text fields read from the silence detector's output.
    """
    insert = """INSERT INTO recordedmarkup (chanid, starttime, type, mark)
                VALUES (%s, %s, %s, %s);"""
    cr.execute(insert, (chanid, starttime,
                        rec.markup.MARK_COMM_START, int(startframe)))
    cr.execute(insert, (chanid, starttime,
                        rec.markup.MARK_COMM_END, int(endframe)))
    rec.commflagged = 1

def runjob(jobid=None, chanid=None, starttime=None):
    """Rebuild the commercial skip list of one recording from audio silences.

    The recording is identified either by ``jobid`` (the job supplies chanid
    and starttime) or directly by ``chanid`` and ``starttime``.  Existing
    commercial start/end marks are deleted, the audio is decoded by avconv
    and piped through mausc, and the silences mausc reports are coalesced
    into breaks that are written back with addskip().

    Exits the process with status 1 if the recording, or a locally
    accessible copy of its file, cannot be found.
    """
    # Tunable settings (would like to retrieve per channel from the database)
    threshold = -70 # Silence threshold in dB.
    minsilence = 0.15 # Minimum time for silence detection in seconds.
    maxbreak = 400 # Maximum length of adverts breaks.
    framerate = 25 # Frame rate of video. (should be automatic)

    db = MythDB()
    cursor = db.cursor()
    job = None
    if jobid:
        job = Job(jobid, db=db)
        chanid = job.chanid
        starttime = job.starttime

    try:
        rec = Recorded((chanid, starttime), db=db)
    except Exception:
        print('Could not find recording.')
        raise SystemExit(1)

    # Get program handle in standard format.
    starttime = rec.starttime
    chanid = rec.chanid

    sg = findfile(rec.basename, rec.storagegroup, db=db)
    if sg is None:
        print('Local access to recording not found.')
        raise SystemExit(1)

    infile = path.join(sg.dirname, rec.basename)

    # Purge any existing skip list.  Values are passed as parameters so the
    # driver does the quoting.
    purge = """DELETE FROM recordedmarkup WHERE
               chanid = %s AND starttime = %s AND type = %s;"""
    for marktype in (rec.markup.MARK_COMM_START, rec.markup.MARK_COMM_END):
        cursor.execute(purge, (chanid, starttime, marktype))
    rec.commflagged = 0

    # Extract uncompressed audio stream from recording.
    p1 = Popen(["avconv", "-v", "fatal", "-i", infile, "-f", "au", "-"],
               stdout=PIPE)
    # Pipe to mausc which will spit out a list of silent intervals in frames.
    p2 = Popen(["mausc", str(threshold), str(minsilence), str(framerate)],
               stdin=p1.stdout, stdout=PIPE)
    # Drop our copy of the pipe so avconv gets SIGPIPE if mausc exits early.
    p1.stdout.close()
    output = p2.communicate()[0]
    p1.wait()

    # Convert maxbreak from seconds to frames.
    maxbreak = maxbreak * framerate

    # Coalesce short silences into larger breaks to skip.  A silence that
    # ends more than maxbreak frames after the current break started cannot
    # belong to it, so the current break is flushed and a new one begins.
    breaks = 0
    breakstart = 1
    breakend = 1
    for line in output.splitlines():
        fields = line.split()
        if len(fields) != 2:
            continue  # Ignore blank or malformed lines.
        start, end = int(fields[0]), int(fields[1])
        # Uncomment the next two lines to not cut before the first break.
        #if 1 == breakstart:
        #    breakstart = start
        if end - breakstart > maxbreak:
            if 1 != breakend:
                addskip(cursor, rec, chanid, starttime, breakstart, breakend)
                breaks = breaks + 1
            breakstart = start
        breakend = end

    if 1 != breakstart: # Add the last break if not flushed.
        addskip(cursor, rec, chanid, starttime, breakstart, breakend)
        breaks = breaks + 1

    # Commit to database.
    rec.update()

    if job is not None:
        # NOTE(review): 272 (0x0110) is assumed to be the job queue's
        # "finished" status code - confirm against the MythTV bindings.
        job.update({'status': 272,
                    'comment':'Audio commflag detected %s breaks.' % breaks
                    })

def main():
    """Parse the command line and run the commercial flagging job.

    The recording is selected either by a single positional job id or by
    the --chanid and --starttime options together.  If neither form is
    given, a message is printed and the process exits with status 1.
    """
    parser = OptionParser(usage="usage: %prog [options] [jobid]")

    parser.add_option('--chanid', action='store', type='int',
                      dest='chanid', help='Use chanid for manual operation')
    parser.add_option('--starttime', action='store', type='string',
                      dest='stime', help='Use starttime for manual operation')
    opts, args = parser.parse_args()

    if len(args) == 1:
        runjob(jobid=args[0])
    elif opts.chanid and opts.stime:
        runjob(chanid=opts.chanid, starttime=opts.stime)
    else:
        print('Script must be provided either jobid, or chanid and starttime.')
        raise SystemExit(1)

# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()