Remove duplicate videos.py

From MythTV Official Wiki
Revision as of 05:03, 29 April 2011 by Wagnerrp (talk | contribs) (maintenance script to find and delete duplicate entries in mythvideo)

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search


Author Raymond Wagner
Description This script is intended to search through the MythVideo database tables to find and delete any duplicate videos as identified by the hash value. This is a dumb delete, and simply removes anything but the first match. It will not necessarily keep the one with the most complete metadata.
Supports Version24.png  Version25.png  



PythonIcon.png remove_duplicate_videos.py

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#---------------------------
# Name: remove_duplicate_videos.py
# Python Script
# Author: Raymond Wagner
# Purpose
#   For reasons unknown, some people continue to get duplicate file
#   entries in their MythVideo database. This script lists those
#   duplicates, identified by matching hash values, and deletes them
#   unless it is run with --dry-run.
#---------------------------
__title__  = "Remove Duplicate Videos"
__author__ = "Raymond Wagner"
__version__= "v0.5.0"

from optparse import OptionParser
from MythTV import Video

def format_name(vid):
    """Build a display name of the form 'TITLE[ - SEASONxEPISODE][ - SUBTITLE]'.

    The season/episode part is included only when ``vid.season`` is truthy,
    and the subtitle part only when ``vid.subtitle`` is truthy. The episode
    number is zero-padded to two digits.
    """
    label = vid.title

    # Gather the optional pieces in display order.
    extras = []
    if vid.season:
        extras.append('%dx%02d' % (vid.season, vid.episode))
    if vid.subtitle:
        extras.append(vid.subtitle)

    for extra in extras:
        label += ' - ' + extra
    return label

def FindDuplicates(dodelete, vids=None):
    """Find video entries whose hash matches an earlier entry.

    Entries are grouped by their ``hash`` attribute. Within each group the
    first entry (in the order supplied) is kept and every later one is
    reported as a duplicate.

    Entries without a usable hash -- ``None``, an empty string, or the
    literal string ``'NULL'`` -- are never treated as duplicates. Without
    this, every unhashed entry would compare equal to the others and all
    but one would be deleted.

    dodelete -- when true, call ``delete()`` on every duplicate found.
    vids     -- optional iterable of entries to check; defaults to
                ``Video.getAllEntries()``.

    Returns the list of duplicate entries (already deleted if ``dodelete``).
    """
    if vids is None:
        vids = Video.getAllEntries()

    # Drop unhashed entries before sorting. sorted() is stable, so entries
    # sharing a hash keep their original relative order.
    hashed = sorted((v for v in vids if v.hash and v.hash != 'NULL'),
                    key=lambda v: v.hash)

    # After sorting, equal hashes are adjacent; any entry matching its
    # predecessor is a duplicate of the first entry in that run.
    dupvids = [cur for prev, cur in zip(hashed, hashed[1:])
               if prev.hash == cur.hash]

    if dodelete:
        for vid in dupvids:
            vid.delete()

    return dupvids

def main():
    """Command-line entry point: report duplicate videos and delete them.

    Duplicates are deleted by default; pass -s/--dry-run to only list them.
    Positional arguments are accepted by the parser but are not used.
    """
    parser = OptionParser(usage="usage: %prog [options] [jobid]")

    parser.add_option("-s", "--dry-run", action="store_true", default=False,
            dest="dryrun", help="Print out duplicates but do not delete.")

    opts, args = parser.parse_args()

    # Deletion (unless --dry-run) happens inside FindDuplicates, before the
    # report below is printed.
    dups = FindDuplicates(not opts.dryrun)

    # Single-argument print(...) calls behave identically as a Python 2
    # print statement and as the Python 3 print function.
    if dups:
        print('%d Duplicates Found!' % len(dups))
        print('----------------------')
        for vid in dups:
            print('  ' + format_name(vid))
    else:
        print('No Duplicates Found!')

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()