Remove duplicate videos.py

From MythTV Official Wiki
Revision as of 14:20, 1 September 2020 by Stuarta (talk | contribs)

Jump to: navigation, search


Author Raymond Wagner
Description This script is intended to search through the MythVideo database tables to find and delete any duplicate videos as identified by the hash value. This is a dumb delete, and simply removes anything but the first match. It will not necessarily keep the one with the most complete metadata.
Supports Version24.png  Version25.png  



Important.png Note: This script is outdated and will not work out of the box on any distribution that uses Python 3 as its default Python interpreter.


PythonIcon.png remove_duplicate_videos.py

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#---------------------------
# Name: remove_duplicate_videos.py
# Python Script
# Author: Raymond Wagner
# Purpose
#   For reasons unknown, some people continue to get duplicate file
#   entries in their MythVideo database. This script will detail those
#   duplicate files based off hash number, and optionally delete them.
#---------------------------
__title__  = "Remove Duplicate Videos"
__author__ = "Raymond Wagner"
__version__= "v0.5.0"

from optparse import OptionParser
from MythTV import Video

def format_name(vid):
    """Return a display name of the form
    'TITLE[ - SEASONxEPISODE][ - SUBTITLE]' for the given video.

    The season/episode part is included only when vid.season is truthy,
    and the subtitle part only when vid.subtitle is truthy.
    """
    pieces = [vid.title]
    if vid.season:
        # Episode number is zero-padded to two digits, e.g. '1x02'.
        pieces.append('%dx%02d' % (vid.season, vid.episode))
    if vid.subtitle:
        pieces.append(vid.subtitle)
    return ' - '.join(pieces)

def FindDuplicates(dodelete):
    """Find videos whose hash matches that of another video.

    For every group of videos sharing a hash, the first one (in
    hash-sorted order) is kept and each later one is reported as a
    duplicate. Videos without a usable hash (empty, None, or the literal
    string 'NULL') are never treated as duplicates of one another.

    dodelete -- when true, delete() is called on every duplicate found.

    Returns the list of duplicate videos (already deleted when dodelete
    is true).
    """
    # An empty or missing hash says nothing about the file contents, so
    # those entries are excluded together with the 'NULL' marker. Without
    # this, all unhashed videos would compare equal and all but one would
    # be deleted. Filtering first also keeps None out of the sort key.
    hashed = [v for v in Video.getAllEntries()
              if v.hash and v.hash != 'NULL']
    # list.sort() is stable, so within one hash the original order is
    # retained and the first entry returned by the database is the keeper.
    hashed.sort(key=lambda v: v.hash)

    # Compare each video with its predecessor; every repeat of a hash
    # after the first occurrence is a duplicate.
    dupvids = [cur for prev, cur in zip(hashed, hashed[1:])
               if prev.hash == cur.hash]

    if dodelete:
        for vid in dupvids:
            vid.delete()

    return dupvids

def main():
    """Command-line entry point: find duplicate videos and report them.

    Duplicates are deleted unless -s/--dry-run is given, in which case
    they are only listed. Positional arguments are accepted but ignored.
    """
    parser = OptionParser(usage="usage: %prog [options]")

    parser.add_option("-s", "--dry-run", action="store_true", default=False,
            dest="dryrun", help="Print out duplicates but do not delete.")

    opts, _args = parser.parse_args()

    dups = FindDuplicates(not opts.dryrun)

    # Single-argument print(...) calls produce identical output on
    # Python 2 and Python 3, so the script no longer fails with a
    # SyntaxError on Python 3.
    if dups:
        print('%d Duplicates Found!' % len(dups))
        print('----------------------')
        for vid in dups:
            print('  ' + format_name(vid))
    else:
        print('No Duplicates Found!')

# Run the duplicate scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()