Difference between revisions of "Find orphans.py"

From MythTV Official Wiki
Jump to: navigation, search
m (add sanity check for hostname argument)
m (Undo revision 57152 by Stevegoodey (talk) Look at the results of the "DISPLAYTITLE" tag. It allows you to use lowercase titles.)
(18 intermediate revisions by 5 users not shown)
Line 1: Line 1:
{{Wrongtitle|find_orphans.py}}
+
{{DISPLAYTITLE:find_orphans.py}}
 
{{Script info
 
{{Script info
 
|author=Raymond Wagner
 
|author=Raymond Wagner
 
|short=orphaned file/recording scanner
 
|short=orphaned file/recording scanner
|long=A scanner to look for missing and unknown recording files. This is informative only, informing the user of the files but taking no action.
+
|long=A scanner to look for missing and unknown recording files. This will only delete files after multiple confirmations.
 
|category=Maintenance
 
|category=Maintenance
 
|file=find_orphans.py
 
|file=find_orphans.py
|S24=yes}}
+
|S24=yes|S25=yes}}
  
This script is a '''"safe"''' alternative to the old [[Myth.find_orphans.pl]] script.  It is informative only, showing any missing videos, or unknown files.  It can handle multiple backends, and does not need to be run locally, however recordings stored on offline backends will be marked as orphaned.
+
This script shows recordings with missing files, or files with missing recordings.  It can handle multiple backends, and does not need to be run locally, however recordings stored on offline backends will be marked as orphaned.
 +
 
 +
Additionally, this allows listing of database backups, and the listing and deletion of zero byte recordings, orphaned snapshots, and other unknown files.
  
 
<pre>
 
<pre>
Line 47: Line 49:
 
#!/usr/bin/env python
 
#!/usr/bin/env python
  
from MythTV import MythDB, MythBE
+
from MythTV import MythDB, MythBE, Recorded, MythError
 
from socket import timeout
 
from socket import timeout
  
 
import os
 
import os
 
import sys
 
import sys
 +
 +
def human_size(s):
 +
    s = float(s)
 +
    o = 0
 +
    while s > 1000:
 +
        s /= 1000
 +
        o += 1
 +
    return str(round(s,1))+('B ','KB','MB','GB')[o]
  
 
class File( str ):
 
class File( str ):
Line 61: Line 71:
 
         self.path = path
 
         self.path = path
 
         self.size = int(size)
 
         self.size = int(size)
 +
    def pprint(self):
 +
        name = u'%s: %s' % (self.host, os.path.join(self.path, self))
 +
        print u'  {0:<90}{1:>8}'.format(name, human_size(self.size))
 +
    def delete(self):
 +
        be = MythBE(self.host, db=DB)
 +
        be.deleteFile(self, self.group)
  
def human_size(s):
+
class MyRecorded( Recorded ):
     s = float(s)
+
     _table = 'recorded'
    o = 0
+
    def pprint(self):
    while s > 1000:
+
        name = u'{0.hostname}: {0.title}'.format(self)
         s /= 1000
+
         if self.subtitle:
        o += 1
+
            name += u' - '+self.subtitle
    return str(round(s,1))+('B ','KB','MB','GB')[o]
+
        print u' {0:<70}{1:>28}'.format(name,self.basename)
  
def prettyprint(f):
+
def printrecs(title, recs):
     print ('  %s:%s' % (f.host, os.path.join(f.path, f))).ljust(80),\
+
     print title
            human_size(f.size).rjust(8)
+
    for rec in sorted(recs, key=lambda x: x.title):
 +
        rec.pprint()
 +
    print u'{0:>88}{1:>12}'.format('Count:',len(recs))
  
 +
def printfiles(title, files):
 +
    print title
 +
    for f in sorted(files, key=lambda x: x.path):
 +
        f.pprint()
 +
    size = sum([f.size for f in files])
 +
    print u'{0:>88}{1:>12}'.format('Total:',human_size(size))
  
def main(host=None):
+
def populate(host=None):
    db = MythDB()
+
    be = MythBE()
+
 
+
 
     unfiltered = []
 
     unfiltered = []
     kwargs = {}
+
     kwargs = {'livetv':True}
 
     if host:
 
     if host:
         with db as c:
+
         with DB as c:
 
             c.execute("""SELECT count(1) FROM settings
 
             c.execute("""SELECT count(1) FROM settings
 
                         WHERE hostname=%s AND value=%s""",
 
                         WHERE hostname=%s AND value=%s""",
                         (host, 'BackendServerIP')
+
                         (host, 'BackendServerIP'))
 
             if c.fetchone()[0] == 0:
 
             if c.fetchone()[0] == 0:
 
                 raise Exception('Invalid hostname specified on command line.')
 
                 raise Exception('Invalid hostname specified on command line.')
Line 91: Line 112:
 
         kwargs['hostname'] = host
 
         kwargs['hostname'] = host
 
     else:
 
     else:
         with db as c:
+
         with DB as c:
 
             c.execute("""SELECT hostname FROM settings
 
             c.execute("""SELECT hostname FROM settings
 
                         WHERE value='BackendServerIP'""")
 
                         WHERE value='BackendServerIP'""")
 
             hosts = [r[0] for r in c.fetchall()]
 
             hosts = [r[0] for r in c.fetchall()]
 
     for host in hosts:
 
     for host in hosts:
         for sg in db.getStorageGroup():
+
         for sg in DB.getStorageGroup():
 
             if sg.groupname in ('Videos','Banners','Coverart',\
 
             if sg.groupname in ('Videos','Banners','Coverart',\
 
                                 'Fanart','Screenshots','Trailers'):
 
                                 'Fanart','Screenshots','Trailers'):
 
                 continue
 
                 continue
 
             try:
 
             try:
                 dirs,files,sizes = be.getSGList(host, sg.groupname, sg.dirname)
+
                 dirs,files,sizes = BE.getSGList(host, sg.groupname, sg.dirname)
 
                 for f,s in zip(files,sizes):
 
                 for f,s in zip(files,sizes):
 
                     newfile = File(host, sg.groupname, sg.dirname, f, s)
 
                     newfile = File(host, sg.groupname, sg.dirname, f, s)
Line 109: Line 130:
 
                 pass
 
                 pass
  
     recs = list(db.searchRecorded(**kwargs))
+
     recs = list(DB.searchRecorded(**kwargs))
  
 
     zerorecs = []
 
     zerorecs = []
Line 144: Line 165:
 
         unfiltered.remove(f)
 
         unfiltered.remove(f)
  
     if len(recs):
+
     return (recs, zerorecs, orphvids, orphimgs, dbbackup, unfiltered)
        print "Recordings with missing files"
+
 
         for rec in recs:
+
def delete_recs(recs):
             if rec.subtitle:
+
    printrecs('The following recordings will be deleted', recs)
                 print ('  %s - %s' % (rec.title, rec.subtitle)).ljust(60),
+
    print 'Are you sure you want to continue?'
 +
    try:
 +
         res = raw_input('> ')
 +
        while True:
 +
             if res == 'yes':
 +
                 for rec in recs:
 +
                    rec.delete(True, True)
 +
                break
 +
            elif res == 'no':
 +
                break
 
             else:
 
             else:
                 print (' %s' % rec.title).ljust(60),
+
                 res = raw_input("'yes' or 'no' > ")
             print rec.basename
+
    except MythError:
 +
        name = u'{0.hostname}: {0.title}'.format(rec)
 +
        if rec.subtitle:
 +
             name += ' - '+rec.subtitle
 +
        print "Warning: Failed to delete '" + name + "'"
 +
    except KeyboardInterrupt:
 +
        pass
 +
    except EOFError:
 +
        sys.exit(0)
  
     if len(zerorecs):
+
def delete_files(files):
         print "\nZero byte recordings"
+
     printfiles('The following files will be deleted', files)
         for rec in zerorecs:
+
    print 'Are you sure you want to continue?'
             if rec.subtitle:
+
    try:
                 print ('  %s - %s' % (rec.title, rec.subtitle)).ljust(40),
+
         res = raw_input('> ')
 +
         while True:
 +
             if res == 'yes':
 +
                 for f in files:
 +
                    f.delete()
 +
                break
 +
            elif res == 'no':
 +
                break
 
             else:
 
             else:
                 print (' %s' % rec.title).ljust(40),
+
                 res = raw_input("'yes' or 'no' > ")
            print rec.basename
+
    except KeyboardInterrupt:
 +
        pass
 +
    except EOFError:
 +
        sys.exit(0)
  
    if len(orphvids):
+
def main(host=None):
        print "\nOrphaned video files"
+
  while True:
         for f in sorted(orphvids, key=lambda x: x.path):
+
         recs, zerorecs, orphvids, orphimgs, dbbackup, unfiltered = populate(host)
            # os.unlink(os.path.join(f.path,f))
+
            prettyprint(f)
+
        size = sum([f.size for f in orphvids])
+
        print 'Total:'.rjust(80),human_size(size).rjust(8)
+
  
    if len(orphimgs):
+
        if len(recs):
         print "\nOrphaned snapshots"
+
            printrecs("Recordings with missing files", recs)
         for f in sorted(orphimgs, key=lambda x: x.path):
+
         if len(zerorecs):
             prettyprint(f)
+
            printrecs("Zero byte recordings", zerorecs)
         size = sum([f.size for f in orphimgs])
+
         if len(orphvids):
         print 'Total:'.rjust(80),human_size(size).rjust(8)
+
            printfiles("Orphaned video files", orphvids)
 +
        if len(orphimgs):
 +
            printfiles("Orphaned snapshots", orphimgs)
 +
        if len(dbbackup):
 +
            printfiles("Database backups", dbbackup)
 +
        if len(unfiltered):
 +
            printfiles("Other files", unfiltered)
 +
 
 +
        opts = []
 +
        if len(recs):
 +
            opts.append(['Delete orphaned recording entries', delete_recs, recs])
 +
        if len(zerorecs):
 +
             opts.append(['Delete zero byte recordings', delete_recs, zerorecs])
 +
         if len(orphvids):
 +
            opts.append(['Delete orphaned video files', delete_files, orphvids])
 +
        if len(orphimgs):
 +
            opts.append(['Delete orphaned snapshots', delete_files, orphimgs])
 +
        if len(unfiltered):
 +
            opts.append(['Delete other files', delete_files, unfiltered])
 +
        opts.append(['Refresh list', None, None])
 +
         print 'Please select from the following'
 +
        for i, opt in enumerate(opts):
 +
            print ' {0}. {1}'.format(i+1, opt[0])
 +
 
 +
        try:
 +
            inner = True
 +
            res = raw_input('> ')
 +
            while inner:
 +
                try:
 +
                    res = int(res)
 +
                except:
 +
                    res = raw_input('input number. ctrl-c to exit > ')
 +
                    continue
 +
                if (res <= 0) or (res > len(opts)):
 +
                    res = raw_input('input number within range > ')
 +
                    continue
 +
                break
 +
            opt = opts[res-1]
 +
            if opt[1] is None:
 +
                continue
 +
            else:
 +
                opt[1](opt[2])
  
    if len(dbbackup):
+
        except KeyboardInterrupt:
        print "\nDatabase backups"
+
            break
         for f in sorted(dbbackup, key=lambda x: x.path):
+
         except EOFError:
             prettyprint(f)
+
             sys.exit(0)
        size = sum([f.size for f in dbbackup])
+
        print 'Total:'.rjust(80),human_size(size).rjust(8)
+
  
    if len(unfiltered):
+
DB = MythDB()
        print "\nOther files"
+
BE = MythBE(db=DB)
        for f in sorted(unfiltered, key=lambda x: x.path):
+
DB.searchRecorded.handler = MyRecorded
            prettyprint(f)
+
DB.searchRecorded.dbclass = MyRecorded
        size = sum([f.size for f in unfiltered])
+
        print 'Total:'.rjust(80),human_size(size).rjust(8)
+
  
 
if __name__ == '__main__':
 
if __name__ == '__main__':
Line 199: Line 280:
  
 
[[Category:Python_Scripts]]
 
[[Category:Python_Scripts]]
 +
[[Category:Management Scripts]]

Revision as of 16:13, 28 January 2013


Author Raymond Wagner
Description A scanner to look for missing and unknown recording files. This will only delete files after multiple confirmations.
Supports Version24.png  Version25.png  


This script shows recordings with missing files, and files with no matching recording. It can handle multiple backends and does not need to be run locally; however, recordings stored on offline backends will be reported as orphaned.

Additionally, the script lists database backups, and can list and delete zero-byte recordings, orphaned snapshots, and other unknown files.

>./find_orphans.py
Recordings with missing files
  Undercovers - Devices                  4642_20101006201300.mpg

Orphaned video files
  mythbe:/srv/mounts/twotb_1/video/2054_20080225110000.mpg                          2.5GB
                                                                          Total:    2.5GB

Orphaned snapshots
  myth0:/srv/mounts/myth0_1/video/4122_20101013113500.mpg.png                       2.6KB
  mythbe:/srv/mounts/twotb_1/video/2029_20100409024900.mpg.png                     84.9KB
  mythbe:/srv/mounts/twotb_1/video/2047_20100807180500.mpg.png                     92.9KB
  mythbe:/srv/mounts/twotb_1/video/2059_20100630090000.mpg.png                     87.0KB
                                                                          Total:  267.4KB

Database backups
  mythbe:/mnt/mythtv/store/backups/mythconverg--20101007134000.sql                 17.3MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1254-20100902174922.sql.gz          13.1MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1263-20100913163154.sql             62.1MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1263-20100913163216.sql.gz          13.0MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1263-20101007134659.sql.gz          15.9MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1264-20101008023651.sql.gz          16.5MB
                                                                          Total:  137.9MB

Other files
  mythbe:/srv/mounts/twotb_1/video/4121_20100312215900.mpg.tmp                    398.6MB
  mythbe:/srv/mounts/twotb_1/video/4191_20090928200000.mpg.tmp                      2.4GB
  mythbe:/srv/mounts/twotb_1/video/4191_20091005195900.mpg.tmp                      2.6GB
  mythbe:/srv/mounts/twotb_1/video/4642_20101006201300.mpg.1                        4.9GB
                                                                          Total:   10.2GB


PythonIcon.png find_orphans.py

#!/usr/bin/env python

from MythTV import MythDB, MythBE, Recorded, MythError
from socket import timeout

import os
import sys

def human_size(s):
    """Format a byte count as a short human-readable string.

    The value is divided by 1000 (decimal units, not 1024) until it is no
    larger than 1000 or the largest known unit has been reached, then
    rounded to one decimal place and suffixed with the unit.

    s -- size in bytes; anything accepted by float().

    Returns a string such as '84.9KB' or '2.5GB'.  The plain-byte suffix is
    'B ' (with a trailing space) so every suffix is two characters wide and
    right-aligned columns line up.
    """
    units = ('B ', 'KB', 'MB', 'GB', 'TB', 'PB')
    s = float(s)
    o = 0
    # Stop at the last unit instead of indexing past the end of the tuple;
    # without the bound, any size above 1000 GB raised IndexError.
    while s > 1000 and o < len(units) - 1:
        s /= 1000
        o += 1
    return str(round(s, 1)) + units[o]

class File( str ):
    def __new__(self, host, group, path, name, size):
        return str.__new__(self, name)
    def __init__(self, host, group, path, name, size):
        self.host = host
        self.group = group
        self.path = path
        self.size = int(size)
    def pprint(self):
        name = u'%s: %s' % (self.host, os.path.join(self.path, self))
        print u'  {0:<90}{1:>8}'.format(name, human_size(self.size))
    def delete(self):
        be = MythBE(self.host, db=DB)
        be.deleteFile(self, self.group)

class MyRecorded( Recorded ):
    _table = 'recorded'
    def pprint(self):
        name = u'{0.hostname}: {0.title}'.format(self)
        if self.subtitle:
            name += u' - '+self.subtitle
        print u'  {0:<70}{1:>28}'.format(name,self.basename)

def printrecs(title, recs):
    print title
    for rec in sorted(recs, key=lambda x: x.title):
        rec.pprint()
    print u'{0:>88}{1:>12}'.format('Count:',len(recs))

def printfiles(title, files):
    print title
    for f in sorted(files, key=lambda x: x.path):
        f.pprint()
    size = sum([f.size for f in files])
    print u'{0:>88}{1:>12}'.format('Total:',human_size(size))

def populate(host=None):
    """Scan backends and the database and classify whatever does not match.

    host -- optional backend hostname.  When given, only that backend is
            scanned and only its recordings are considered.  When omitted,
            every hostname that has a 'BackendServerIP' setting is scanned.

    Returns a 6-tuple of lists
        (recs, zerorecs, orphvids, orphimgs, dbbackup, unfiltered)
    where
        recs       -- recordings whose basename was not found in any scan
        zerorecs   -- recordings whose file was found but is under 1024 bytes
        orphvids   -- unmatched files ending in '.mpg' or '.nuv'
        orphimgs   -- unmatched files ending in '.png'
        dbbackup   -- remaining files with 'sql' in their name
        unfiltered -- every other unmatched file

    Raises Exception when 'host' is given but has no 'BackendServerIP'
    setting.
    """
    unfiltered = []
    # NOTE(review): 'livetv' presumably makes the search include LiveTV
    # recordings -- confirm against MythDB.searchRecorded.
    kwargs = {'livetv':True}
    if host:
        with DB as c:
            c.execute("""SELECT count(1) FROM settings
                         WHERE hostname=%s AND value=%s""",
                        (host, 'BackendServerIP'))
            if c.fetchone()[0] == 0:
                raise Exception('Invalid hostname specified on command line.')
        hosts = [host]
        kwargs['hostname'] = host
    else:
        with DB as c:
            c.execute("""SELECT hostname FROM settings
                         WHERE value='BackendServerIP'""")
            hosts = [r[0] for r in c.fetchall()]

    # The storage group list does not depend on the host being scanned, so
    # fetch it once.  The video-library and artwork groups are not scanned.
    skipped_groups = ('Videos','Banners','Coverart',
                      'Fanart','Screenshots','Trailers')
    storage_groups = [sg for sg in DB.getStorageGroup()
                      if sg.groupname not in skipped_groups]

    # File compares and hashes as its bare name, so this set gives the same
    # "first occurrence of each name wins" result as a list membership test.
    seen = set()
    for backend in hosts:
        for sg in storage_groups:
            try:
                dirs,files,sizes = BE.getSGList(backend, sg.groupname,
                                                sg.dirname)
                for f,s in zip(files,sizes):
                    newfile = File(backend, sg.groupname, sg.dirname, f, s)
                    if newfile not in seen:
                        seen.add(newfile)
                        unfiltered.append(newfile)
            except Exception:
                # Deliberately best effort: a directory that cannot be
                # listed (e.g. an offline backend) is skipped, which leaves
                # its recordings reported as missing.  Only Exception is
                # caught so Ctrl-C still interrupts a long scan.
                pass

    recs = list(DB.searchRecorded(**kwargs))

    zerorecs = []
    for rec in list(recs):
        if rec.basename not in unfiltered:
            continue
        # The recording has a file, so it is not "missing".
        recs.remove(rec)
        matched = unfiltered.pop(unfiltered.index(rec.basename))
        if matched.size < 1024:
            zerorecs.append(rec)
        # Drop every other file whose name contains the recording's name
        # minus its extension; those are treated as belonging to it.
        stem = rec.basename.rsplit('.',1)[0]
        unfiltered = [f for f in unfiltered if stem not in f]

    # Classify the leftovers; the first matching rule wins.
    orphvids = []
    orphimgs = []
    dbbackup = []
    others = []
    for f in unfiltered:
        if f.endswith('.mpg') or f.endswith('.nuv'):
            orphvids.append(f)
        elif f.endswith('.png'):
            orphimgs.append(f)
        elif 'sql' in f:
            dbbackup.append(f)
        else:
            others.append(f)

    return (recs, zerorecs, orphvids, orphimgs, dbbackup, others)

def delete_recs(recs):
    printrecs('The following recordings will be deleted', recs)
    print 'Are you sure you want to continue?'
    try:
        res = raw_input('> ')
        while True:
            if res == 'yes':
                for rec in recs:
                    rec.delete(True, True)
                break
            elif res == 'no':
                break
            else:
                res = raw_input("'yes' or 'no' > ")
    except MythError:
        name = u'{0.hostname}: {0.title}'.format(rec)
        if rec.subtitle:
            name += ' - '+rec.subtitle
        print "Warning: Failed to delete '" + name + "'"
    except KeyboardInterrupt:
        pass
    except EOFError:
        sys.exit(0)

def delete_files(files):
    printfiles('The following files will be deleted', files)
    print 'Are you sure you want to continue?'
    try:
        res = raw_input('> ')
        while True:
            if res == 'yes':
                for f in files:
                    f.delete()
                break
            elif res == 'no':
                break
            else:
                res = raw_input("'yes' or 'no' > ")
    except KeyboardInterrupt:
        pass
    except EOFError:
        sys.exit(0)

def main(host=None):
   while True:
        recs, zerorecs, orphvids, orphimgs, dbbackup, unfiltered = populate(host)

        if len(recs):
            printrecs("Recordings with missing files", recs)
        if len(zerorecs):
            printrecs("Zero byte recordings", zerorecs)
        if len(orphvids):
            printfiles("Orphaned video files", orphvids)
        if len(orphimgs):
            printfiles("Orphaned snapshots", orphimgs)
        if len(dbbackup):
            printfiles("Database backups", dbbackup)
        if len(unfiltered):
            printfiles("Other files", unfiltered)

        opts = []
        if len(recs):
            opts.append(['Delete orphaned recording entries', delete_recs, recs])
        if len(zerorecs):
            opts.append(['Delete zero byte recordings', delete_recs, zerorecs])
        if len(orphvids):
            opts.append(['Delete orphaned video files', delete_files, orphvids])
        if len(orphimgs):
            opts.append(['Delete orphaned snapshots', delete_files, orphimgs])
        if len(unfiltered):
            opts.append(['Delete other files', delete_files, unfiltered])
        opts.append(['Refresh list', None, None])
        print 'Please select from the following'
        for i, opt in enumerate(opts):
            print ' {0}. {1}'.format(i+1, opt[0])

        try:
            inner = True
            res = raw_input('> ')
            while inner:
                try:
                    res = int(res)
                except:
                    res = raw_input('input number. ctrl-c to exit > ')
                    continue
                if (res <= 0) or (res > len(opts)):
                    res = raw_input('input number within range > ')
                    continue
                break
            opt = opts[res-1]
            if opt[1] is None:
                continue
            else:
                opt[1](opt[2])

        except KeyboardInterrupt:
            break
        except EOFError:
            sys.exit(0)

# Module-wide MythTV handles used by File.delete() and populate().  They are
# instantiated at import time, before main() runs.
DB = MythDB()
BE = MythBE(db=DB)
# Point searchRecorded at MyRecorded so the recordings it yields offer the
# pprint() method that printrecs() calls.
# NOTE(review): setting both 'handler' and 'dbclass' presumably covers
# differently-named hooks across binding versions -- confirm.
DB.searchRecorded.handler = MyRecorded
DB.searchRecorded.dbclass = MyRecorded

if __name__ == '__main__':
    # Exactly one command-line argument is taken as the backend hostname;
    # with any other argument count, main() runs with its default (None).
    cli_host = sys.argv[1] if len(sys.argv) == 2 else None
    main(cli_host)