Difference between revisions of "Find orphans.py"

From MythTV Official Wiki
Jump to: navigation, search
m
(Added a "Troubleshooting" section, with information about syntax errors relating to the Python version.)
(23 intermediate revisions by 7 users not shown)
Line 1: Line 1:
{{Wrongtitle|find_orphans.py}}
+
{{DISPLAYTITLE:find_orphans.py}}
 
{{Script info
 
{{Script info
 
|author=Raymond Wagner
 
|author=Raymond Wagner
 
|short=orphaned file/recording scanner
 
|short=orphaned file/recording scanner
|long=A scanner to look for missing and unknown recording files. This is informative only, informing the user of the files but taking no action.
+
|long=A scanner to look for missing and unknown recording files. This will only delete files after multiple confirmations.
 
|category=Maintenance
 
|category=Maintenance
 
|file=find_orphans.py
 
|file=find_orphans.py
|S24=yes}}
+
|S24=yes|S25=yes|S26=yes}}
  
This script is a '''"safe"''' alternative to the old [[Myth.find_orphans.pl]] script.  It is informative only, showing any missing videos, or unknown files.  It can handle multiple backends, and does not need to be run locally, however recordings stored on offline backends will be marked as orphaned.
+
This script shows recordings with missing files, or files with missing recordings.  It can handle multiple backends, and does not need to be run locally, however recordings stored on offline backends will be marked as orphaned.
 +
 
 +
Additionally, this allows listing of database backups, and the listing and deletion of zero byte recordings, orphaned snapshots, and other unknown files.
  
 
<pre>
 
<pre>
Line 47: Line 49:
 
#!/usr/bin/env python
 
#!/usr/bin/env python
  
from MythTV import MythDB, MythBE
+
from MythTV import MythDB, MythBE, Recorded, MythError
 
from socket import timeout
 
from socket import timeout
  
 
import os
 
import os
 
import sys
 
import sys
 +
 +
def human_size(s):
 +
    s = float(s)
 +
    o = 0
 +
    while s > 1000:
 +
        s /= 1000
 +
        o += 1
 +
    return str(round(s,1))+('B ','KB','MB','GB')[o]
  
 
class File( str ):
 
class File( str ):
Line 61: Line 71:
 
         self.path = path
 
         self.path = path
 
         self.size = int(size)
 
         self.size = int(size)
 +
    def pprint(self):
 +
        name = u'%s: %s' % (self.host, os.path.join(self.path, self))
 +
        print u'  {0:<90}{1:>8}'.format(name, human_size(self.size))
 +
    def delete(self):
 +
        be = MythBE(self.host, db=DB)
 +
        be.deleteFile(self, self.group)
  
def human_size(s):
+
class MyRecorded( Recorded ):
     s = float(s)
+
     _table = 'recorded'
    o = 0
+
    def pprint(self):
    while s > 1000:
+
        name = u'{0.hostname}: {0.title}'.format(self)
         s /= 1000
+
         if self.subtitle:
        o += 1
+
            name += u' - '+self.subtitle
    return str(round(s,1))+('B ','KB','MB','GB')[o]
+
        print u' {0:<70}{1:>28}'.format(name,self.basename)
  
def prettyprint(f):
+
def printrecs(title, recs):
     print ('  %s:%s' % (f.host, os.path.join(f.path, f))).ljust(80),\
+
     print title
            human_size(f.size).rjust(8)
+
    for rec in sorted(recs, key=lambda x: x.title):
 +
        rec.pprint()
 +
    print u'{0:>88}{1:>12}'.format('Count:',len(recs))
  
 +
def printfiles(title, files):
 +
    print title
 +
    for f in sorted(files, key=lambda x: x.path):
 +
        f.pprint()
 +
    size = sum([f.size for f in files])
 +
    print u'{0:>88}{1:>12}'.format('Total:',human_size(size))
  
def main(host=None):
+
def populate(host=None):
    db = MythDB()
+
    be = MythBE()
+
 
+
 
     unfiltered = []
 
     unfiltered = []
     kwargs = {}
+
     kwargs = {'livetv':True}
 
     if host:
 
     if host:
 +
        with DB as c:
 +
            c.execute("""SELECT count(1) FROM settings
 +
                        WHERE hostname=%s AND value=%s""",
 +
                        (host, 'BackendServerIP'))
 +
            if c.fetchone()[0] == 0:
 +
                raise Exception('Invalid hostname specified on command line.')
 
         hosts = [host]
 
         hosts = [host]
 
         kwargs['hostname'] = host
 
         kwargs['hostname'] = host
 
     else:
 
     else:
         with db as c:
+
         with DB as c:
 
             c.execute("""SELECT hostname FROM settings
 
             c.execute("""SELECT hostname FROM settings
 
                         WHERE value='BackendServerIP'""")
 
                         WHERE value='BackendServerIP'""")
 
             hosts = [r[0] for r in c.fetchall()]
 
             hosts = [r[0] for r in c.fetchall()]
 
     for host in hosts:
 
     for host in hosts:
         for sg in db.getStorageGroup():
+
         for sg in DB.getStorageGroup():
 
             if sg.groupname in ('Videos','Banners','Coverart',\
 
             if sg.groupname in ('Videos','Banners','Coverart',\
 
                                 'Fanart','Screenshots','Trailers'):
 
                                 'Fanart','Screenshots','Trailers'):
 
                 continue
 
                 continue
 
             try:
 
             try:
                 dirs,files,sizes = be.getSGList(host, sg.groupname, sg.dirname)
+
                 dirs,files,sizes = BE.getSGList(host, sg.groupname, sg.dirname)
 
                 for f,s in zip(files,sizes):
 
                 for f,s in zip(files,sizes):
 
                     newfile = File(host, sg.groupname, sg.dirname, f, s)
 
                     newfile = File(host, sg.groupname, sg.dirname, f, s)
Line 103: Line 130:
 
                 pass
 
                 pass
  
     recs = list(db.searchRecorded(**kwargs))
+
     recs = list(DB.searchRecorded(**kwargs))
  
 
     zerorecs = []
 
     zerorecs = []
Line 138: Line 165:
 
         unfiltered.remove(f)
 
         unfiltered.remove(f)
  
     if len(recs):
+
     return (recs, zerorecs, orphvids, orphimgs, dbbackup, unfiltered)
        print "Recordings with missing files"
+
 
         for rec in recs:
+
def delete_recs(recs):
             if rec.subtitle:
+
    printrecs('The following recordings will be deleted', recs)
                 print ('  %s - %s' % (rec.title, rec.subtitle)).ljust(60),
+
    print 'Are you sure you want to continue?'
 +
    try:
 +
         res = raw_input('> ')
 +
        while True:
 +
             if res == 'yes':
 +
                 for rec in recs:
 +
                    rec.delete(True, True)
 +
                break
 +
            elif res == 'no':
 +
                break
 
             else:
 
             else:
                 print (' %s' % rec.title).ljust(60),
+
                 res = raw_input("'yes' or 'no' > ")
             print rec.basename
+
    except MythError:
 +
        name = u'{0.hostname}: {0.title}'.format(rec)
 +
        if rec.subtitle:
 +
             name += ' - '+rec.subtitle
 +
        print "Warning: Failed to delete '" + name + "'"
 +
    except KeyboardInterrupt:
 +
        pass
 +
    except EOFError:
 +
        sys.exit(0)
  
     if len(zerorecs):
+
def delete_files(files):
         print "\nZero byte recordings"
+
     printfiles('The following files will be deleted', files)
         for rec in zerorecs:
+
    print 'Are you sure you want to continue?'
             if rec.subtitle:
+
    try:
                 print ('  %s - %s' % (rec.title, rec.subtitle)).ljust(40),
+
         res = raw_input('> ')
 +
         while True:
 +
             if res == 'yes':
 +
                 for f in files:
 +
                    f.delete()
 +
                break
 +
            elif res == 'no':
 +
                break
 
             else:
 
             else:
                 print (' %s' % rec.title).ljust(40),
+
                 res = raw_input("'yes' or 'no' > ")
            print rec.basename
+
    except KeyboardInterrupt:
 +
        pass
 +
    except EOFError:
 +
        sys.exit(0)
  
    if len(orphvids):
+
def main(host=None):
        print "\nOrphaned video files"
+
  while True:
         for f in sorted(orphvids, key=lambda x: x.path):
+
         recs, zerorecs, orphvids, orphimgs, dbbackup, unfiltered = populate(host)
            # os.unlink(os.path.join(f.path,f))
+
            prettyprint(f)
+
        size = sum([f.size for f in orphvids])
+
        print 'Total:'.rjust(80),human_size(size).rjust(8)
+
  
    if len(orphimgs):
+
        if len(recs):
        print "\nOrphaned snapshots"
+
            printrecs("Recordings with missing files", recs)
         for f in sorted(orphimgs, key=lambda x: x.path):
+
         if len(zerorecs):
             prettyprint(f)
+
            printrecs("Zero byte recordings", zerorecs)
         size = sum([f.size for f in orphimgs])
+
        if len(orphvids):
         print 'Total:'.rjust(80),human_size(size).rjust(8)
+
             printfiles("Orphaned video files", orphvids)
 +
         if len(orphimgs):
 +
            printfiles("Orphaned snapshots", orphimgs)
 +
         if len(dbbackup):
 +
            printfiles("Database backups", dbbackup)
 +
        if len(unfiltered):
 +
            printfiles("Other files", unfiltered)
  
    if len(dbbackup):
+
        opts = []
        print "\nDatabase backups"
+
        if len(recs):
         for f in sorted(dbbackup, key=lambda x: x.path):
+
            opts.append(['Delete orphaned recording entries', delete_recs, recs])
             prettyprint(f)
+
         if len(zerorecs):
         size = sum([f.size for f in dbbackup])
+
            opts.append(['Delete zero byte recordings', delete_recs, zerorecs])
         print 'Total:'.rjust(80),human_size(size).rjust(8)
+
        if len(orphvids):
 +
             opts.append(['Delete orphaned video files', delete_files, orphvids])
 +
         if len(orphimgs):
 +
            opts.append(['Delete orphaned snapshots', delete_files, orphimgs])
 +
         if len(unfiltered):
 +
            opts.append(['Delete other files', delete_files, unfiltered])
 +
        opts.append(['Refresh list', None, None])
 +
        print 'Please select from the following'
 +
        for i, opt in enumerate(opts):
 +
            print ' {0}. {1}'.format(i+1, opt[0])
  
    if len(unfiltered):
+
        try:
        print "\nOther files"
+
            inner = True
        for f in sorted(unfiltered, key=lambda x: x.path):
+
            res = raw_input('> ')
            prettyprint(f)
+
            while inner:
        size = sum([f.size for f in unfiltered])
+
                try:
         print 'Total:'.rjust(80),human_size(size).rjust(8)
+
                    res = int(res)
 +
                except:
 +
                    res = raw_input('input number. ctrl-c to exit > ')
 +
                    continue
 +
                if (res <= 0) or (res > len(opts)):
 +
                    res = raw_input('input number within range > ')
 +
                    continue
 +
                break
 +
            opt = opts[res-1]
 +
            if opt[1] is None:
 +
                continue
 +
            else:
 +
                opt[1](opt[2])
 +
 
 +
         except KeyboardInterrupt:
 +
            break
 +
        except EOFError:
 +
            sys.exit(0)
 +
 
 +
DB = MythDB()
 +
BE = MythBE(db=DB)
 +
DB.searchRecorded.handler = MyRecorded
 +
DB.searchRecorded.dbclass = MyRecorded
  
 
if __name__ == '__main__':
 
if __name__ == '__main__':
Line 193: Line 280:
  
 
[[Category:Python_Scripts]]
 
[[Category:Python_Scripts]]
 +
[[Category:Management Scripts]]
 +
 +
 +
== Troubleshooting ==
 +
=== Syntax Error on Line 27 ===
 +
If you receive a syntax error on line 27:
 +
<pre>print u'  {0:<90}{1:>8}'.format(name, human_size(self.size))</pre>
 +
... double check your active Python version.  To be more specific, Python 2.6 or 2.7 is required, and this script will not run under Python 3.3. [http://www.mythtvtalk.com/find_orphans-py-syntax-error-line-27-a-16097]

Revision as of 23:27, 30 January 2014


Author Raymond Wagner
Description A scanner to look for missing and unknown recording files. This will only delete files after multiple confirmations.
Supports Version24.png  Version25.png  Version26.png 


This script shows recordings with missing files, or files with missing recordings. It can handle multiple backends and does not need to be run locally; however, recordings stored on offline backends will be marked as orphaned.

Additionally, this allows listing of database backups, and the listing and deletion of zero byte recordings, orphaned snapshots, and other unknown files.

>./find_orphans.py
Recordings with missing files
  Undercovers - Devices                  4642_20101006201300.mpg

Orphaned video files
  mythbe:/srv/mounts/twotb_1/video/2054_20080225110000.mpg                          2.5GB
                                                                          Total:    2.5GB

Orphaned snapshots
  myth0:/srv/mounts/myth0_1/video/4122_20101013113500.mpg.png                       2.6KB
  mythbe:/srv/mounts/twotb_1/video/2029_20100409024900.mpg.png                     84.9KB
  mythbe:/srv/mounts/twotb_1/video/2047_20100807180500.mpg.png                     92.9KB
  mythbe:/srv/mounts/twotb_1/video/2059_20100630090000.mpg.png                     87.0KB
                                                                          Total:  267.4KB

Database backups
  mythbe:/mnt/mythtv/store/backups/mythconverg--20101007134000.sql                 17.3MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1254-20100902174922.sql.gz          13.1MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1263-20100913163154.sql             62.1MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1263-20100913163216.sql.gz          13.0MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1263-20101007134659.sql.gz          15.9MB
  mythbe:/mnt/mythtv/store/backups/mythconverg-1264-20101008023651.sql.gz          16.5MB
                                                                          Total:  137.9MB

Other files
  mythbe:/srv/mounts/twotb_1/video/4121_20100312215900.mpg.tmp                    398.6MB
  mythbe:/srv/mounts/twotb_1/video/4191_20090928200000.mpg.tmp                      2.4GB
  mythbe:/srv/mounts/twotb_1/video/4191_20091005195900.mpg.tmp                      2.6GB
  mythbe:/srv/mounts/twotb_1/video/4642_20101006201300.mpg.1                        4.9GB
                                                                          Total:   10.2GB


PythonIcon.png find_orphans.py

#!/usr/bin/env python

from MythTV import MythDB, MythBE, Recorded, MythError
from socket import timeout

import os
import sys

def human_size(s):
    """Format a byte count as a short human-readable string.

    The value is divided by 1000 until it is no larger than 1000 or the
    largest known unit has been reached, then rounded to one decimal place.

    s -- size in bytes; anything accepted by float().

    Returns a string such as '84.9KB' or '2.5GB'.  The plain-byte suffix is
    'B ' (with a trailing space), so every suffix is two characters wide.
    """
    units = ('B ', 'KB', 'MB', 'GB', 'TB', 'PB')
    s = float(s)
    o = 0
    # Stop at the last unit rather than indexing past the end of the table:
    # with the table ending at 'GB', any size above 1000 GB raised IndexError.
    while s > 1000 and o < len(units) - 1:
        s /= 1000
        o += 1
    return str(round(s, 1)) + units[o]

class File( str ):
    """A file name (the string value itself) tagged with where the file lives.

    Because the string value is only the bare name, comparisons and `in`
    tests against plain strings match on the file name alone.

    Attributes set on construction:
      host  -- backend hostname the file was listed from
      group -- storage group name
      path  -- storage group directory
      size  -- file size, converted with int()
    """
    def __new__(cls, host, group, path, name, size):
        # Only `name` feeds the underlying str; the rest is stored by __init__.
        return super(File, cls).__new__(cls, name)

    def __init__(self, host, group, path, name, size):
        self.host, self.group, self.path = host, group, path
        self.size = int(size)

    def pprint(self):
        """Print one line: 'host: directory/name' padded, then the size."""
        location = os.path.join(self.path, self)
        label = u'%s: %s' % (self.host, location)
        print(u'  {0:<90}{1:>8}'.format(label, human_size(self.size)))

    def delete(self):
        """Call deleteFile for this name and storage group on the file's host."""
        # A fresh backend connection is opened to the host that listed the file.
        MythBE(self.host, db=DB).deleteFile(self, self.group)

class MyRecorded( Recorded ):
    """Recorded subclass that can print itself as one line of the report."""
    # NOTE(review): presumably pins the table name the base class would
    # otherwise derive from the subclass name -- confirm against the bindings.
    _table = 'recorded'

    def pprint(self):
        """Print 'hostname: title[ - subtitle]' followed by the basename."""
        pieces = [u'{0.hostname}: {0.title}'.format(self)]
        if self.subtitle:
            pieces.append(self.subtitle)
        print(u'  {0:<70}{1:>28}'.format(u' - '.join(pieces), self.basename))

def printrecs(title, recs):
    """Print a heading, the recordings ordered by title, and a count line.

    title -- heading printed before the listing
    recs  -- list of objects exposing .title and .pprint()
    """
    print(title)
    ordered = list(recs)
    ordered.sort(key=lambda entry: entry.title)
    for entry in ordered:
        entry.pprint()
    print(u'{0:>88}{1:>12}'.format('Count:', len(recs)))

def printfiles(title, files):
    """Print a heading, the files ordered by directory, and their total size.

    title -- heading printed before the listing
    files -- list of objects exposing .path, .size and .pprint()
    """
    print(title)
    ordered = list(files)
    ordered.sort(key=lambda entry: entry.path)
    total = 0
    for entry in ordered:
        entry.pprint()
        total += entry.size
    print(u'{0:>88}{1:>12}'.format('Total:', human_size(total)))

def populate(host=None):
    """Scan the backends and sort what is found into report categories.

    host -- optional backend hostname; when given, only that host is asked
            for file listings and only its recordings are searched.

    Returns a 6-tuple of lists:
      recs       -- recordings whose basename was not found in any listing
      zerorecs   -- recordings whose file was found but is under 1024 bytes
      orphvids   -- leftover files ending in '.mpg' or '.nuv'
      orphimgs   -- leftover files ending in '.png'
      dbbackup   -- leftover files with 'sql' anywhere in the name
      unfiltered -- every other leftover file

    Raises Exception if `host` is given but has no 'BackendServerIP' row in
    the settings table.
    """
    unfiltered = []
    # Names already collected.  File compares and hashes as its bare name, so
    # this gives the same de-duplication as scanning the list each time,
    # without the quadratic cost.
    seen = set()
    # NOTE(review): 'livetv' presumably makes the search include LiveTV
    # recordings -- confirm against MythDB.searchRecorded.
    kwargs = {'livetv':True}
    if host:
        with DB as c:
            c.execute("""SELECT count(1) FROM settings
                         WHERE hostname=%s AND value=%s""",
                        (host, 'BackendServerIP'))
            if c.fetchone()[0] == 0:
                raise Exception('Invalid hostname specified on command line.')
        hosts = [host]
        kwargs['hostname'] = host
    else:
        with DB as c:
            c.execute("""SELECT hostname FROM settings
                         WHERE value='BackendServerIP'""")
            hosts = [r[0] for r in c.fetchall()]

    # Storage groups that are never scanned for recording files.
    skipped_groups = ('Videos','Banners','Coverart',
                      'Fanart','Screenshots','Trailers')
    for backend in hosts:
        for sg in DB.getStorageGroup():
            if sg.groupname in skipped_groups:
                continue
            try:
                dirs,files,sizes = BE.getSGList(backend, sg.groupname,
                                                sg.dirname)
                for f,s in zip(files,sizes):
                    newfile = File(backend, sg.groupname, sg.dirname, f, s)
                    if newfile not in seen:
                        seen.add(newfile)
                        unfiltered.append(newfile)
            except Exception:
                # Best effort: a listing that fails simply contributes no
                # files.  Catching Exception instead of using a bare except
                # lets Ctrl-C and SystemExit propagate rather than being
                # swallowed mid-scan.
                pass

    recs = list(DB.searchRecorded(**kwargs))

    zerorecs = []
    for rec in list(recs):
        if rec.basename in unfiltered:
            # The recording's file exists, so it is not "missing".
            recs.remove(rec)
            found = unfiltered.pop(unfiltered.index(rec.basename))
            if found.size < 1024:
                zerorecs.append(rec)
            # Drop every remaining file whose name contains the recording's
            # name without its extension; they are not reported as unknown.
            name = rec.basename.rsplit('.',1)[0]
            for other in list(unfiltered):
                if name in other:
                    unfiltered.remove(other)

    # Whatever is left belongs to no recording; split it by file type.
    orphvids = []
    for f in list(unfiltered):
        if f.endswith(('.mpg', '.nuv')):
            orphvids.append(f)
            unfiltered.remove(f)

    orphimgs = []
    for f in list(unfiltered):
        if f.endswith('.png'):
            orphimgs.append(f)
            unfiltered.remove(f)

    dbbackup = []
    for f in list(unfiltered):
        if 'sql' in f:
            dbbackup.append(f)
            unfiltered.remove(f)

    return (recs, zerorecs, orphvids, orphimgs, dbbackup, unfiltered)

def delete_recs(recs):
    """Ask for confirmation, then delete the given recordings.

    recs -- list of recordings to delete.

    The list is printed and the user must answer 'yes' to proceed or 'no' to
    cancel; any other answer re-prompts.  Ctrl-C cancels and returns to the
    caller; end-of-input exits the program with status 0.

    A recording whose delete() raises MythError is reported with a warning
    and the remaining recordings are still processed.  Previously the first
    failure silently abandoned the rest of the list.
    """
    printrecs('The following recordings will be deleted', recs)
    print('Are you sure you want to continue?')
    try:
        res = raw_input('> ')
        while res not in ('yes', 'no'):
            res = raw_input("'yes' or 'no' > ")
        if res == 'yes':
            for rec in recs:
                try:
                    # NOTE(review): the two positional flags are presumably
                    # force/rerecord -- confirm against Recorded.delete.
                    rec.delete(True, True)
                except MythError:
                    name = u'{0.hostname}: {0.title}'.format(rec)
                    if rec.subtitle:
                        name += ' - '+rec.subtitle
                    print("Warning: Failed to delete '" + name + "'")
    except KeyboardInterrupt:
        pass
    except EOFError:
        sys.exit(0)

def delete_files(files):
    """Ask for confirmation, then delete the given files.

    files -- list of File objects to delete.

    The list is printed and the user must answer 'yes' to proceed or 'no' to
    cancel; any other answer re-prompts.  Ctrl-C cancels and returns to the
    caller; end-of-input exits the program with status 0.

    A file whose delete() raises MythError is reported with a warning and the
    remaining files are still processed, matching delete_recs.  Previously
    such an error escaped and aborted the program part-way through the batch.
    """
    printfiles('The following files will be deleted', files)
    print('Are you sure you want to continue?')
    try:
        res = raw_input('> ')
        while res not in ('yes', 'no'):
            res = raw_input("'yes' or 'no' > ")
        if res == 'yes':
            for f in files:
                try:
                    f.delete()
                except MythError:
                    name = u'%s: %s' % (f.host, os.path.join(f.path, f))
                    print("Warning: Failed to delete '" + name + "'")
    except KeyboardInterrupt:
        pass
    except EOFError:
        sys.exit(0)

def main(host=None):
    """Interactive loop: report what populate() finds and offer deletions.

    host -- optional backend hostname, passed through to populate().

    Each pass rescans, prints every non-empty category, then shows a numbered
    menu of delete actions plus 'Refresh list'.  Database backups are listed
    but never offered for deletion.  Ctrl-C at the menu leaves the loop;
    end-of-input exits the program with status 0.
    """
    while True:
        recs, zerorecs, orphvids, orphimgs, dbbackup, unfiltered = populate(host)

        # (heading, printer, items) for the report, in display order.
        report = (
            ("Recordings with missing files", printrecs, recs),
            ("Zero byte recordings", printrecs, zerorecs),
            ("Orphaned video files", printfiles, orphvids),
            ("Orphaned snapshots", printfiles, orphimgs),
            ("Database backups", printfiles, dbbackup),
            ("Other files", printfiles, unfiltered),
        )
        for heading, show, items in report:
            if items:
                show(heading, items)

        # (label, action, items) for the menu; empty categories are omitted.
        actions = (
            ('Delete orphaned recording entries', delete_recs, recs),
            ('Delete zero byte recordings', delete_recs, zerorecs),
            ('Delete orphaned video files', delete_files, orphvids),
            ('Delete orphaned snapshots', delete_files, orphimgs),
            ('Delete other files', delete_files, unfiltered),
        )
        opts = [list(action) for action in actions if action[2]]
        # An action of None means "just loop again", which rescans.
        opts.append(['Refresh list', None, None])
        print('Please select from the following')
        for i, opt in enumerate(opts):
            print(' {0}. {1}'.format(i+1, opt[0]))

        try:
            res = raw_input('> ')
            while True:
                try:
                    res = int(res)
                except ValueError:
                    # Only a non-numeric answer is expected here; a bare
                    # except would also hide unrelated errors.
                    res = raw_input('input number. ctrl-c to exit > ')
                    continue
                if (res <= 0) or (res > len(opts)):
                    res = raw_input('input number within range > ')
                    continue
                break
            label, action, items = opts[res-1]
            if action is not None:
                action(items)
        except KeyboardInterrupt:
            break
        except EOFError:
            sys.exit(0)

# Module-level database and backend connections, created at import time and
# shared by populate() and File.delete().
DB = MythDB()
BE = MythBE(db=DB)
# Point searchRecorded at MyRecorded so the recordings it returns carry the
# pprint() method that printrecs() calls.
# NOTE(review): relies on the bindings honouring both attributes -- confirm.
DB.searchRecorded.handler = MyRecorded
DB.searchRecorded.dbclass = MyRecorded

if __name__ == '__main__':
    # Exactly one command-line argument is taken as the backend hostname;
    # with any other argument count main() runs with its default of None.
    main(sys.argv[1] if len(sys.argv) == 2 else None)


Troubleshooting

Syntax Error on Line 27

If you receive a syntax error on line 27:

print u'  {0:<90}{1:>8}'.format(name, human_size(self.size))

... double check your active Python version. This script requires Python 2.6 or 2.7; it will not run under Python 3 (for example 3.3), where `print` is a function rather than a statement. [1]