[mythtv] [PATCH] Mythvideo IMDB grabber

Mikael Magnusson mikaelmagnusson at tjohoo.se
Tue Sep 2 14:19:12 EDT 2003


On Mon, Sep 01, 2003 at 02:39:10PM +0000, Andrew Peabody wrote:
> IMDB changed the URLs slightly.  I don't have them in front of me, but if 
> I recall:
> 
> Instead of /Title?{movienum} it is now /title/tt{movienum}
> 
> For posters it is now /title/tt{movienum}/posters
> 
> However, that change is easy and will only take a few seconds; the real 
> issue is that their new page checks for an HTTP User-Agent.  IMDB does not 
> recognize the one from MythVideo, it shows up as part of the URL instead of 
> being sent as part of the header (just a guess, but that is what it looks 
> like from the debug message at the bottom of the error page).  Either way 
> IMDB does not see the User-Agent string and thinks MythVideo is an 
> anonymous http client which they do not allow.
> 
> I'm not too familiar with Qt; hopefully someone who knows more about Qt 
> knows how to send a User-Agent/Browser String successfully.  I think 
> QHttp might be able to let the header be custom set, but I can't get it to 
> work.

This patch contains the necessary changes to the IMDB grabber code.

The grabber now uses a new class called HttpComms, which replaces 
INETComms in MythVideo. HttpComms uses QHttp instead of QUrlOperator, 
as proposed.

The mythtv patch should be applied in mythtv/libs.

Regards,

Mikael Magnusson

-------------- next part --------------
? libmyth/httpcomms.cpp
? libmyth/httpcomms.h
Index: libmyth/libmyth.pro
===================================================================
RCS file: /var/lib/mythcvs/mythtv/libs/libmyth/libmyth.pro,v
retrieving revision 1.32
diff -u -r1.32 libmyth.pro
--- libmyth/libmyth.pro	18 Aug 2003 21:28:22 -0000	1.32
+++ libmyth/libmyth.pro	2 Sep 2003 10:35:30 -0000
@@ -12,19 +12,19 @@
 HEADERS += dialogbox.h lcddevice.h mythcontext.h mythwidgets.h oldsettings.h  
 HEADERS += remotefile.h settings.h themedmenu.h util.h mythwizard.h
 HEADERS += volumecontrol.h uitypes.h xmlparse.h mythplugin.h
-HEADERS += mythdialogs.h audiooutput.h audiooutputoss.h inetcomms.h
+HEADERS += mythdialogs.h audiooutput.h audiooutputoss.h inetcomms.h httpcomms.h
 
 SOURCES += dialogbox.cpp lcddevice.cpp mythcontext.cpp mythwidgets.cpp 
 SOURCES += oldsettings.cpp remotefile.cpp settings.cpp themedmenu.cpp
 SOURCES += util.cpp mythwizard.cpp volumecontrol.h uitypes.cpp xmlparse.cpp
 SOURCES += mythplugin.cpp mythdialogs.cpp audiooutput.cpp audiooutputoss.cpp
-SOURCES += inetcomms.cpp
+SOURCES += inetcomms.cpp httpcomms.cpp
 
 inc.path = $${PREFIX}/include/mythtv/
 inc.files  = dialogbox.h lcddevice.h themedmenu.h mythcontext.h 
 inc.files += mythwidgets.h remotefile.h util.h oldsettings.h volumecontrol.h
 inc.files += settings.h uitypes.h xmlparse.h mythplugin.h mythdialogs.h
-inc.files += audiooutput.h inetcomms.h
+inc.files += audiooutput.h inetcomms.h httpcomms.h
 
 using_alsa {
     DEFINES += USE_ALSA
diff -uN /dev/null libmyth/httpcomms.cpp
--- /dev/null	1970-01-01 01:00:00.000000000 +0100
+++ libmyth/httpcomms.cpp	2003-09-02 00:50:52.000000000 +0200
@@ -0,0 +1,102 @@
+#include <iostream>
+using namespace std;
+
+#include "httpcomms.h"
+
+
+// Build a default GET request for url and start fetching it immediately.
+HttpComms::HttpComms(QUrl &url)
+{
+    init(url);
+}
+
+
+// Start fetching url using the caller-supplied request header.
+HttpComms::HttpComms(QUrl &url, QHttpRequestHeader &header)
+{
+    init(url, header);
+}
+
+
+// Release the QHttp object allocated in init().
+HttpComms::~HttpComms()
+{
+    delete http;
+}
+
+
+// Construct a GET request header for url (Host plus a browser-like
+// User-Agent) and hand it to the two-argument init().
+void HttpComms::init(QUrl &url)
+{
+    QHttpRequestHeader header("GET", url.encodedPathAndQuery());
+    // String literals are const; binding one to a plain char* is a
+    // deprecated conversion.
+    const char *userAgent = "Mozilla/9.876 (X11; U; Linux 2.2.12-20 i686, en) Gecko/25250101 Netscape/5.432b1";
+
+    header.setValue("Host", url.host());
+    header.setValue("User-Agent", userAgent);
+
+    init(url, header);
+}
+
+
+// Create the QHttp object, point it at the host (and port, default 80)
+// taken from url, hook up the result slots and issue the request.
+void HttpComms::init(QUrl &url, QHttpRequestHeader &header)
+{
+    http = new QHttp();
+    Q_UINT16 port = 80;
+
+    if (url.hasPort())
+    {
+        port = url.port();
+    }
+
+    http->setHost(url.host(), port);
+
+    m_done = false;
+    m_data = "";
+
+    connect(http, SIGNAL(done(bool)),
+            this, SLOT(done(bool)));
+    connect(http, SIGNAL(stateChanged(int)),
+            this, SLOT(stateChanged(int)));
+
+    http->request(header);
+}
+
+
+// Cancel the request.  All signal connections from the QHttp object are
+// dropped first, so nothing emitted during abort() reaches our slots.
+void HttpComms::stop()
+{
+    disconnect(http, 0, 0, 0);
+    http->abort();
+}
+
+
+// Slot for QHttp::done(): report a failure or store the response body,
+// then flag completion so that isDone() returns true.
+void HttpComms::done(bool error)
+{
+    //printf("done %d\n", error);
+    if (error)
+    {
+       cout << "MythVideo: NetworkOperation Error on Finish: " +
+           http->errorString() + ".\n";
+    }
+    else
+    {
+        m_data = QString(http->readAll());
+    }
+
+    m_done = true;
+}
+
+
+// Slot for QHttp::stateChanged(); currently only a debugging hook.
+void HttpComms::stateChanged ( int state )
+{
+//    printf("stateChanged %d\n", state);
+}
diff -uN /dev/null libmyth/httpcomms.h
--- /dev/null	1970-01-01 01:00:00.000000000 +0100
+++ libmyth/httpcomms.h	2003-09-02 00:14:52.000000000 +0200
@@ -0,0 +1,41 @@
+#ifndef HTTPCOMMS_H_
+#define HTTPCOMMS_H_
+
+#include <qhttp.h>
+#include <qfile.h>
+#include <qurl.h>
+#include <qobject.h>
+
+// Fetches a single URL over HTTP using QHttp.  The request is started by
+// the constructor; callers poll isDone() and then read getData().
+class HttpComms : public QObject
+{
+    Q_OBJECT
+  public:
+    HttpComms(QUrl &url);
+    HttpComms(QUrl &url, QHttpRequestHeader &header);
+    virtual ~HttpComms();
+
+    // True once the request has finished, successfully or not.
+    bool isDone(void) const { return m_done; }
+
+    // Response body; left empty if the request failed or is not done yet.
+    QString getData(void) const { return m_data; }
+    void stop();
+
+  protected:
+    void init(QUrl &url);
+    void init(QUrl &url, QHttpRequestHeader &header);
+
+  private slots:
+    void done(bool error);
+    void stateChanged ( int state );
+
+  private:
+    QHttp *http;
+    bool m_done;
+    QString m_data;
+};
+
+#endif
+
-------------- next part --------------
Index: mythvideo/videomanager.cpp
===================================================================
RCS file: /var/lib/mythcvs/mythvideo/mythvideo/videomanager.cpp,v
retrieving revision 1.13
diff -u -r1.13 videomanager.cpp
--- mythvideo/videomanager.cpp	25 Aug 2003 21:29:27 -0000	1.13
+++ mythvideo/videomanager.cpp	2 Sep 2003 10:23:40 -0000
@@ -10,6 +10,7 @@
 #include <qnetwork.h>
 #include <qurl.h>
 #include <qdir.h>
+#include <qurloperator.h>
 
 using namespace std;
 
@@ -51,7 +52,7 @@
     stopProcessing = false;
 
     m_state = 0;
-    InetGrabber = NULL;
+    httpGrabber = NULL;
 
     urlTimer = new QTimer(this);
     connect(urlTimer, SIGNAL(timeout()), SLOT(GetMovieListingTimeOut()));
@@ -97,10 +98,10 @@
 
 VideoManager::~VideoManager(void)
 {
-    if (InetGrabber)
+    if (httpGrabber)
     {
-        InetGrabber->stop();
-        delete InetGrabber;
+        httpGrabber->stop();
+        delete httpGrabber;
     }
     delete urlTimer;
 
@@ -333,27 +334,26 @@
 
     QString host = "www.imdb.com";
 
-    QUrl url("http://" + host + "/Posters?" + movieNum
-           + " HTTP/1.1\nHost: " + host + "\nUser-Agent: Mozilla/9.876 (X11; U; Linux 2.2.12-20 i686, en)"
-           + " Gecko/25250101 Netscape/5.432b1\n");
+    QUrl url("http://" + host + "/title/tt" + movieNum + "/posters");
 
     //cout << "Grabbing Poster HTML From: " << url.toString() << endl;
 
-    if (InetGrabber)
+    if (httpGrabber)
     {
-        InetGrabber->stop();
-        delete InetGrabber;
+        httpGrabber->stop();
+        delete httpGrabber;
     }
 
-    InetGrabber = new INETComms(url);
+    httpGrabber = new HttpComms(url);
 
-    while (!InetGrabber->isDone())
+    while (!httpGrabber->isDone())
     {
         qApp->processEvents();
+        usleep(10000);
     }
 
     QString res;
-    res = InetGrabber->getData();
+    res = httpGrabber->getData();
 
     QString beg = "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" "
                   "background=\"http://posters.imdb.com/posters/";
@@ -400,27 +400,26 @@
     movieNumber = movieNum;
     QString host = "www.imdb.com";
 
-    QUrl url("http://" + host + "/Title?" + movieNum
-           + " HTTP/1.1\nHost: " + host + "\nUser-Agent: Mozilla/9.876 (X11; U; Linux 2.2.12-20 i686, en)"
-           + " Gecko/25250101 Netscape/5.432b1\n");
+    QUrl url("http://" + host + "/title/tt" + movieNum + "/");
 
     //cout << "Grabbing Data From: " << url.toString() << endl;
 
-    if (InetGrabber)
+    if (httpGrabber)
     {
-        InetGrabber->stop();
-        delete InetGrabber;
+        httpGrabber->stop();
+        delete httpGrabber;
     }
 
-    InetGrabber = new INETComms(url);
+    httpGrabber = new HttpComms(url);
 
-    while (!InetGrabber->isDone())
+    while (!httpGrabber->isDone())
     {
         qApp->processEvents();
+        usleep(10000);
     }
 
     QString res;
-    res = InetGrabber->getData();
+    res = httpGrabber->getData();
 
     ParseMovieData(res);
 
@@ -432,36 +431,36 @@
     QString host = "us.imdb.com";
     theMovieName = movieName;
 
-    QUrl url("http://" + host + "/Tsearch?title=" + movieName + "&type=fuzzy&from_year=1890"
-           + "&to_year=2010&sort=smart&tv=off&x=12&y=14"
-	   + " HTTP/1.1\nHost: us.imdb.com\nUser-Agent: Mozilla/9.876 (X11; U; Linux 2.2.12-20 i686, en)"
-	   + " Gecko/25250101 Netscape/5.432b1\n");
+    QUrl url("http://" + host + "/Tsearch?title=" + movieName 
+             + "&type=fuzzy&from_year=1890"
+             + "&to_year=2010&sort=smart&tv=off&x=12&y=14");
 
     //cout << "Grabbing Listing From: " << url.toString() << endl;
 
-    if (InetGrabber)
+    if (httpGrabber)
     {
-        InetGrabber->stop();
-        delete InetGrabber;
+        httpGrabber->stop();
+        delete httpGrabber;
     }
 
-    InetGrabber = new INETComms(url);
+    httpGrabber = new HttpComms(url);
 
     urlTimer->stop();
     urlTimer->start(10000);
 
     stopProcessing = false;
-    while (!InetGrabber->isDone())
+    while (!httpGrabber->isDone())
     {
         qApp->processEvents();
 	if (stopProcessing)
 		return 1;
+        usleep(10000);
     }
 
     urlTimer->stop();
 
     QString res;
-    res = InetGrabber->getData();
+    res = httpGrabber->getData();
 
     QString movies = parseData(res, "<A NAME=\"mov\">Movies</A></H2>", "</TABLE>");
 
@@ -506,7 +505,7 @@
     else 
         movieYear = mYear.toInt();
  
-    movieDirector = parseData(data, ">Directed by</b><br>\n<a href=\"/Name?", "</a><br>");
+    movieDirector = parseData(data, ">Directed by</b><br>\n<a href=\"/name/nm", "</a><br>");
     if (movieDirector != "<NULL>")
         movieDirector = movieDirector.right(movieDirector.length() - movieDirector.find("\">") - 2);
     moviePlot = parseData(data, "<b class=\"ch\">Plot Outline:</b> ", "<a href=\"");
Index: mythvideo/videomanager.h
===================================================================
RCS file: /var/lib/mythcvs/mythvideo/mythvideo/videomanager.h,v
retrieving revision 1.4
diff -u -r1.4 videomanager.h
--- mythvideo/videomanager.h	28 Jul 2003 15:49:07 -0000	1.4
+++ mythvideo/videomanager.h	2 Sep 2003 10:23:40 -0000
@@ -6,7 +6,7 @@
 #include <qapplication.h>
 #include <qstringlist.h>
 
-#include <mythtv/inetcomms.h>
+#include <mythtv/httpcomms.h>
 #include "metadata.h"
 #include <mythtv/mythwidgets.h>
 #include <qdom.h>
@@ -62,7 +62,7 @@
     QMap<QString, QString> parseMovieList(QString);
     void ResetCurrentItem();
 
-    INETComms *InetGrabber;
+    HttpComms *httpGrabber;
     void RefreshMovieList();
     QString ratingCountry;
     void GetMovieData(QString);


More information about the mythtv-dev mailing list