[mythtv] [PATCH] Mythvideo IMDB grabber
Mikael Magnusson
mikaelmagnusson at tjohoo.se
Tue Sep 2 14:19:12 EDT 2003
On Mon, Sep 01, 2003 at 02:39:10PM +0000, Andrew Peabody wrote:
> IMDB changed the URLs slightly. I don't have them in front of me, but if
> I recall:
>
> Instead of /Title?{movienum} it is now /title/tt{movienum}
>
> For posters it is now /title/tt{movienum}/posters
>
> However, that change is easy and will only take a few seconds; the real
> issue is that their new page checks for an HTTP User-Agent. IMDB does not
> recognize the one from MythVideo, it shows up as part of the URL instead of
> being sent as part of the header (just a guess, but that is what it looks
> like from the debug message at the bottom of the error page). Either way
> IMDB does not see the User-Agent string and thinks MythVideo is an
> anonymous http client which they do not allow.
>
> I'm not too familiar with Qt; hopefully someone who knows more about Qt
> knows how to send a User-Agent/browser string successfully. I think
> QHttp might be able to let the header be custom set, but I can't get it to
> work.
This patch contains the necessary changes to the IMDB grabber code.
The grabber now uses a new class called HttpComms, which replaces
INETComms in MythVideo. HttpComms uses QHttp instead of QUrlOperator,
as proposed.
The mythtv patch should be applied in mythtv/libs.
Regards,
Mikael Magnusson
-------------- next part --------------
? libmyth/httpcomms.cpp
? libmyth/httpcomms.h
Index: libmyth/libmyth.pro
===================================================================
RCS file: /var/lib/mythcvs/mythtv/libs/libmyth/libmyth.pro,v
retrieving revision 1.32
diff -u -r1.32 libmyth.pro
--- libmyth/libmyth.pro 18 Aug 2003 21:28:22 -0000 1.32
+++ libmyth/libmyth.pro 2 Sep 2003 10:35:30 -0000
@@ -12,19 +12,19 @@
HEADERS += dialogbox.h lcddevice.h mythcontext.h mythwidgets.h oldsettings.h
HEADERS += remotefile.h settings.h themedmenu.h util.h mythwizard.h
HEADERS += volumecontrol.h uitypes.h xmlparse.h mythplugin.h
-HEADERS += mythdialogs.h audiooutput.h audiooutputoss.h inetcomms.h
+HEADERS += mythdialogs.h audiooutput.h audiooutputoss.h inetcomms.h httpcomms.h
SOURCES += dialogbox.cpp lcddevice.cpp mythcontext.cpp mythwidgets.cpp
SOURCES += oldsettings.cpp remotefile.cpp settings.cpp themedmenu.cpp
SOURCES += util.cpp mythwizard.cpp volumecontrol.h uitypes.cpp xmlparse.cpp
SOURCES += mythplugin.cpp mythdialogs.cpp audiooutput.cpp audiooutputoss.cpp
-SOURCES += inetcomms.cpp
+SOURCES += inetcomms.cpp httpcomms.cpp
inc.path = $${PREFIX}/include/mythtv/
inc.files = dialogbox.h lcddevice.h themedmenu.h mythcontext.h
inc.files += mythwidgets.h remotefile.h util.h oldsettings.h volumecontrol.h
inc.files += settings.h uitypes.h xmlparse.h mythplugin.h mythdialogs.h
-inc.files += audiooutput.h inetcomms.h
+inc.files += audiooutput.h inetcomms.h httpcomms.h
using_alsa {
DEFINES += USE_ALSA
diff -uN /dev/null libmyth/httpcomms.cpp
--- /dev/null 1970-01-01 01:00:00.000000000 +0100
+++ libmyth/httpcomms.cpp 2003-09-02 00:50:52.000000000 +0200
@@ -0,0 +1,88 @@
+#include <iostream>
+using namespace std;
+
+#include "httpcomms.h"
+
+// Starts fetching url with the default request header assembled by init(QUrl &).
+HttpComms::HttpComms(QUrl &url)
+{
+ init(url);
+}
+
+// Starts fetching url using the caller-supplied request header.
+HttpComms::HttpComms(QUrl &url, QHttpRequestHeader &header)
+{
+ init(url, header);
+}
+
+// Frees the QHttp object allocated in init(); stop() is not called from here.
+HttpComms::~HttpComms()
+{
+ delete http;
+}
+
+// Builds the default GET header (Host plus a browser-like User-Agent) and starts the request.
+void HttpComms::init(QUrl &url)
+{
+ QHttpRequestHeader header("GET", url.encodedPathAndQuery());
+ const char *userAgent = "Mozilla/9.876 (X11; U; Linux 2.2.12-20 i686, en) Gecko/25250101 Netscape/5.432b1";
+ // The quoted report says IMDB rejects clients without a User-Agent, so send it as a real header field.
+ header.setValue("Host", url.host());
+ header.setValue("User-Agent", userAgent);
+
+ init(url, header);
+}
+
+// Creates the QHttp object, points it at the URL's host and port, wires the slots and sends header.
+void HttpComms::init(QUrl &url, QHttpRequestHeader &header)
+{
+ http = new QHttp();
+ Q_UINT16 port = 80;
+ // Port 80 is the fallback when the URL does not name a port.
+ if (url.hasPort())
+ {
+ port = url.port();
+ }
+
+ http->setHost(url.host(), port);
+
+ m_done = false;
+ m_data = "";
+ // Slots are connected before the request is issued.
+ connect(http, SIGNAL(done(bool)),
+ this, SLOT(done(bool)));
+ connect(http, SIGNAL(stateChanged(int)),
+ this, SLOT(stateChanged(int)));
+
+ http->request(header);
+}
+
+// Detaches every connection from the QHttp object's signals, then aborts; m_done is not modified here.
+void HttpComms::stop()
+{
+ disconnect(http, 0, 0, 0);
+ http->abort();
+}
+
+// Slot for QHttp's done(bool): prints the error or stores the response body, then sets m_done.
+void HttpComms::done(bool error)
+{
+ //printf("done %d\n", error);
+ if (error)
+ {
+ cout << "MythVideo: NetworkOperation Error on Finish: " +
+ http->errorString() + ".\n";
+ }
+ else
+ {
+ m_data = QString(http->readAll());
+ }
+
+ m_done = true;
+}
+
+// Slot for QHttp's stateChanged(int); currently does nothing (only a commented-out debug print).
+void HttpComms::stateChanged ( int state )
+{
+// printf("stateChanged %d\n", state);
+}
diff -uN /dev/null libmyth/httpcomms.h
--- /dev/null 1970-01-01 01:00:00.000000000 +0100
+++ libmyth/httpcomms.h 2003-09-02 00:14:52.000000000 +0200
@@ -0,0 +1,38 @@
+#ifndef HTTPCOMMS_H_
+#define HTTPCOMMS_H_
+// Guard macro is named after this header so it stays distinct from the guard in inetcomms.h.
+#include <qhttp.h>
+#include <qfile.h>
+#include <qurl.h>
+#include <qobject.h>
+// Issues one HTTP request through QHttp and buffers the reply; callers poll isDone() then getData().
+class HttpComms : public QObject
+{
+ Q_OBJECT
+ public:
+ HttpComms(QUrl &url);
+ HttpComms(QUrl &url, QHttpRequestHeader &header);
+ virtual ~HttpComms();
+
+ bool isDone(void) { return m_done; }
+
+ QString getData(void) { return m_data; }
+ void stop();
+
+ protected:
+ void init(QUrl &url);
+ void init(QUrl &url, QHttpRequestHeader &header);
+
+ // Slots connected to the QHttp object's signals in init().
+ private slots:
+ void done(bool error);
+ void stateChanged ( int state );
+
+ private:
+ QHttp *http;
+ bool m_done;
+ QString m_data;
+};
+
+#endif
+
-------------- next part --------------
Index: mythvideo/videomanager.cpp
===================================================================
RCS file: /var/lib/mythcvs/mythvideo/mythvideo/videomanager.cpp,v
retrieving revision 1.13
diff -u -r1.13 videomanager.cpp
--- mythvideo/videomanager.cpp 25 Aug 2003 21:29:27 -0000 1.13
+++ mythvideo/videomanager.cpp 2 Sep 2003 10:23:40 -0000
@@ -10,6 +10,7 @@
#include <qnetwork.h>
#include <qurl.h>
#include <qdir.h>
+#include <qurloperator.h>
using namespace std;
@@ -51,7 +52,7 @@
stopProcessing = false;
m_state = 0;
- InetGrabber = NULL;
+ httpGrabber = NULL;
urlTimer = new QTimer(this);
connect(urlTimer, SIGNAL(timeout()), SLOT(GetMovieListingTimeOut()));
@@ -97,10 +98,10 @@
VideoManager::~VideoManager(void)
{
- if (InetGrabber)
+ if (httpGrabber)
{
- InetGrabber->stop();
- delete InetGrabber;
+ httpGrabber->stop();
+ delete httpGrabber;
}
delete urlTimer;
@@ -333,27 +334,26 @@
QString host = "www.imdb.com";
- QUrl url("http://" + host + "/Posters?" + movieNum
- + " HTTP/1.1\nHost: " + host + "\nUser-Agent: Mozilla/9.876 (X11; U; Linux 2.2.12-20 i686, en)"
- + " Gecko/25250101 Netscape/5.432b1\n");
+ QUrl url("http://" + host + "/title/tt" + movieNum + "/posters");
//cout << "Grabbing Poster HTML From: " << url.toString() << endl;
- if (InetGrabber)
+ if (httpGrabber)
{
- InetGrabber->stop();
- delete InetGrabber;
+ httpGrabber->stop();
+ delete httpGrabber;
}
- InetGrabber = new INETComms(url);
+ httpGrabber = new HttpComms(url);
- while (!InetGrabber->isDone())
+ while (!httpGrabber->isDone())
{
qApp->processEvents();
+ usleep(10000);
}
QString res;
- res = InetGrabber->getData();
+ res = httpGrabber->getData();
QString beg = "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" "
"background=\"http://posters.imdb.com/posters/";
@@ -400,27 +400,26 @@
movieNumber = movieNum;
QString host = "www.imdb.com";
- QUrl url("http://" + host + "/Title?" + movieNum
- + " HTTP/1.1\nHost: " + host + "\nUser-Agent: Mozilla/9.876 (X11; U; Linux 2.2.12-20 i686, en)"
- + " Gecko/25250101 Netscape/5.432b1\n");
+ QUrl url("http://" + host + "/title/tt" + movieNum + "/");
//cout << "Grabbing Data From: " << url.toString() << endl;
- if (InetGrabber)
+ if (httpGrabber)
{
- InetGrabber->stop();
- delete InetGrabber;
+ httpGrabber->stop();
+ delete httpGrabber;
}
- InetGrabber = new INETComms(url);
+ httpGrabber = new HttpComms(url);
- while (!InetGrabber->isDone())
+ while (!httpGrabber->isDone())
{
qApp->processEvents();
+ usleep(10000);
}
QString res;
- res = InetGrabber->getData();
+ res = httpGrabber->getData();
ParseMovieData(res);
@@ -432,36 +431,36 @@
QString host = "us.imdb.com";
theMovieName = movieName;
- QUrl url("http://" + host + "/Tsearch?title=" + movieName + "&type=fuzzy&from_year=1890"
- + "&to_year=2010&sort=smart&tv=off&x=12&y=14"
- + " HTTP/1.1\nHost: us.imdb.com\nUser-Agent: Mozilla/9.876 (X11; U; Linux 2.2.12-20 i686, en)"
- + " Gecko/25250101 Netscape/5.432b1\n");
+ QUrl url("http://" + host + "/Tsearch?title=" + movieName
+ + "&type=fuzzy&from_year=1890"
+ + "&to_year=2010&sort=smart&tv=off&x=12&y=14");
//cout << "Grabbing Listing From: " << url.toString() << endl;
- if (InetGrabber)
+ if (httpGrabber)
{
- InetGrabber->stop();
- delete InetGrabber;
+ httpGrabber->stop();
+ delete httpGrabber;
}
- InetGrabber = new INETComms(url);
+ httpGrabber = new HttpComms(url);
urlTimer->stop();
urlTimer->start(10000);
stopProcessing = false;
- while (!InetGrabber->isDone())
+ while (!httpGrabber->isDone())
{
qApp->processEvents();
if (stopProcessing)
return 1;
+ usleep(10000);
}
urlTimer->stop();
QString res;
- res = InetGrabber->getData();
+ res = httpGrabber->getData();
QString movies = parseData(res, "<A NAME=\"mov\">Movies</A></H2>", "</TABLE>");
@@ -506,7 +505,7 @@
else
movieYear = mYear.toInt();
- movieDirector = parseData(data, ">Directed by</b><br>\n<a href=\"/Name?", "</a><br>");
+ movieDirector = parseData(data, ">Directed by</b><br>\n<a href=\"/name/nm", "</a><br>");
if (movieDirector != "<NULL>")
movieDirector = movieDirector.right(movieDirector.length() - movieDirector.find("\">") - 2);
moviePlot = parseData(data, "<b class=\"ch\">Plot Outline:</b> ", "<a href=\"");
Index: mythvideo/videomanager.h
===================================================================
RCS file: /var/lib/mythcvs/mythvideo/mythvideo/videomanager.h,v
retrieving revision 1.4
diff -u -r1.4 videomanager.h
--- mythvideo/videomanager.h 28 Jul 2003 15:49:07 -0000 1.4
+++ mythvideo/videomanager.h 2 Sep 2003 10:23:40 -0000
@@ -6,7 +6,7 @@
#include <qapplication.h>
#include <qstringlist.h>
-#include <mythtv/inetcomms.h>
+#include <mythtv/httpcomms.h>
#include "metadata.h"
#include <mythtv/mythwidgets.h>
#include <qdom.h>
@@ -62,7 +62,7 @@
QMap<QString, QString> parseMovieList(QString);
void ResetCurrentItem();
- INETComms *InetGrabber;
+ HttpComms *httpGrabber;
void RefreshMovieList();
QString ratingCountry;
void GetMovieData(QString);
More information about the mythtv-dev
mailing list