CyTube/lib/get-info.js

938 lines
29 KiB
JavaScript
Raw Normal View History

var http = require("http");
var https = require("https");
var cheerio = require('cheerio');
2013-03-27 19:28:51 +00:00
var Logger = require("./logger.js");
2014-05-21 02:30:14 +00:00
var Media = require("./media");
var CustomEmbedFilter = require("./customembed").filter;
2013-10-11 21:31:40 +00:00
var Server = require("./server");
2014-01-22 23:11:26 +00:00
var Config = require("./config");
var ffmpeg = require("./ffmpeg");
var mediaquery = require("cytube-mediaquery");
var YouTube = require("cytube-mediaquery/lib/provider/youtube");
2013-10-11 21:31:40 +00:00
/*
* Preference map of quality => youtube formats.
* see https://en.wikipedia.org/wiki/Youtube#Quality_and_codecs
*
* Prefer WebM over MP4, ignore other codecs (e.g. FLV)
*/
const GOOGLE_PREFERENCE = {
"hd1080": [37, 46],
"hd720": [22, 45],
"large": [59, 44],
"medium": [18, 43, 34] // 34 is 360p FLV as a last-ditch
};
const CONTENT_TYPES = {
43: "webm",
44: "webm",
45: "webm",
46: "webm",
18: "mp4",
22: "mp4",
37: "mp4",
59: "mp4",
34: "flv"
};
2013-10-11 21:31:40 +00:00
var urlRetrieve = function (transport, options, callback) {
var req = transport.request(options, function (res) {
2015-01-06 15:54:14 +00:00
res.on("error", function (err) {
Logger.errlog.log("HTTP response " + options.host + options.path + " failed: "+
err);
callback(503, "");
});
var buffer = "";
res.setEncoding("utf-8");
res.on("data", function (chunk) {
buffer += chunk;
});
res.on("end", function () {
callback(res.statusCode, buffer);
});
});
2013-08-03 14:52:24 +00:00
req.on("error", function (err) {
Logger.errlog.log("HTTP request " + options.host + options.path + " failed: " +
err);
callback(503, "");
2013-10-11 21:31:40 +00:00
});
req.end();
2013-10-11 21:31:40 +00:00
};
var mediaTypeMap = {
"youtube": "yt",
"googledrive": "gd",
"google+": "gp"
};
function convertMedia(media) {
return new Media(media.id, media.title, media.duration, mediaTypeMap[media.type],
media.meta);
}
2013-10-11 21:31:40 +00:00
var Getters = {
/* youtube.com */
yt: function (id, callback) {
if (!Config.get("youtube-v3-key")) {
return Getters.yt2(id, callback);
2013-10-11 21:31:40 +00:00
}
YouTube.lookup(id).then(function (video) {
var meta = {};
if (video.meta.blocked) {
meta.restricted = video.meta.blocked;
}
var media = new Media(video.id, video.title, video.duration, "yt", meta);
callback(false, media);
}).catch(function (err) {
callback(err.message || err, null);
2013-10-11 21:31:40 +00:00
});
},
/* youtube.com playlists */
yp: function (id, callback) {
if (!Config.get("youtube-v3-key")) {
return Getters.yp2(id, callback);
2013-10-11 21:31:40 +00:00
}
YouTube.lookupPlaylist(id).then(function (videos) {
videos = videos.map(function (video) {
var meta = {};
if (video.meta.blocked) {
meta.restricted = video.meta.blocked;
}
return new Media(video.id, video.title, video.duration, "yt", meta);
});
callback(null, videos);
}).catch(function (err) {
callback(err.message || err, null);
2013-10-11 21:31:40 +00:00
});
},
/* youtube.com search */
ytSearch: function (query, callback) {
if (!Config.get("youtube-v3-key")) {
return Getters.ytSearch2(query.split(" "), callback);
2014-05-21 02:30:14 +00:00
}
2013-10-11 21:31:40 +00:00
YouTube.search(query).then(function (res) {
var videos = res.results;
videos = videos.map(function (video) {
var meta = {};
if (video.meta.blocked) {
meta.restricted = video.meta.blocked;
}
var media = new Media(video.id, video.title, video.duration, "yt", meta);
media.thumb = { url: video.meta.thumbnail };
return media;
});
callback(null, videos);
}).catch(function (err) {
callback(err.message || err, null);
2013-10-11 21:31:40 +00:00
});
},
/* vimeo.com */
vi: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
if (Config.get("vimeo-oauth.enabled")) {
return Getters.vi_oauth(id, callback);
}
2013-10-11 21:31:40 +00:00
var options = {
host: "vimeo.com",
port: 443,
path: "/api/v2/video/" + id + ".json",
method: "GET",
dataType: "jsonp",
timeout: 1000
};
urlRetrieve(https, options, function (status, data) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private video", null);
case 404:
return callback("Video not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
}
2013-08-01 03:26:11 +00:00
2013-10-11 21:31:40 +00:00
try {
data = JSON.parse(data);
data = data[0];
var seconds = data.duration;
var title = data.title;
var media = new Media(id, title, seconds, "vi");
callback(false, media);
} catch(e) {
var err = e;
2014-05-21 02:30:14 +00:00
/**
* This should no longer be necessary as the outer handler
* checks for HTTP 404
*/
if (buffer.match(/not found/))
2013-10-11 21:31:40 +00:00
err = "Video not found";
callback(err, null);
}
});
},
vi_oauth: function (id, callback) {
var OAuth = require("oauth");
var oa = new OAuth.OAuth(
"https://vimeo.com/oauth/request_token",
"https://vimeo.com/oauth/access_token",
Config.get("vimeo-oauth.consumer-key"),
Config.get("vimeo-oauth.secret"),
"1.0",
null,
"HMAC-SHA1"
);
oa.get("https://vimeo.com/api/rest/v2?format=json" +
"&method=vimeo.videos.getInfo&video_id=" + id,
null,
null,
function (err, data, res) {
if (err) {
return callback(err, null);
}
try {
data = JSON.parse(data);
if (data.stat !== "ok") {
return callback(data.err.msg, null);
}
var video = data.video[0];
if (video.embed_privacy !== "anywhere") {
return callback("Embedding disabled", null);
}
var id = video.id;
var seconds = parseInt(video.duration);
var title = video.title;
callback(null, new Media(id, title, seconds, "vi"));
} catch (e) {
callback("Error handling Vimeo response", null);
}
});
},
2013-10-11 21:31:40 +00:00
/* dailymotion.com */
dm: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1].split("_")[0];
2013-10-11 21:31:40 +00:00
} else {
callback("Invalid ID", null);
return;
}
var options = {
host: "api.dailymotion.com",
port: 443,
path: "/video/" + id + "?fields=duration,title",
method: "GET",
dataType: "jsonp",
timeout: 1000
};
urlRetrieve(https, options, function (status, data) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private video", null);
case 404:
return callback("Video not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
}
2013-08-01 03:26:11 +00:00
2013-10-11 21:31:40 +00:00
try {
data = JSON.parse(data);
var title = data.title;
var seconds = data.duration;
2014-05-21 02:30:14 +00:00
/**
* This is a rather hacky way to indicate that a video has
* been deleted...
*/
if (title === "Deleted video" && seconds === 10) {
2013-10-07 05:10:16 +00:00
callback("Video not found", null);
return;
2013-08-01 03:26:11 +00:00
}
2013-10-11 21:31:40 +00:00
var media = new Media(id, title, seconds, "dm");
callback(false, media);
} catch(e) {
2014-06-25 20:13:54 +00:00
callback(e, null);
2013-10-11 21:31:40 +00:00
}
});
},
/* soundcloud.com */
sc: function (id, callback) {
2014-05-21 02:30:14 +00:00
/* TODO: require server owners to register their own API key, put in config */
2013-10-11 21:31:40 +00:00
const SC_CLIENT = "2e0c82ab5a020f3a7509318146128abd";
var m = id.match(/([\w-\/\.:]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
2013-08-01 03:26:11 +00:00
2013-10-11 21:31:40 +00:00
var options = {
host: "api.soundcloud.com",
port: 443,
path: "/resolve.json?url=" + id + "&client_id=" + SC_CLIENT,
method: "GET",
dataType: "jsonp",
timeout: 1000
};
urlRetrieve(https, options, function (status, data) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
case 302:
2014-05-21 02:30:14 +00:00
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private sound", null);
case 404:
return callback("Sound not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
2013-10-11 21:31:40 +00:00
}
2013-10-11 21:31:40 +00:00
var track = null;
try {
data = JSON.parse(data);
track = data.location;
} catch(e) {
callback(e, null);
return;
}
2013-10-11 21:31:40 +00:00
var options2 = {
host: "api.soundcloud.com",
port: 443,
2013-10-11 21:31:40 +00:00
path: track,
method: "GET",
dataType: "jsonp",
timeout: 1000
};
2014-05-21 02:30:14 +00:00
/**
* There has got to be a way to directly get the data I want without
* making two requests to Soundcloud...right?
* ...right?
*/
2013-10-11 21:31:40 +00:00
urlRetrieve(https, options2, function (status, data) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private sound", null);
case 404:
return callback("Sound not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
}
try {
data = JSON.parse(data);
2013-10-11 21:31:40 +00:00
var seconds = data.duration / 1000;
var title = data.title;
var meta = {};
if (data.sharing === "private" && data.embeddable_by === "all") {
meta.scuri = data.uri;
}
var media = new Media(id, title, seconds, "sc", meta);
2013-10-11 21:31:40 +00:00
callback(false, media);
} catch(e) {
2013-10-07 05:10:16 +00:00
callback(e, null);
}
});
2013-10-11 21:31:40 +00:00
});
},
/* livestream.com */
li: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
var title = "Livestream.com - " + id;
var media = new Media(id, title, "--:--", "li");
callback(false, media);
},
/* twitch.tv */
tw: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
var title = "Twitch.tv - " + id;
var media = new Media(id, title, "--:--", "tw");
callback(false, media);
},
/* ustream.tv */
us: function (id, callback) {
2014-05-21 02:30:14 +00:00
/**
*2013-09-17
* They couldn't fucking decide whether channels should
* be at http://www.ustream.tv/channel/foo or just
* http://www.ustream.tv/foo so they do both.
* [](/cleese)
*/
2013-10-11 21:31:40 +00:00
var m = id.match(/([^\?&#]+)|(channel\/[^\?&#]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
2014-05-21 02:30:14 +00:00
2013-10-11 21:31:40 +00:00
var options = {
host: "www.ustream.tv",
port: 80,
path: "/" + id,
method: "GET",
timeout: 1000
};
urlRetrieve(http, options, function (status, data) {
if(status !== 200) {
callback("Ustream HTTP " + status, null);
return;
}
2014-05-21 02:30:14 +00:00
/**
* Regexing the ID out of the HTML because
* Ustream's API is so horribly documented
* I literally could not figure out how to retrieve
* this information.
*
* [](/eatadick)
*/
2015-03-23 23:10:00 +00:00
var m = data.match(/https:\/\/www\.ustream\.tv\/embed\/(\d+)/);
if (m) {
2013-10-11 21:31:40 +00:00
var title = "Ustream.tv - " + id;
var media = new Media(m[1], title, "--:--", "us");
callback(false, media);
} else {
2015-03-23 23:10:00 +00:00
callback("Channel ID not found", null);
}
2013-10-11 21:31:40 +00:00
});
},
/* JWPlayer */
jw: function (id, callback) {
var title = "JWPlayer - " + id;
var media = new Media(id, title, "--:--", "jw");
callback(false, media);
},
/* rtmp stream */
rt: function (id, callback) {
var title = "Livestream";
var media = new Media(id, title, "--:--", "rt");
callback(false, media);
},
/* imgur.com albums */
im: function (id, callback) {
2014-05-21 02:30:14 +00:00
/**
* TODO: Consider deprecating this in favor of custom embeds
*/
2013-10-11 21:31:40 +00:00
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
2013-10-11 21:31:40 +00:00
var title = "Imgur Album - " + id;
var media = new Media(id, title, "--:--", "im");
callback(false, media);
},
/* custom embed */
cu: function (id, callback) {
2015-06-18 22:46:33 +00:00
var media;
try {
media = CustomEmbedFilter(id);
} catch (e) {
if (/invalid embed/i.test(e.message)) {
return callback(e.message);
} else {
Logger.errlog.log(e.stack);
return callback("Unknown error processing embed");
}
}
2013-10-11 21:31:40 +00:00
callback(false, media);
2013-11-07 23:19:36 +00:00
},
/* google docs */
gd: function (id, callback) {
var data = {
type: "googledrive",
kind: "single",
id: id
2013-11-07 23:19:36 +00:00
};
mediaquery.lookup(data).then(function (video) {
callback(null, convertMedia(video));
}).catch(function (err) {
callback(err.message || err);
2013-11-07 23:19:36 +00:00
});
},
2015-05-15 06:19:08 +00:00
/* Google+ videos */
gp: function (id, callback) {
var data = {
type: "google+",
kind: "single",
id: id
};
mediaquery.lookup(data).then(function (video) {
callback(null, convertMedia(video));
}).catch(function (err) {
callback(err.message || err);
});
},
/* ffmpeg for raw files */
fi: function (id, cb) {
ffmpeg.query(id, function (err, data) {
if (err) {
return cb(err);
}
var m = new Media(id, data.title, data.duration, "fi", {
bitrate: data.bitrate,
codec: data.codec
});
cb(null, m);
});
2014-07-10 03:41:11 +00:00
},
2015-01-23 05:21:31 +00:00
/* hitbox.tv */
hb: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
var title = "Hitbox.tv - " + id;
var media = new Media(id, title, "--:--", "hb");
callback(false, media);
},
/* youtube.com - old v2 API */
yt2: function (id, callback) {
var sv = Server.getServer();
var m = id.match(/([\w-]{11})/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
var options = {
host: "gdata.youtube.com",
port: 443,
path: "/feeds/api/videos/" + id + "?v=2&alt=json",
method: "GET",
dataType: "jsonp",
timeout: 1000
};
if (Config.get("youtube-v2-key")) {
options.headers = {
"X-Gdata-Key": "key=" + Config.get("youtube-v2-key")
};
}
urlRetrieve(https, options, function (status, data) {
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private video", null);
case 404:
return callback("Video not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
}
var buffer = data;
try {
data = JSON.parse(data);
/* Check for embedding restrictions */
if (data.entry.yt$accessControl) {
var ac = data.entry.yt$accessControl;
for (var i = 0; i < ac.length; i++) {
if (ac[i].action === "embed") {
if (ac[i].permission === "denied") {
callback("Embedding disabled", null);
return;
}
break;
}
}
}
var seconds = data.entry.media$group.yt$duration.seconds;
var title = data.entry.title.$t;
var meta = {};
/* Check for country restrictions */
if (data.entry.media$group.media$restriction) {
var rest = data.entry.media$group.media$restriction;
if (rest.length > 0) {
if (rest[0].relationship === "deny") {
meta.restricted = rest[0].$t;
}
}
}
var media = new Media(id, title, seconds, "yt", meta);
callback(false, media);
} catch (e) {
// Gdata version 2 has the rather silly habit of
// returning error codes in XML when I explicitly asked
// for JSON
var m = buffer.match(/<internalReason>([^<]+)<\/internalReason>/);
if (m === null)
m = buffer.match(/<code>([^<]+)<\/code>/);
var err = e;
if (m) {
if(m[1] === "too_many_recent_calls") {
err = "YouTube is throttling the server right "+
"now for making too many requests. "+
"Please try again in a moment.";
} else {
err = m[1];
}
}
callback(err, null);
}
});
},
/* youtube.com playlists - old v2 api */
yp2: function (id, callback, url) {
/**
* NOTE: callback may be called multiple times, once for each <= 25 video
* batch of videos in the list. It will be called in order.
*/
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
var path = "/feeds/api/playlists/" + id + "?v=2&alt=json";
/**
* NOTE: the third parameter, url, is used to chain this retriever
* multiple times to get all the videos from a playlist, as each
* request only returns 25 videos.
*/
if (url !== undefined) {
path = "/" + url.split("gdata.youtube.com")[1];
}
var options = {
host: "gdata.youtube.com",
port: 443,
path: path,
method: "GET",
dataType: "jsonp",
timeout: 1000
};
if (Config.get("youtube-v2-key")) {
options.headers = {
"X-Gdata-Key": "key=" + Config.get("youtube-v2-key")
};
}
urlRetrieve(https, options, function (status, data) {
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private playlist", null);
case 404:
return callback("Playlist not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
}
try {
data = JSON.parse(data);
var vids = [];
for(var i in data.feed.entry) {
try {
/**
* FIXME: This should probably check for embed restrictions
* and country restrictions on each video in the list
*/
var item = data.feed.entry[i];
var id = item.media$group.yt$videoid.$t;
var title = item.title.$t;
var seconds = item.media$group.yt$duration.seconds;
var media = new Media(id, title, seconds, "yt");
vids.push(media);
} catch(e) {
}
}
callback(false, vids);
var links = data.feed.link;
for (var i in links) {
if (links[i].rel === "next") {
/* Look up the next batch of videos from the list */
Getters["yp2"](id, callback, links[i].href);
}
}
} catch (e) {
callback(e, null);
}
});
},
/* youtube.com search - old v2 api */
ytSearch2: function (terms, callback) {
/**
* terms is a list of words from the search query. Each word must be
* encoded properly for use in the request URI
*/
for (var i in terms) {
terms[i] = encodeURIComponent(terms[i]);
}
var query = terms.join("+");
var options = {
host: "gdata.youtube.com",
port: 443,
path: "/feeds/api/videos/?q=" + query + "&v=2&alt=json",
method: "GET",
dataType: "jsonp",
timeout: 1000
};
if (Config.get("youtube-v2-key")) {
options.headers = {
"X-Gdata-Key": "key=" + Config.get("youtube-v2-key")
};
}
urlRetrieve(https, options, function (status, data) {
if (status !== 200) {
callback("YouTube search: HTTP " + status, null);
return;
}
try {
data = JSON.parse(data);
var vids = [];
for(var i in data.feed.entry) {
try {
/**
* FIXME: This should probably check for embed restrictions
* and country restrictions on each video in the list
*/
var item = data.feed.entry[i];
var id = item.media$group.yt$videoid.$t;
var title = item.title.$t;
var seconds = item.media$group.yt$duration.seconds;
var media = new Media(id, title, seconds, "yt");
media.thumb = item.media$group.media$thumbnail[0];
vids.push(media);
} catch(e) {
}
}
callback(false, vids);
} catch(e) {
callback(e, null);
}
});
},
2013-10-11 21:31:40 +00:00
};
/**
* Function to workaround Vimeo being a dick and blocking my domain from embeds.
* Retrieves the player page and extracts the direct links to the MP4 encoded videos.
*/
function vimeoWorkaround(id, cb) {
if (typeof cb !== "function") {
return;
}
var failcount = 0;
var inner = function () {
var options = {
host: "player.vimeo.com",
path: "/video/" + id,
headers: {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0",
"Referrer": "player.vimeo.com"
}
};
var parse = function (data) {
2014-02-05 04:02:27 +00:00
var i = data.indexOf("{\"cdn_url\"");
if (i === -1) {
setImmediate(function () {
cb({});
});
return;
}
var j = data.indexOf("};", i);
2014-02-05 04:02:27 +00:00
var json = data.substring(i, j+1);
try {
json = JSON.parse(json);
if (!json.request.files) {
setImmediate(function () {
cb({});
});
return;
}
var codec = json.request.files.codecs[0];
var files = json.request.files[codec];
setImmediate(function () {
cb(files);
});
} catch (e) {
// This shouldn't happen due to the user-agent, but just in case
if (data.indexOf("crawler") !== -1) {
2014-05-21 02:30:14 +00:00
Logger.syslog.log("Warning: vimdeoWorkaround got crawler response");
failcount++;
if (failcount > 4) {
2014-05-21 02:30:14 +00:00
Logger.errlog.log("vimeoWorkaround got bad response 5 times!"+
" Giving up.");
setImmediate(function () {
cb({});
});
} else {
setImmediate(function () {
inner();
});
}
return;
} else if (data.indexOf("This video does not exist.") !== -1) {
cb({});
return;
} else if (data.indexOf("Because of its privacy settings, this video cannot be played here.") !== -1) {
cb({});
}
Logger.errlog.log("Vimeo workaround error: ");
Logger.errlog.log(e);
2014-02-05 04:02:27 +00:00
Logger.errlog.log("http://vimeo.com/" + id);
setImmediate(function () {
cb({});
});
}
};
2015-01-06 15:54:14 +00:00
urlRetrieve(http, options, function (status, buffer) {
if (status !== 200) {
setImmediate(function () {
cb({});
});
return;
}
2015-01-06 15:54:14 +00:00
parse(buffer);
});
};
inner();
}
2013-10-11 21:31:40 +00:00
module.exports = {
Getters: Getters,
getMedia: function (id, type, callback) {
if(type in this.Getters) {
this.Getters[type](id, callback);
} else {
callback("Unknown media type '" + type + "'", null);
}
},
vimeoWorkaround: vimeoWorkaround
2013-10-11 21:31:40 +00:00
};