summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYaohan Chen <yaohan.chen@gmail.com>2014-04-01 01:21:59 -0400
committerYaohan Chen <yaohan.chen@gmail.com>2014-04-01 01:31:53 -0400
commit23fcf1acc62e0f32f479c12c1916b2b1dd922199 (patch)
tree96ddaf09bb3cb901a9d9a0300bf2c14a37586de9
parentbb224302e1720bf7119c5b3a2e3c394ce989ecc7 (diff)
downloadhib-dlagent-23fcf1acc62e0f32f479c12c1916b2b1dd922199.tar.gz
hib-dlagent-23fcf1acc62e0f32f479c12c1916b2b1dd922199.zip
Support the new Humble Bundle page
Use PhantomJS to handle the dynamically generated page and captchas. Add options to specify the location of PhantomJS scripts and configuration file. Update README on requirements, usage, and debugging information. Update CHANGELOG.
-rw-r--r--CHANGELOG3
-rw-r--r--README26
-rw-r--r--discover-url.coffee21
-rwxr-xr-xhib-dlagent22
-rw-r--r--login.coffee12
-rw-r--r--phantomjs-config.json9
-rw-r--r--util.coffee71
7 files changed, 154 insertions, 10 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 2cf8f08..4af156e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,6 @@
+Version 0.6 - 2014/04/01
+ * Handles the new Humble Bundle page using PhantomJS
+
Version 0.5 - 2014/01/12
* Fixed handling of passwords containing special characters
* Added interactive password prompting when password not provided as argument
diff --git a/README b/README
index e621bb2..ce309da 100644
--- a/README
+++ b/README
@@ -2,22 +2,32 @@ Tool to download Humble Indie Bundle binaries by file name
Author: Eric Anderson <ejona86@gmail.com>
Contributor: Thomas Riccardi <riccardi.thomas@gmail.com>
Contributor: Devin J. Pohly <djpohly@gmail.com>
+Contributor: Yaohan Chen <yaohan.chen@gmail.com>
Description
===========
Primarily for use as a DLAGENT in makepkg.conf in Arch Linux, but generally
useful when needing to download a particular Humble Bundle file via a script.
-The script does very little other than argument parsing; it effectively has only
-two "real" lines of functionality.
It also manages a storage directory where already downloaded files can be found.
Installation and Usage
======================
-Run the script directly or copy the script to a location like /usr/bin/ for all
-users to use.
+This utility currently requires PhantomJS in order to extract links on Humble
+Bundle webpages generated by JavaScript. It also uses 'display', part of
+ImageMagick, to handle any captchas. (It does not solve captchas for you, but
+only displays a screenshot of the web page and asks you to enter the answer.)
+
+To run hib-dlagent without installing it, add parameters '-S. -c.', which tell
+it to look for scripts and the configuration file in the current directory.
+
+By default hib-dlagent assumes the .coffee scripts are installed in
+/usr/share/hib-dlagent, and the .json configuration file is installed in
+/etc/hib-dlagent. If they are installed in other locations, it is necessary
+to either run hib-dlagent with the correct -S and -c parameters, or edit the
+variables SCRIPT_PATH and CONFIG_PATH in hib-dlagent.
The tool uses curl to download the file, or can simply provide the URL needed to
download the file so a different HTTP downloader such as Wget can be used.
@@ -28,3 +38,11 @@ DLAGENTS=(...
...)
Run with the -h argument for more information.
+
+Troubleshooting
+===============
+
+Extracting URLs from Humble Bundle's mostly dynamically-generated website can be
+error-prone and time-consuming. Currently it can take about 20 seconds for the
+script to run. If the script fails or gets stuck, it may help to turn on logging
+by setting the LOG environment variable to a non-empty value.
diff --git a/discover-url.coffee b/discover-url.coffee
new file mode 100644
index 0000000..cf8627d
--- /dev/null
+++ b/discover-url.coffee
@@ -0,0 +1,21 @@
+# PhantomJS script: prints the download URL of a file listed on a Humble
+# Bundle page.
+#
+# Arguments: listing_page filename username password
+# Output:    the href of the first matching Linux download link, written to
+#            stdout; nothing is written when no link matches.
+# Exit:      1 if the listing page fails to load, 0 otherwise.
+system = require 'system'
+[_, listing_page, filename, username, password] = system.args
+
+util = require './util'
+page = util.page
+
+page.open listing_page, (status) ->
+  util.log "Open listing page: #{status}"
+  if status isnt 'success'
+    # Nothing can be searched on a page that did not load; report the failure
+    # instead of silently printing no URL.
+    system.stderr.writeLine "Failed to load #{listing_page}"
+    phantom.exit 1
+    return
+  util.handle_login_captcha ->
+    util.log 'Searching URLs...'
+    url = page.evaluate (filename) ->
+      # Escape backslashes and quotes so the file name cannot terminate the
+      # quoted attribute value inside the selector.
+      escaped = filename.replace(/(["'\\])/g, '\\$1')
+      found = document.querySelector ".downloads.linux a[href*='#{escaped}']"
+      found and found.getAttribute('href')
+    , filename
+    util.log "Found URL: #{url}"
+    if url
+      system.stdout.writeLine url
+    phantom.exit()
+  , username, password
+
diff --git a/hib-dlagent b/hib-dlagent
index fe09339..5c6a8a5 100755
--- a/hib-dlagent
+++ b/hib-dlagent
@@ -6,6 +6,9 @@ LOGIN_PAGE=https://www.humblebundle.com/login
HOME_PAGE=https://www.humblebundle.com/home
COOKIE_JAR=
+SCRIPT_PATH='/usr/share/hib-dlagent'
+CONFIG_PATH='/etc/hib-dlagent'
+
FILE=
DESTINATION=
DOWNLOAD=1
@@ -19,15 +22,14 @@ login() {
read -rsp 'Enter Humble account password: ' PASSWORD
echo
fi
- printf '%s' "$PASSWORD" | \
- curl -s --cookie-jar "$COOKIE_JAR" \
- --data-urlencode "username=$USERNAME" --data-urlencode password@- "$LOGIN_PAGE"
+ phantomjs --config="$CONFIG_PATH"/phantomjs-config.json --cookies-file="$COOKIE_JAR" \
+ "$SCRIPT_PATH"/login.coffee "$LOGIN_PAGE" "$USERNAME" "$PASSWORD"
}
discover_url() {
local LISTING_PAGE="$1"
- curl -s --cookie "$COOKIE_JAR" "$LISTING_PAGE" | grep "/$FILE?" | grep 'data-web=' | \
- sed -e "s/.* data-web='\([^']*\)'.*/\1/" | head -n 1
+ phantomjs --config="$CONFIG_PATH"/phantomjs-config.json --cookies-file="$COOKIE_JAR" \
+ "$SCRIPT_PATH"/discover-url.coffee "$LISTING_PAGE" "$FILE" "$USERNAME" "$PASSWORD"
}
usage() {
@@ -47,6 +49,8 @@ Options:
-s Print URL to stdout instead of downloading. Incompatible with -d
-u <user> Use user to login. Search account's files. If specified multiple
times, the last is used
+ -S <dir> Directory where PhantomJs scripts are located.
+ -c <dir> Directory where configuration files are located.
If you specify -u, then all of that account's bundles are searched. If a key is
associated with a HIB account then you must use -u/-p, since that key only works
@@ -102,7 +106,7 @@ main() {
exit 1
fi
- while getopts "hd:k:o:p:P:su:" opt; do
+ while getopts "hd:k:o:p:P:su:S:c:" opt; do
case $opt in
\?)
exit 1
@@ -132,6 +136,12 @@ main() {
u)
USERNAME="$OPTARG"
;;
+ S)
+ SCRIPT_PATH="$OPTARG"
+ ;;
+ c)
+ CONFIG_PATH="$OPTARG"
+ ;;
esac
done
diff --git a/login.coffee b/login.coffee
new file mode 100644
index 0000000..c129622
--- /dev/null
+++ b/login.coffee
@@ -0,0 +1,12 @@
+# PhantomJS script: opens the login page and submits the given credentials.
+#
+# Arguments: login_page username password
+# Exit:      1 if the login page fails to load; 0 once
+#            util.handle_login_captcha invokes its callback.
+system = require 'system'
+[_, login_page, username, password] = system.args
+
+util = require './util'
+page = util.page
+
+page.open login_page, (status) ->
+  util.log "Opening login page: #{status}"
+  if status isnt 'success'
+    # A page that failed to load has no login form, which would otherwise be
+    # mistaken for an already logged-in session.
+    system.stderr.writeLine "Failed to load #{login_page}"
+    phantom.exit 1
+    return
+  util.handle_login_captcha ->
+    phantom.exit()
+  , username, password
+
diff --git a/phantomjs-config.json b/phantomjs-config.json
new file mode 100644
index 0000000..559194a
--- /dev/null
+++ b/phantomjs-config.json
@@ -0,0 +1,9 @@
+{
+// "autoLoadImages": false,
+ "webSecurityEnabled": false,
+ "ignoreSslErrors": true,
+ "proxyType": "none",
+ "diskCacheEnabled": true,
+ "debug": true,
+ "libraryPath": ".",
+}
diff --git a/util.coffee b/util.coffee
new file mode 100644
index 0000000..169cc35
--- /dev/null
+++ b/util.coffee
@@ -0,0 +1,71 @@
+system = require('system')
+
+# Logging helper, exported as `log`. Writes the message to stderr only when the
+# LOG environment variable is set to a non-empty value; otherwise it is a no-op.
+if system.env.LOG
+  # Call through system.stderr so writeLine keeps its receiver, and forward
+  # only the message even when invoked with extra arguments (for example as a
+  # page console-message handler).
+  exports.log = log = (message) -> system.stderr.writeLine message
+else
+  exports.log = log = ->
+
+# Writes `message` to stderr as a prompt, then returns the next line read from
+# stdin.
+ask = (message) ->
+  system.stderr.write(message)
+  reply = system.stdin.readLine()
+  reply
+exports.ask = ask
+
+# Shared web page, exported as `page`; its console messages are forwarded to
+# exports.log
+page = require('webpage').create()
+page.onConsoleMessage = log
+exports.page = page
+
+child_process = require('child_process')
+
+# Renders the current page to a PNG file and opens it with the external
+# 'display' program. Returns the spawned 'display' process object.
+display_screenshot = ->
+  # FIXME use mktemp, or write to display process directly
+  image_path = '/tmp/hib-dlagent-phantomjs.png'
+  page.render(image_path)
+  viewer = child_process.spawn('display', [image_path])
+  viewer
+exports.display_screenshot = display_screenshot
+
+# Fills in and submits any login form and/or captcha box found on the current
+# page, repeats the check after each subsequent page load, and calls action()
+# once neither is present.
+#
+# action   - callback invoked with no arguments when no login form or captcha
+#            box is found on the page
+# username - value entered into input[name="username"]
+# password - value entered into input[name="password"]
+#
+# If something had to be submitted but the page contains no <form>, the problem
+# is reported on stderr and the script exits with status 1.
+exports.handle_login_captcha = handle_login_captcha = (action, username, password) ->
+  need_to_submit = false
+
+  # complete a login form if there is one; return a boolean from the page
+  # context rather than a DOM node
+  has_login_form = page.evaluate(-> document.querySelector('input[name="username"]')?)
+  if has_login_form
+    log 'Entering login information...'
+    page.evaluate (username, password) ->
+      username_box = document.querySelector 'input[name="username"]'
+      password_box = document.querySelector 'input[name="password"]'
+      username_box.value = username if username_box
+      password_box.value = password if password_box
+      return
+    , username, password
+    need_to_submit = true
+
+  # handle a captcha box if there is one
+  has_captcha = page.evaluate(-> document.querySelector('#recaptcha_response_field')?)
+  if has_captcha
+    log 'Humble Bundle wants you to solve a captcha. Displaying screenshot...'
+    display_process = display_screenshot()
+    input = ask 'Enter the captcha solution, or press Enter to get a new challenge: '
+    display_process.kill()
+
+    page.evaluate (input) ->
+      if input is ''
+        Recaptcha.reload()
+      else
+        captcha_box = document.querySelector '#recaptcha_response_field'
+        captcha_box.value = input
+      return
+    , input
+    need_to_submit = true
+
+  if need_to_submit
+    # Entered information, submit and check for captcha/login again after load finishes
+    log 'Submitting login information and/or captcha response...'
+    page.onLoadFinished = -> handle_login_captcha action, username, password
+    submitted = page.evaluate ->
+      form = document.querySelector 'form'
+      form.submit() if form
+      form?
+    unless submitted
+      # Nothing was submitted, so no page load will follow; stop here instead
+      # of waiting forever for onLoadFinished.
+      page.onLoadFinished = null
+      system.stderr.writeLine 'No form found to submit login information and/or captcha response'
+      phantom.exit 1
+  else
+    # Stop reacting to later page loads so that action() runs only once.
+    page.onLoadFinished = null
+    log 'Logged in...'
+    action()
+