summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG3
-rw-r--r--README26
-rw-r--r--discover-url.coffee21
-rwxr-xr-xhib-dlagent22
-rw-r--r--login.coffee12
-rw-r--r--phantomjs-config.json9
-rw-r--r--util.coffee71
7 files changed, 154 insertions, 10 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 2cf8f08..4af156e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,6 @@
+Version 0.6 - 2014/04/01
+ * Handles the new Humble Bundle page using PhantomJS
+
Version 0.5 - 2014/01/12
* Fixed handling of passwords containing special characters
* Added interactive password prompting when password not provided as argument
diff --git a/README b/README
index e621bb2..ce309da 100644
--- a/README
+++ b/README
@@ -2,22 +2,32 @@ Tool to download Humble Indie Bundle binaries by file name
Author: Eric Anderson <ejona86@gmail.com>
Contributor: Thomas Riccardi <riccardi.thomas@gmail.com>
Contributor: Devin J. Pohly <djpohly@gmail.com>
+Contributor: Yaohan Chen <yaohan.chen@gmail.com>
Description
===========
Primarily for use as a DLAGENT in makepkg.conf in Arch Linux, but generally
useful when needing to download a particular Humble Bundle file via a script.
-The script does very little other than argument parsing; it effectively has only
-two "real" lines of functionality.
It also manages a storage directory where already downloaded files can be found.
Installation and Usage
======================
-Run the script directly or copy the script to a location like /usr/bin/ for all
-users to use.
+This utility currently requires PhantomJS in order to extract links on Humble
+Bundle webpages generated by JavaScript. It also uses 'display', part of
+ImageMagick, to handle any captchas. (It does not solve captchas for you, but
+only displays a screenshot of the web page and asks you to enter the answer.)
+
+To run hib-dlagent without installing it, add parameters '-S. -c.', which tell
+it to look for scripts and the configuration file in the current directory.
+
+By default hib-dlagent assumes the .coffee scripts are installed in
+/usr/share/hib-dlagent, and the .json configuration file is installed in
+/etc/hib-dlagent. If they are installed in other locations, it is necessary
+to either run hib-dlagent with the correct -S and -c parameters, or edit the
+variables SCRIPT_PATH and CONFIG_PATH in hib-dlagent.
The tool uses curl to download the file, or can simply provide the URL needed to
download the file so a different HTTP downloader such as Wget can be used.
@@ -28,3 +38,11 @@ DLAGENTS=(...
...)
Run with the -h argument for more information.
+
+Troubleshooting
+===============
+
+Extracting URLs from Humble Bundle's mostly dynamically-generated website can be
+error-prone and time-consuming. Currently it can take about 20 seconds for the
+script to run. If the script fails or gets stuck, it may help to turn on logging
+by setting the LOG environment variable to a non-empty value.
diff --git a/discover-url.coffee b/discover-url.coffee
new file mode 100644
index 0000000..cf8627d
--- /dev/null
+++ b/discover-url.coffee
@@ -0,0 +1,21 @@
+# PhantomJS script: opens a listing page, handles any login form/captcha via
+# util.handle_login_captcha, then prints to stdout the href of the first
+# element matching `.downloads.linux a[href*='<filename>']`.
+# Arguments (taken from system.args; the first entry is ignored):
+#   listing_page, filename, username, password
+system = require 'system'
+[_, listing_page, filename, username, password] = system.args
+
+util = require './util'
+page = util.page
+
+# The load status is only logged here; it is not checked before continuing.
+page.open listing_page, (status) ->
+ util.log "Open listing page: #{status}"
+ # The function below is handed to handle_login_captcha as the action to run
+ # once no login form or captcha is left on the page.
+ util.handle_login_captcha ->
+ util.log 'Searching URLs...'
+ # `filename` is forwarded into the page context as an extra argument to
+ # page.evaluate (see the `, filename` line below).
+ url = page.evaluate (filename) ->
+ # Characters in filename may need to be escaped for use in a selector
+ found = document.querySelector ".downloads.linux a[href*='#{filename}']"
+ found and found.getAttribute('href')
+ , filename
+ util.log "Found URL: #{url}"
+ # Nothing is written to stdout when no matching link was found.
+ if url
+ system.stdout.writeLine url
+ phantom.exit()
+ , username, password
+
diff --git a/hib-dlagent b/hib-dlagent
index fe09339..5c6a8a5 100755
--- a/hib-dlagent
+++ b/hib-dlagent
@@ -6,6 +6,9 @@ LOGIN_PAGE=https://www.humblebundle.com/login
HOME_PAGE=https://www.humblebundle.com/home
COOKIE_JAR=
+SCRIPT_PATH='/usr/share/hib-dlagent'
+CONFIG_PATH='/etc/hib-dlagent'
+
FILE=
DESTINATION=
DOWNLOAD=1
@@ -19,15 +22,14 @@ login() {
read -rsp 'Enter Humble account password: ' PASSWORD
echo
fi
- printf '%s' "$PASSWORD" | \
- curl -s --cookie-jar "$COOKIE_JAR" \
- --data-urlencode "username=$USERNAME" --data-urlencode password@- "$LOGIN_PAGE"
+ phantomjs --config="$CONFIG_PATH"/phantomjs-config.json --cookies-file="$COOKIE_JAR" \
+ "$SCRIPT_PATH"/login.coffee "$LOGIN_PAGE" "$USERNAME" "$PASSWORD"
}
discover_url() {
local LISTING_PAGE="$1"
- curl -s --cookie "$COOKIE_JAR" "$LISTING_PAGE" | grep "/$FILE?" | grep 'data-web=' | \
- sed -e "s/.* data-web='\([^']*\)'.*/\1/" | head -n 1
+ phantomjs --config="$CONFIG_PATH"/phantomjs-config.json --cookies-file="$COOKIE_JAR" \
+ "$SCRIPT_PATH"/discover-url.coffee "$LISTING_PAGE" "$FILE" "$USERNAME" "$PASSWORD"
}
usage() {
@@ -47,6 +49,8 @@ Options:
-s Print URL to stdout instead of downloading. Incompatible with -d
-u <user> Use user to login. Search account's files. If specified multiple
times, the last is used
+ -S <dir> Directory where PhantomJs scripts are located.
+ -c <dir> Directory where configuration files are located.
If you specify -u, then all of that account's bundles are searched. If a key is
associated with a HIB account then you must use -u/-p, since that key only works
@@ -102,7 +106,7 @@ main() {
exit 1
fi
- while getopts "hd:k:o:p:P:su:" opt; do
+ while getopts "hd:k:o:p:P:su:S:c:" opt; do
case $opt in
\?)
exit 1
@@ -132,6 +136,12 @@ main() {
u)
USERNAME="$OPTARG"
;;
+ S)
+ SCRIPT_PATH="$OPTARG"
+ ;;
+ c)
+ CONFIG_PATH="$OPTARG"
+ ;;
esac
done
diff --git a/login.coffee b/login.coffee
new file mode 100644
index 0000000..c129622
--- /dev/null
+++ b/login.coffee
@@ -0,0 +1,12 @@
+# PhantomJS script: opens the login page and lets util.handle_login_captcha
+# fill in and submit the login form (and any captcha), then exits.
+# Arguments (taken from system.args; the first entry is ignored):
+#   login_page, username, password
+system = require 'system'
+[_, login_page, username, password] = system.args
+
+util = require './util'
+page = util.page
+
+# The load status is only logged here; it is not checked before continuing.
+page.open login_page, (status)->
+ util.log "Opening login page: #{status}"
+ # The action passed to handle_login_captcha simply terminates PhantomJS.
+ util.handle_login_captcha ->
+ phantom.exit()
+ , username, password
+
diff --git a/phantomjs-config.json b/phantomjs-config.json
new file mode 100644
index 0000000..559194a
--- /dev/null
+++ b/phantomjs-config.json
@@ -0,0 +1,9 @@
+{
+// "autoLoadImages": false,
+ "webSecurityEnabled": false,
+ "ignoreSslErrors": true,
+ "proxyType": "none",
+ "diskCacheEnabled": true,
+ "debug": true,
+ "libraryPath": ".",
+}
diff --git a/util.coffee b/util.coffee
new file mode 100644
index 0000000..169cc35
--- /dev/null
+++ b/util.coffee
@@ -0,0 +1,71 @@
+system = require('system')
+
+# Prints a log message to stderr only when the LOG environment variable is set
+# to a non-empty value (as the README describes); otherwise log is a no-op.
+# A truthiness test is used because the existential operator (`?`) would also
+# enable logging when LOG is defined but empty.
+if system.env.LOG
+ exports.log = log = system.stderr.writeLine
+else
+ exports.log = log = ->
+
+# Prompts the user and returns their answer.
+#   message - text written to stderr with system.stderr.write
+# Returns the value of system.stdin.readLine() (the last expression).
+exports.ask = ask = (message) ->
+ system.stderr.write message
+ system.stdin.readLine()
+
+# The single webpage object shared by the scripts (exported as `page`); its
+# console messages are handed to exports.log via onConsoleMessage.
+exports.page = page = require('webpage').create()
+page.onConsoleMessage = log
+
+child_process = require('child_process')
+
+# Renders the current page to a fixed PNG path and spawns the 'display'
+# program on that file. Returns the result of child_process.spawn so the
+# caller can later kill the viewer.
+exports.display_screenshot = display_screenshot = ->
+ # FIXME use mktemp, or write to display process directly
+ # The path is hard-coded, so every run writes to the same file in /tmp.
+ screenshot = '/tmp/hib-dlagent-phantomjs.png'
+ page.render screenshot
+ child_process.spawn 'display', [screenshot]
+
+# Handles login/captcha boxes on the shared page, and calls the passed
+# action() once neither is present.
+#   action   - function called with no arguments when nothing needs submitting
+#   username - value written into input[name="username"], if that input exists
+#   password - value written into input[name="password"], if that input exists
+# When something was filled in, the first <form> is submitted and this function
+# re-runs itself (with the same arguments) from page.onLoadFinished.
+exports.handle_login_captcha = handle_login_captcha = (action, username, password) ->
+ # becomes true when a login form or captcha was found and filled in
+ # NOTE(review): indentation is lost in this view, so the exact nesting of the
+ # `need_to_submit = true` assignments should be confirmed against the file.
+ need_to_submit = false
+
+ # complete a login form if there is one
+ if page.evaluate(-> document.querySelector 'input[name="username"]')
+ log 'Entering login information...'
+ # username/password are forwarded into the page context as extra arguments
+ page.evaluate (username, password) ->
+ username_box = document.querySelector 'input[name="username"]'
+ password_box = document.querySelector 'input[name="password"]'
+ if username_box
+ username_box.value = username
+ if password_box
+ password_box.value = password
+ , username, password
+ need_to_submit = true
+
+ # handle a captcha box if there is one
+ if page.evaluate(-> document.querySelector '#recaptcha_response_field')
+ log 'Humble Bundle wants you to solve a captcha. Displaying screenshot...'
+ # show the page to the user, read the answer, then close the viewer
+ display_process = display_screenshot()
+ input = ask 'Enter the captcha solution, or press Enter to get a new challenge: '
+ display_process.kill()
+
+ # empty input asks the page's Recaptcha object for a new challenge;
+ # anything else is written into the captcha response field
+ page.evaluate (input)->
+ if input is ''
+ Recaptcha.reload()
+ else
+ captcha_box = document.querySelector '#recaptcha_response_field'
+ captcha_box.value = input
+ , input
+ need_to_submit = true
+
+ if need_to_submit
+ # Entered information, submit and check for captcha/login again after load finishes
+ log 'Submitting login information and/or captcha response...'
+ # the load-finished handler is installed before submitting the form
+ page.onLoadFinished = -> handle_login_captcha action, username, password
+ # submits the first <form> in the document, if any
+ page.evaluate ->
+ form = document.querySelector('form')
+ if form
+ form.submit()
+ else
+ log 'Logged in...'
+ action()
+