From 23fcf1acc62e0f32f479c12c1916b2b1dd922199 Mon Sep 17 00:00:00 2001 From: Yaohan Chen Date: Tue, 1 Apr 2014 01:21:59 -0400 Subject: Support the new Humble Bundle page Use PhantomJS to handle the dynamically generated page and captchas. Add options to specify the location of PhantomJS scripts and configuration file. Update README on requirements, usage, and debugging information. Update CHANGELOG. --- CHANGELOG | 3 +++ README | 26 ++++++++++++++++--- discover-url.coffee | 21 +++++++++++++++ hib-dlagent | 22 +++++++++++----- login.coffee | 12 +++++++++ phantomjs-config.json | 9 +++++++ util.coffee | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 154 insertions(+), 10 deletions(-) create mode 100644 discover-url.coffee create mode 100644 login.coffee create mode 100644 phantomjs-config.json create mode 100644 util.coffee diff --git a/CHANGELOG b/CHANGELOG index 2cf8f08..4af156e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +Version 0.6 - 2014/04/01 + * Handles the new Humble Bundle page using PhantomJS + Version 0.5 - 2014/01/12 * Fixed handling of passwords containing special characters * Added interactive password prompting when password not provided as argument diff --git a/README b/README index e621bb2..ce309da 100644 --- a/README +++ b/README @@ -2,22 +2,32 @@ Tool to download Humble Indie Bundle binaries by file name Author: Eric Anderson Contributor: Thomas Riccardi Contributor: Devin J. Pohly +Contributor: Yaohan Chen Description =========== Primarily for use as a DLAGENT in makepkg.conf in Arch Linux, but generally useful when needing to download a particular Humble Bundle file via a script. -The script does very little other than argument parsing; it effectively has only -two "real" lines of functionality. It also manages a storage directory where already downloaded file can be found. 
Installation and Usage ====================== -Run the script directly or copy the script to a location like /usr/bin/ for all -users to use. +This utility currently requires PhantomJS in order to extract links on Humble +Bundle webpages generated by JavaScript. It also uses 'display', part of +ImageMagick, to handle any captchas. (It does not solve captchas for you, but +only displays a screenshot of the web page and asks you to enter the answer.) + +To run hib-dlagent without installing it, add parameters '-S. -c.', which tell +it to look for scripts and the configuration file in the current directory. + +By default hib-dlagent assumes the .coffee scripts are installed in +/usr/share/hib-dlagent, and the .json configuration file is installed in +/etc/hib-dlagent. If they are installed in other locations, it is necessary +to either run hib-dlagent with the correct -S and -c parameters, or edit the +variables SCRIPT_PATH and CONFIG_PATH in hib-dlagent. The tool uses curl to download the file, or can simply provide the URL needed to download the file so a different HTTP downloader such a Wget can be used. @@ -28,3 +38,11 @@ DLAGENTS=(... ...) Run with the -h argument for more information. + +Troubleshooting +=============== + +Extracting URLs from Humble Bundle's mostly dynamically-generated website can be +error-prone and time-consuming. Currently it can take about 20 seconds for the +script to run. If the script fails or gets stuck, it may help to turn on logging +by setting the LOG environment variable to a non-empty value. diff --git a/discover-url.coffee b/discover-url.coffee new file mode 100644 index 0000000..cf8627d --- /dev/null +++ b/discover-url.coffee @@ -0,0 +1,21 @@ +system = require 'system' +[_, listing_page, filename, username, password] = system.args + +util = require './util' +page = util.page + +page.open listing_page, (status) -> + util.log "Open listing page: #{status}" + util.handle_login_captcha -> + util.log 'Searching URLs...' 
+ url = page.evaluate (filename) -> + # Characters in filename may need to be escaped for use in a selector + found = document.querySelector ".downloads.linux a[href*='#{filename}']" + found and found.getAttribute('href') + , filename + util.log "Found URL: #{url}" + if url + system.stdout.writeLine url + phantom.exit() + , username, password + diff --git a/hib-dlagent b/hib-dlagent index fe09339..5c6a8a5 100755 --- a/hib-dlagent +++ b/hib-dlagent @@ -6,6 +6,9 @@ LOGIN_PAGE=https://www.humblebundle.com/login HOME_PAGE=https://www.humblebundle.com/home COOKIE_JAR= +SCRIPT_PATH='/usr/share/hib-dlagent' +CONFIG_PATH='/etc/hib-dlagent' + FILE= DESTINATION= DOWNLOAD=1 @@ -19,15 +22,14 @@ login() { read -rsp 'Enter Humble account password: ' PASSWORD echo fi - printf '%s' "$PASSWORD" | \ - curl -s --cookie-jar "$COOKIE_JAR" \ - --data-urlencode "username=$USERNAME" --data-urlencode password@- "$LOGIN_PAGE" + phantomjs --config="$CONFIG_PATH"/phantomjs-config.json --cookies-file="$COOKIE_JAR" \ + "$SCRIPT_PATH"/login.coffee "$LOGIN_PAGE" "$USERNAME" "$PASSWORD" } discover_url() { local LISTING_PAGE="$1" - curl -s --cookie "$COOKIE_JAR" "$LISTING_PAGE" | grep "/$FILE?" | grep 'data-web=' | \ - sed -e "s/.* data-web='\([^']*\)'.*/\1/" | head -n 1 + phantomjs --config="$CONFIG_PATH"/phantomjs-config.json --cookies-file="$COOKIE_JAR" \ + "$SCRIPT_PATH"/discover-url.coffee "$LISTING_PAGE" "$FILE" "$USERNAME" "$PASSWORD" } usage() { @@ -47,6 +49,8 @@ Options: -s Print URL to stdout instead of downloading. Incompatible with -d -u Use user to login. Search account's files. If specified multiple times, the last is used + -S Directory where PhantomJs scripts are located. + -c Directory where configuration files are located. If you specify -u, then all of that account's bundles are searched. 
If a key is associated with a HIB account then you must use -u/-p, since that key only works @@ -102,7 +106,7 @@ main() { exit 1 fi - while getopts "hd:k:o:p:P:su:" opt; do + while getopts "hd:k:o:p:P:su:S:c:" opt; do case $opt in \?) exit 1 @@ -132,6 +136,12 @@ main() { u) USERNAME="$OPTARG" ;; + S) + SCRIPT_PATH="$OPTARG" + ;; + c) + CONFIG_PATH="$OPTARG" + ;; esac done diff --git a/login.coffee b/login.coffee new file mode 100644 index 0000000..c129622 --- /dev/null +++ b/login.coffee @@ -0,0 +1,12 @@ +system = require 'system' +[_, login_page, username, password] = system.args + +util = require './util' +page = util.page + +page.open login_page, (status)-> + util.log "Opening login page: #{status}" + util.handle_login_captcha -> + phantom.exit() + , username, password + diff --git a/phantomjs-config.json b/phantomjs-config.json new file mode 100644 index 0000000..559194a --- /dev/null +++ b/phantomjs-config.json @@ -0,0 +1,9 @@ +{ +// "autoLoadImages": false, + "webSecurityEnabled": false, + "ignoreSslErrors": true, + "proxyType": "none", + "diskCacheEnabled": true, + "debug": true, + "libraryPath": ".", +} diff --git a/util.coffee b/util.coffee new file mode 100644 index 0000000..169cc35 --- /dev/null +++ b/util.coffee @@ -0,0 +1,71 @@ +system = require('system') + +# prints a log message if the LOG environment variable is set +if system.env.LOG? 
+ exports.log = log = system.stderr.writeLine +else + exports.log = log = -> + +# prints a message to stderr, reads a line of input, and returns the input +exports.ask = ask = (message) -> + system.stderr.write message + system.stdin.readLine() + +# a page that directs its console messages to exports.log +exports.page = page = require('webpage').create() +page.onConsoleMessage = log + +child_process = require('child_process') + +# displays a screenshot of the page, and returns the 'display' process object +exports.display_screenshot = display_screenshot = -> + # FIXME use mktemp, or write to display process directly + screenshot = '/tmp/hib-dlagent-phantomjs.png' + page.render screenshot + child_process.spawn 'display', [screenshot] + +# handles login/captcha boxes, and calls the passed action() when logged in +exports.handle_login_captcha = handle_login_captcha = (action, username, password) -> + need_to_submit = false + + # complete a login form if there is one + if page.evaluate(-> document.querySelector 'input[name="username"]') + log 'Entering login information...' + page.evaluate (username, password) -> + username_box = document.querySelector 'input[name="username"]' + password_box = document.querySelector 'input[name="password"]' + if username_box + username_box.value = username + if password_box + password_box.value = password + , username, password + need_to_submit = true + + # handle a captcha box if there is one + if page.evaluate(-> document.querySelector '#recaptcha_response_field') + log 'Humble Bundle wants you to solve a captcha. Displaying screenshot...' 
+ display_process = display_screenshot() + input = ask 'Enter the captcha solution, or press Enter to get a new challenge: ' + display_process.kill() + + page.evaluate (input)-> + if input is '' + Recaptcha.reload() + else + captcha_box = document.querySelector '#recaptcha_response_field' + captcha_box.value = input + , input + need_to_submit = true + + if need_to_submit + # Entered information, submit and check for captcha/login again after load finishes + log 'Submitting login information and/or captcha response...' + page.onLoadFinished = -> handle_login_captcha action, username, password + page.evaluate -> + form = document.querySelector('form') + if form + form.submit() + else + log 'Logged in...' + action() + -- cgit v1.2.3-70-g09d2