diff options
-rw-r--r-- | CHANGELOG | 3 | ||||
-rw-r--r-- | README | 26 | ||||
-rw-r--r-- | discover-url.coffee | 21 | ||||
-rwxr-xr-x | hib-dlagent | 22 | ||||
-rw-r--r-- | login.coffee | 12 | ||||
-rw-r--r-- | phantomjs-config.json | 9 | ||||
-rw-r--r-- | util.coffee | 71 |
7 files changed, 154 insertions, 10 deletions
@@ -1,3 +1,6 @@ +Version 0.6 - 2014/04/01 + * Handles the new Humble Bundle page using PhantomJS + Version 0.5 - 2014/01/12 * Fixed handling of passwords containing special characters * Added interactive password prompting when password not provided as argument @@ -2,22 +2,32 @@ Tool to download Humble Indie Bundle binaries by file name Author: Eric Anderson <ejona86@gmail.com> Contributor: Thomas Riccardi <riccardi.thomas@gmail.com> Contributor: Devin J. Pohly <djpohly@gmail.com> +Contributor: Yaohan Chen <yaohan.chen@gmail.com> Description =========== Primarily for use as a DLAGENT in makepkg.conf in Arch Linux, but generally useful when needing to download a particular Humble Bundle file via a script. -The script does very little other than argument parsing; it effectively has only -two "real" lines of functionality. It also manages a storage directory where already downloaded files can be found. Installation and Usage ====================== -Run the script directly or copy the script to a location like /usr/bin/ for all -users to use. +This utility currently requires PhantomJS in order to extract links on Humble +Bundle webpages generated by JavaScript. It also uses 'display', part of +ImageMagick, to handle any captchas. (It does not solve captchas for you, but +only displays a screenshot of the web page and asks you to enter the answer.) + +To run hib-dlagent without installing it, add parameters '-S. -c.', which tell +it to look for scripts and the configuration file in the current directory. + +By default hib-dlagent assumes the .coffee scripts are installed in +/usr/share/hib-dlagent, and the .json configuration file is installed in +/etc/hib-dlagent. If they are installed in other locations, it is necessary +to either run hib-dlagent with the correct -S and -c parameters, or edit the +variables SCRIPT_PATH and CONFIG_PATH in hib-dlagent. 
The tool uses curl to download the file, or can simply provide the URL needed to download the file so a different HTTP downloader such as Wget can be used. @@ -28,3 +38,11 @@ DLAGENTS=(... ...) Run with the -h argument for more information. + +Troubleshooting +=============== + +Extracting URLs from Humble Bundle's mostly dynamically-generated website can be +error-prone and time-consuming. Currently it can take about 20 seconds for the +script to run. If the script fails or gets stuck, it may help to turn on logging +by setting the LOG environment variable to a non-empty value. diff --git a/discover-url.coffee b/discover-url.coffee new file mode 100644 index 0000000..cf8627d --- /dev/null +++ b/discover-url.coffee @@ -0,0 +1,21 @@ +system = require 'system' +[_, listing_page, filename, username, password] = system.args + +util = require './util' +page = util.page + +page.open listing_page, (status) -> + util.log "Open listing page: #{status}" + util.handle_login_captcha -> + util.log 'Searching URLs...' 
+ url = page.evaluate (filename) -> + # Characters in filename may need to be escaped for use in a selector + found = document.querySelector ".downloads.linux a[href*='#{filename}']" + found and found.getAttribute('href') + , filename + util.log "Found URL: #{url}" + if url + system.stdout.writeLine url + phantom.exit() + , username, password + diff --git a/hib-dlagent b/hib-dlagent index fe09339..5c6a8a5 100755 --- a/hib-dlagent +++ b/hib-dlagent @@ -6,6 +6,9 @@ LOGIN_PAGE=https://www.humblebundle.com/login HOME_PAGE=https://www.humblebundle.com/home COOKIE_JAR= +SCRIPT_PATH='/usr/share/hib-dlagent' +CONFIG_PATH='/etc/hib-dlagent' + FILE= DESTINATION= DOWNLOAD=1 @@ -19,15 +22,14 @@ login() { read -rsp 'Enter Humble account password: ' PASSWORD echo fi - printf '%s' "$PASSWORD" | \ - curl -s --cookie-jar "$COOKIE_JAR" \ - --data-urlencode "username=$USERNAME" --data-urlencode password@- "$LOGIN_PAGE" + phantomjs --config="$CONFIG_PATH"/phantomjs-config.json --cookies-file="$COOKIE_JAR" \ + "$SCRIPT_PATH"/login.coffee "$LOGIN_PAGE" "$USERNAME" "$PASSWORD" } discover_url() { local LISTING_PAGE="$1" - curl -s --cookie "$COOKIE_JAR" "$LISTING_PAGE" | grep "/$FILE?" | grep 'data-web=' | \ - sed -e "s/.* data-web='\([^']*\)'.*/\1/" | head -n 1 + phantomjs --config="$CONFIG_PATH"/phantomjs-config.json --cookies-file="$COOKIE_JAR" \ + "$SCRIPT_PATH"/discover-url.coffee "$LISTING_PAGE" "$FILE" "$USERNAME" "$PASSWORD" } usage() { @@ -47,6 +49,8 @@ Options: -s Print URL to stdout instead of downloading. Incompatible with -d -u <user> Use user to login. Search account's files. If specified multiple times, the last is used + -S <dir> Directory where PhantomJs scripts are located. + -c <dir> Directory where configuration files are located. If you specify -u, then all of that account's bundles are searched. 
If a key is associated with a HIB account then you must use -u/-p, since that key only works @@ -102,7 +106,7 @@ main() { exit 1 fi - while getopts "hd:k:o:p:P:su:" opt; do + while getopts "hd:k:o:p:P:su:S:c:" opt; do case $opt in \?) exit 1 @@ -132,6 +136,12 @@ main() { u) USERNAME="$OPTARG" ;; + S) + SCRIPT_PATH="$OPTARG" + ;; + c) + CONFIG_PATH="$OPTARG" + ;; esac done diff --git a/login.coffee b/login.coffee new file mode 100644 index 0000000..c129622 --- /dev/null +++ b/login.coffee @@ -0,0 +1,12 @@ +system = require 'system' +[_, login_page, username, password] = system.args + +util = require './util' +page = util.page + +page.open login_page, (status)-> + util.log "Opening login page: #{status}" + util.handle_login_captcha -> + phantom.exit() + , username, password + diff --git a/phantomjs-config.json b/phantomjs-config.json new file mode 100644 index 0000000..559194a --- /dev/null +++ b/phantomjs-config.json @@ -0,0 +1,9 @@ +{ +// "autoLoadImages": false, + "webSecurityEnabled": false, + "ignoreSslErrors": true, + "proxyType": "none", + "diskCacheEnabled": true, + "debug": true, + "libraryPath": ".", +} diff --git a/util.coffee b/util.coffee new file mode 100644 index 0000000..169cc35 --- /dev/null +++ b/util.coffee @@ -0,0 +1,71 @@ +system = require('system') + +# prints a log message if the LOG environment variable is set +if system.env.LOG? 
+ exports.log = log = system.stderr.writeLine +else + exports.log = log = -> + +# prints a message to stderr, reads a line of input, and returns the input +exports.ask = ask = (message) -> + system.stderr.write message + system.stdin.readLine() + +# a page that directs its console messages to exportss.log +exports.page = page = require('webpage').create() +page.onConsoleMessage = log + +child_process = require('child_process') + +# displays a screenshot of the page, and returns the 'display' process object +exports.display_screenshot = display_screenshot = -> + # FIXME use mktemp, or write to display process directly + screenshot = '/tmp/hib-dlagent-phantomjs.png' + page.render screenshot + child_process.spawn 'display', [screenshot] + +# handles login/captcha boxes, and calls the passed action() when logged in +exports.handle_login_captcha = handle_login_captcha = (action, username, password) -> + need_to_submit = false + + # complete a login form if there is one + if page.evaluate(-> document.querySelector 'input[name="username"]') + log 'Entering login information...' + page.evaluate (username, password) -> + username_box = document.querySelector 'input[name="username"]' + password_box = document.querySelector 'input[name="password"]' + if username_box + username_box.value = username + if password_box + password_box.value = password + , username, password + need_to_submit = true + + # handle a captcha box if there is one + if page.evaluate(-> document.querySelector '#recaptcha_response_field') + log 'Humble Bundle wants you to solve a captcha. Displaying screenshot...' 
+ display_process = display_screenshot() + input = ask 'Enter the captcha solution, or press Enter to get a new challenge: ' + display_process.kill() + + page.evaluate (input)-> + if input is '' + Recaptcha.reload() + else + captcha_box = document.querySelector '#recaptcha_response_field' + captcha_box.value = input + , input + need_to_submit = true + + if need_to_submit + # Entered information, submit and check for captcha/login again after load finishes + log 'Submitting login information and/or captcha response...' + page.onLoadFinished = -> handle_login_captcha action, username, password + page.evaluate -> + form = document.querySelector('form') + if form + form.submit() + else + log 'Logged in...' + action() + |