See Also JavaScript

PhantomJS

PhantomJS是一个基于webkit内核、支持JavaScript/CoffeeScript API的无界面浏览器,并且原生支持DOM/CSS/JSON/Canvas/SVG等W3C颁布的互联网技术标准

因此,我们可以使用JavaScript/CoffeeScript来模拟一个现代浏览器在加载网页时所做的各种事情,例如页面自动化、网络监测、网页截屏以及无界面测试等。

1. 关于PhantomJS

1.1. 相关项目

2. 基础知识

进行PhantomJS开发需要掌握的基础知识见JavaScript

2.1. Installation

# on ubuntu: install the phantomjs package with apt
apt-get install phantomjs
# or download binary: fetches the 1.9.7 linux-x86_64 tarball (.tar.bz2)
wget https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-1.9.7-linux-x86_64.tar.bz2
# on mac: install with Homebrew
brew install phantomjs
brew install casperjs #a navigation scripting & testing utility for PhantomJS

建议直接下载对应平台的二进制运行文件

3. PhantomJS内置对象

3.1. phantom 对象

phantom 是 PhantomJS 提供的宿主对象(host object),作为 window 对象的子对象存在,用于访问 PhantomJS 自身的各项功能(例如 phantom.exit())。

3.2. System Module

// Read the command-line arguments through the built-in system module.
// system.args[0] is the script itself; user-supplied values start at index 1.
var system = require('system');
var url; // declared explicitly so the assignment below does not create an implicit global

// Fewer than 2 or more than 5 entries in system.args: print usage and exit with an error code.
if (system.args.length < 2 || system.args.length > 5) {
        console.log('Usage: phantomjs --cookies-file=<name> zabbix.js CMD [Options]');
        console.log('  ls:   show configuration load from zabbix.config & login use account info');
        // ... (remaining usage lines omitted in this excerpt)
        phantom.exit(1);
}
// ... (handling of other commands omitted in this excerpt)
// "ls" followed by exactly one extra argument: that argument is taken as the URL.
if (system.args.length === 3 && system.args[1] === "ls") {
        url = system.args[2];
}

3.3. Web Page Module

获得一个浏览器页面(browser)实例,可以用来打开URL;在打开页面的调用上以及实例本身还可以注册更多的回调函数,是最重要的一个对象。

// Load the webpage module and create a page instance from it.
var webPage = require('webpage');
var page = webPage.create();

// Callback for page.open: logs the status string it is called with.
function reportStatus(status) {
  console.log('Status: ' + status);
  // Do other things here...
}

page.open('http://www.google.com/', reportStatus);

3.3.1. page.open

GET和POST访问的例子,以及POST JSON的例子

http://phantomjs.org/api/webpage/method/open.html

3.3.2. page.evaluate

一个功能强大的函数,可以获得页面对象并进行操作:evaluate(function, arg1, arg2, ...) {object}

http://phantomjs.org/api/webpage/method/evaluate.html

// Passed to page.evaluate: returns the innerText of the first element
// matching the given CSS selector.
function readInnerText(selector) {
  return document.querySelector(selector).innerText;
}

page.open('http://m.bing.com', function(status) {
  // The extra argument ('title') is handed to readInnerText as its selector.
  var title = page.evaluate(readInnerText, 'title');

  console.log(title);
  phantom.exit();
});

3.3.3. page.render

把当前页面的截图存入指定文件:render(filename [, {format, quality}]) {void}

http://phantomjs.org/api/webpage/method/render.html

var webPage = require('webpage');
var page = webPage.create();

// Output options handed to page.render: JPEG format, quality '100'.
var renderOptions = {format: 'jpeg', quality: '100'};

// Set the viewport to 1920x1080 before opening the page.
page.viewportSize = { width: 1920, height: 1080 };

// Callback for page.open: saves the screenshot to google_home.jpeg, then quits PhantomJS.
function start(status) {
  page.render('google_home.jpeg', renderOptions);
  phantom.exit();
}

page.open("http://www.google.com", start);

3.4. File System Module

var fs = require('fs');

// Open the file with mode 'r' (read); fs.open returns a stream object.
var file = fs.open('/path/to/file', 'r');
// ... read from the stream here, e.g. file.read() or file.readLine() ...
// Release the file handle once the stream is no longer needed.
file.close();

3.5. Module


4. Quick Start

http://phantomjs.org/quick-start.html

4.1. Transport Value to Script

// args.js: demonstrates passing command-line values into a PhantomJS script.
// Example invocation: phantomjs args.js 1 hello 3 value4
var system = require('system');
var v1, v2, v3, v4;
if (system.args.length < 5) {
    // Fewer than four values were supplied: print usage and exit with an error code.
    console.log('usage: phantomjs args.js <v1> <v2> <v3> <v4>');
    phantom.exit(1);
} else {
    // system.args[0] is the script
    v1 = system.args[1]; // first param
    v2 = system.args[2];
    v3 = system.args[3];
    v4 = system.args[4];
    // print each args
    system.args.forEach(function (arg, i) {
            console.log(i + ': ' + arg);
    });
    // Kept inside this branch so the usage path never prints "v2=undefined"
    // or calls phantom.exit() a second time.
    console.log("v2="+v2);
    phantom.exit();
}

➜  js phantomjs args.js 1 hello 3 value4
0: args.js
1: 1
2: hello
3: 3
4: value4
v2=hello

4.2. load customised JSON config

Interactive Mode

{
   "desc":"zabbix.config",
   "account": {
      "user": "liyan",
      "password": "******************"
   }
}

phantomjs> JSON.parse('{"a":"dd","b":"NULL"}').a
"dd"
phantomjs> JSON.parse(require('fs').read("zabbix.config")).account.user
"liyan"

4.3. Screen Capture

http://phantomjs.org/screen-capture.html

4.3.1. Baidu.com(Basic)

// Capture a 1024x800 screenshot of baidu.com into ./snapshot/test.png.
var page = require('webpage').create();
page.viewportSize = { width: 1024, height: 800 };
// Clip the rendered image to the same rectangle as the viewport.
page.clipRect = { top: 0, left: 0, width: 1024, height: 800 };
// Assign settings one key at a time so that keys not listed here keep their existing values.
page.settings.javascriptEnabled = true;
page.settings.loadImages = true;
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) PhantomJS/19.0';
page.open('http://www.baidu.com/', function(status) {
        var failed = (status === 'fail');

        console.log('status = ' + status);
        if (failed) {
                console.log('open page fail!');
        } else {
                page.render('./snapshot/test.png');
                console.log('done!');
        }
        // release the memory
        page.close();

        console.log('quit phantom.');
        // Report a failed load to the calling shell through the exit code.
        phantom.exit(failed ? 1 : 0);
});

4.3.2. rasterize.js(with many input selections)

https://github.com/ariya/phantomjs/blob/master/examples/rasterize.js

4.3.3. zabbix.js(with Login Action)

zabbix_pass='my password'
phantomjs --cookies-file=1.txt zabbix.js liyan $zabbix_pass http://zabbix.hesine.com/charts.php\?graphid\=1481 1.png

   1 var system = require('system');
   2 //global
   3 var name, password, img;
   4 if (system.args.length < 5) {
   5   //phantomjs --cookies-file=1.txt zabbix.js liyan $zabbix_pass http://zabbix.hesine.com/charts.php\?graphid\=1481 1.png
   6     console.log('usage: phantomjs --cookies-file=1.txt zabbix.js <username> <password> <url> <img>');
   7     phantom.exit();
   8 } else {
   9     // system.args[0] is the script
  10     name = system.args[1]; // first param
  11     password = system.args[2];
  12     url = system.args[3];
  13     img = system.args[4];
  14 }
  15 
  16 var page = require('webpage').create();
  17 page.viewportSize = { width: 1200, height: 600 };
  18 
  19 page.onConsoleMessage = function(msg) {
  20     console.log(msg);
  21 };
  22 
  23 var fillLoginInfo = function(name, password){
  24 	console.log("set username="+name);
  25 
  26 	document.getElementById("name").value = name;
  27 	document.getElementById("password").value = password;
  28 	document.getElementById("enter").click();
  29 }
  30 
  31 page.onLoadFinished = function(){
  32 
  33 	console.log("title="+page.title+", username="+name);
  34 
  35 	if(page.title === "Zabbix"){
  36 		console.log("filling loginInfo...");
  37 		page.evaluate(fillLoginInfo, name, password);
  38 		return;
  39 	}
  40 	else {
  41 		page.render(img);
  42 		console.log("saving picture: "+img);
  43 	}
  44 	console.log("completed.");
  45 	phantom.exit();
  46 }
  47 
  48 console.log("capture "+url);
  49 page.open(url);
zabbix.js

4.4. Code Evaluation

page.open(url, function(status) {
        // Run JavaScript in the page context, e.g. to conveniently pull out the data we need
        var data = page.evaluate(function() {
                // Returns the innerText of the first element matching the selector.
                function textOf(selector) {
                        return document.querySelector(selector).innerText;
                }

                var itemDetail = [];
                itemDetail[0] = document.title;
                // Grab content of the following form:
                //<p class="tm-count">31</p>
                itemDetail[1] = textOf('p.tm-count');
                // Grab the link address from markup of the following form:
                //<img class="my-pic" src="http://li3huo.com/wp-content/uploads/2014/05/Beijing_Climate_Temperature_Warning_20140529.png"></img>
                // itemDetail[2] = document.querySelector('img.my-pic').src;
                //item price
                //<span class="tm-price">14.00</span>
                itemDetail[3] = textOf('span.tm-price');

                return itemDetail;
        });

        console.log(data);
});

4.5. More Samples

http://phantomjs.org/examples/index.html

https://github.com/ariya/phantomjs/tree/master/examples

5. Use Case

5.1. zabbix report capture

js_capture_zabbix

5.2. crawler4tmall

https://github.com/pananq/crawler4tmall

6. Reference


CategoryTool

MainWiki: PhantomJS (last edited 2014-06-06 00:34:24 by twotwo)