利用selenium截取站内所有的页面截图丨软件测试

利用selenium截取站内所有的页面截图

需求：截取一个站点内所有的页面的截图，比如wordpress所有文章页

实现思路：获取页面所有a标签，获取a标签的链接href，判断是否为本站的链接，是的话截图，然后再递归遍历

import org.apache.commons.io.FileUtils;
import org.openqa.selenium.*;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by zsn on 2015/4/7.
 */
public class WebsiteScreenshot {

    public static void main(String[] args) {
        getAElement("http://www.izsn.cn");
    }


    private static void getAElement(String url){
        WebDriver driver =  new FirefoxDriver();
        //不能使用HtmlUnitDriver，因为HtmlUnitDriver不支持截图
        // 具体解释-&gt;https://code.google.com/p/selenium/issues/detail?id=1361
        driver.get(url);

        //创建保存目录
        File saveDir = new File("./screenshot");
        if(!saveDir.exists()) saveDir.mkdir();

        //保存截图
        pickScreenshot(driver, url);

        //获得网页所有a标签
        List&lt;WebElement&gt; list = driver.findElements(By.tagName("a"));
        for(WebElement element : list){
            try{
                String href = element.getAttribute("href");
                System.out.print("current url:"+href+"\n");
                //是否为站内链接
                if(getDomain(url).equals(getDomain(href))){
                    if(!isExist(href)){
                        //递归执行
                        getAElement(href);
                    }
                }
            }catch (Exception e){
                e.printStackTrace();
            }
        }
    }

    /**
     * 获取网页截图
     * @param driver
     * @param url
     */
    private static void pickScreenshot(WebDriver driver, String url){
        TakesScreenshot screenshot = (TakesScreenshot) driver;
        File file = screenshot.getScreenshotAs(OutputType.FILE);
        try {
            //将文件保存在当前目录下screenshot文件夹下
            String fileName = "./screenshot/"+java.net.URLEncoder.encode(url,"utf-8")+".jpg";
            FileUtils.copyFile(file, new File(fileName));
        } catch (IOException e) {
            e.printStackTrace();
        }finally {
            driver.close();
        }
    }

    /**
     * 截图是否存在，不重复截图
     * @param href
     * @return
     */
    private static boolean isExist(String href){
        try {
            String fileName = "./screenshot/"+java.net.URLEncoder.encode(href,"utf-8")+".jpg";
            return new File(fileName).exists();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * 获取url中的域名
     * @param url
     * @return
     */
    private static String getDomain(String url){
        try {
            return new URL(url).getHost();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        return "";
    }
}

100

101

102

103

104

import org.apache.commons.io.FileUtils;

import org.openqa.selenium.*;

import org.openqa.selenium.firefox.FirefoxDriver;

import org.openqa.selenium.htmlunit.HtmlUnitDriver;

import java.io.File;

import java.io.IOException;

import java.io.UnsupportedEncodingException;

import java.net.MalformedURLException;

import java.net.URL;

import java.util.List;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**

* Created by zsn on 2015/4/7.

public class WebsiteScreenshot {

public static void main(String[] args) {

getAElement("http://www.izsn.cn");

}

private static void getAElement(String url){

WebDriver driver = new FirefoxDriver();

//不能使用HtmlUnitDriver，因为HtmlUnitDriver不支持截图

// 具体解释->https://code.google.com/p/selenium/issues/detail?id=1361

driver.get(url);

//创建保存目录

File saveDir = new File("./screenshot");

if(!saveDir.exists()) saveDir.mkdir();

//保存截图

pickScreenshot(driver, url);

//获得网页所有a标签

List<WebElement> list = driver.findElements(By.tagName("a"));

for(WebElement element : list){

try{

String href = element.getAttribute("href");

System.out.print("current url:"+href+"\n");

//是否为站内链接

if(getDomain(url).equals(getDomain(href))){

if(!isExist(href)){

//递归执行

getAElement(href);

}

}catch (Exception e){

e.printStackTrace();

}

/**

* 获取网页截图

* @param driver

* @param url

private static void pickScreenshot(WebDriver driver, String url){

TakesScreenshot screenshot = (TakesScreenshot) driver;

File file = screenshot.getScreenshotAs(OutputType.FILE);

try {

//将文件保存在当前目录下screenshot文件夹下

String fileName = "./screenshot/"+java.net.URLEncoder.encode(url,"utf-8")+".jpg";

FileUtils.copyFile(file, new File(fileName));

} catch (IOException e) {

e.printStackTrace();

}finally {

driver.close();

}

/**

* 截图是否存在，不重复截图

* @param href

* @return

private static boolean isExist(String href){

try {

String fileName = "./screenshot/"+java.net.URLEncoder.encode(href,"utf-8")+".jpg";

return new File(fileName).exists();

} catch (UnsupportedEncodingException e) {

e.printStackTrace();

}

return false;

}

/**

* 获取url中的域名

* @param url

* @return

private static String getDomain(String url){

try {

return new URL(url).getHost();

} catch (MalformedURLException e) {

e.printStackTrace();

}

return "";

}

转载请注明：软件测试 » 利用selenium截取站内所有的页面截图

喜欢 0

标签: selenium, WebDriver