獲取網站的<meta name="keywords" content="" />和<meta name="description" content="" />關鍵字和描述內容
實現HTML解析器jsoup
下載jsoup的lib地址:http://jsoup.org/download
package cn.evan.util;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small command-line demo that downloads a web page with jsoup and prints
 * the content of its {@code keywords} and {@code description} meta tags,
 * followed by the page title.
 */
public class SemanticCrawl {

    /**
     * Target used when no URL is supplied on the command line. This is the
     * original placeholder text, not a real address; replace it or pass a
     * URL as the first program argument.
     */
    private static final String DEFAULT_URL = "網(wǎng)址";

    /**
     * Fetches the page and prints its keywords, description and title to
     * standard output.
     *
     * @param args optional; when present, {@code args[0]} is used as the URL
     *             to fetch, otherwise {@link #DEFAULT_URL} is used
     */
    public static void main(String[] args) {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;

        Document doc;
        try {
            doc = Jsoup.connect(url).get();
        } catch (IOException e) {
            // Without a document there is nothing to inspect; stop here
            // instead of dereferencing a null Document further down.
            e.printStackTrace();
            return;
        }

        String title = doc.title();
        Elements metas = doc.head().select("meta");
        for (Element meta : metas) {
            // Read each attribute once; the name match is case-insensitive.
            String name = meta.attr("name");
            String content = meta.attr("content");
            if ("keywords".equalsIgnoreCase(name)) {
                System.out.println("關(guān)鍵字:" + content);
            }
            if ("description".equalsIgnoreCase(name)) {
                System.out.println("網(wǎng)站內(nèi)容描述:" + content);
            }
        }
        System.out.println("標(biāo)題" + title);
    }
}
以上所述就是本文分享的全部內容了,希望大家能夠喜歡。