Java crawler: fetching webpage content (2 ways to get a page's source code in Java)
Below, let's walk through both approaches to fetching a page's content with a Java crawler.
Method 1: URL (java.net.HttpURLConnection)
package InternetTest;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

public class a44 {
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://www.baidu.com");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        conn.setConnectTimeout(5 * 1000); // connect timeout in milliseconds (5 seconds)
        // Read the response body into memory in 1 KB chunks
        InputStream inStream = conn.getInputStream();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        int len;
        while ((len = inStream.read(buffer)) != -1) {
            outStream.write(buffer, 0, len);
        }
        inStream.close();
        byte[] data = outStream.toByteArray();
        // Decode explicitly instead of relying on the platform default charset
        String htmlSource = new String(data, "UTF-8");
        System.out.println(htmlSource);
    }
}
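Hardcoding UTF-8 happens to work for www.baidu.com, but a general-purpose crawler should honor whatever charset the server declares in its Content-Type response header. Here is a minimal sketch of that idea; detectCharset is a hypothetical helper of ours, not a JDK method:

import java.net.HttpURLConnection;

public class CharsetSketch {
    // Hypothetical helper: pull "charset=..." out of the Content-Type header,
    // e.g. "text/html; charset=gbk", falling back to UTF-8 when the server
    // does not declare one.
    static String detectCharset(HttpURLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                part = part.trim();
                if (part.toLowerCase().startsWith("charset=")) {
                    return part.substring("charset=".length());
                }
            }
        }
        return "UTF-8";
    }
}

With a helper like this, the decoding line above would become new String(data, detectCharset(conn)).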
Method 2: HttpClient (Apache HttpComponents)
package InternetTest;

import org.apache.http.HttpEntity;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.HttpClientUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

public class a45 {
    public static void main(String[] args) throws Exception {
        String url1 = "http://www.baidu.com";
        // Create a client with default configuration and issue a GET request
        CloseableHttpClient closeableHttpClient = HttpClients.createDefault();
        HttpGet request = new HttpGet(url1);
        CloseableHttpResponse closeableHttpResponse = closeableHttpClient.execute(request);
        if (closeableHttpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
            // 200 OK: consume the entity and print the page source
            HttpEntity httpEntity = closeableHttpResponse.getEntity();
            String html = EntityUtils.toString(httpEntity, "utf-8");
            System.out.println(html);
        } else {
            // Non-200 response: print whatever body the server returned
            System.out.println(EntityUtils.toString(closeableHttpResponse.getEntity(), "utf-8"));
        }
        // closeQuietly releases resources and swallows any IOException
        HttpClientUtils.closeQuietly(closeableHttpResponse);
        HttpClientUtils.closeQuietly(closeableHttpClient);
    }
}
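HttpClients.createDefault() applies no timeouts, so a stalled server can hang the crawler indefinitely. Assuming HttpClient 4.3 or newer, per-request timeouts (and a User-Agent header, which some sites expect) can be attached along these lines; the values are illustrative, not recommendations:

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;

public class ConfigSketch {
    static HttpGet buildRequest(String url) {
        // Example values; tune per site. Both timeouts are in milliseconds.
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(5 * 1000) // time allowed to establish the TCP connection
                .setSocketTimeout(5 * 1000)  // max idle time between data packets
                .build();
        HttpGet request = new HttpGet(url);
        request.setConfig(config);
        request.setHeader("User-Agent", "Mozilla/5.0"); // some sites block the default agent string
        return request;
    }
}

Since setConfig and setHeader both live on the request object, the result drops straight into closeableHttpClient.execute(request) in the example above.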