Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

 

History

History
History
82 lines (71 loc) · 3.44 KB

File metadata and controls

82 lines (71 loc) · 3.44 KB
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package com.crawl.comments;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.ConnectionPoolTimeoutException;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.dom4j.Element;
import org.json.JSONArray;
import org.json.JSONObject;
import java.io.IOException;
import java.net.SocketTimeoutException;
/**
 * Fetches one page of user comments for a single app from the 360 mobile
 * comment service and appends each comment as a {@code <comment>} child of the
 * supplied dom4j element.
 *
 * <p>The page is selected by {@code start} and {@code count}, which are sent
 * verbatim as query parameters. Network failures are reported on
 * {@code System.err} and never propagate out of {@link #run()}.
 *
 * <p>NOTE(review): this class does not synchronize access to {@code app}. If
 * several instances sharing the same element run concurrently, the caller
 * presumably needs to guard it -- confirm against the code that schedules
 * these tasks.
 *
 * Created by geekgao on 15-10-19.
 */
public class CrawlComments implements Runnable {
    /**
     * Charset used to decode the response body when the server does not
     * declare one. Without it HttpCore falls back to ISO-8859-1, which would
     * garble non-Latin comment text.
     */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Element that receives one {@code <comment>} child per fetched comment. */
    private final Element app;
    /** Value of the {@code start} query parameter. */
    private final int start;
    /** Value of the {@code count} query parameter. */
    private final int count;
    /** Value of the {@code baike} query parameter; also used in log messages. */
    private final int appId;

    /**
     * @param app   element to which the fetched comments are appended
     * @param start value sent as the {@code start} query parameter
     * @param count value sent as the {@code count} query parameter
     * @param appId value sent as the {@code baike} query parameter
     */
    public CrawlComments(Element app, int start, int count, int appId) {
        this.app = app;
        this.start = start;
        this.count = count;
        this.appId = appId;
    }

    /**
     * Downloads the comment page, parses the JSON payload and appends one
     * {@code <comment>} element (with {@code userid}, {@code time},
     * {@code score}, {@code review} and {@code agreecount} children) to
     * {@code app} for every entry of {@code data.messages}.
     *
     * <p>The HTTP client is always closed, including when the request or the
     * parsing fails part-way through.
     *
     * @throws IOException if the HTTP request fails or times out
     */
    private void setAppXml() throws IOException {
        // Configure timeouts (milliseconds): obtaining a connection from the
        // manager, waiting for data on the socket, and establishing the connection.
        RequestConfig requestConfig = RequestConfig.custom()
                .setConnectionRequestTimeout(2000)
                .setSocketTimeout(6000)
                .setConnectTimeout(2000)
                .build();
        // Build the client.
        CloseableHttpClient client = HttpClients.custom().setDefaultRequestConfig(requestConfig).build();
        try {
            HttpGet getContentJson = new HttpGet(
                    "http://comment.mobilem.360.cn/comment/getComments?baike=" + appId
                    + "&level=0&start=" + start
                    + "&count=" + count
                    + "&fm=home_jingjia_3&m=c1804fc5ca4ded8293acd1151efaf3db&m2=61f3c1e4d105b55aff323b20a8136c4e&v=3.2.50&re=1&nt=1&ch=493041&os=21&model=MX4+Pro&sn=4.66476154040931&cu=m76&ca1=armeabi-v7a&ca2=armeabi&ppi=1536x2560&cpc=1&startCount=4");
            // The default charset only applies when the response does not declare one.
            String contentJson = EntityUtils.toString(client.execute(getContentJson).getEntity(), DEFAULT_CHARSET);

            JSONObject jsonObject = new JSONObject(contentJson);
            JSONArray contentJsonArray = jsonObject.getJSONObject("data").getJSONArray("messages");
            for (int i = 0; i < contentJsonArray.length(); i++) {
                JSONObject messageJsonObject = contentJsonArray.getJSONObject(i);

                String userid = messageJsonObject.getString("username");
                String time = messageJsonObject.getString("create_time");
                String score = String.valueOf(messageJsonObject.getInt("score"));
                String review = messageJsonObject.getString("content");
                String agreecount = messageJsonObject.getString("likes");

                Element comment = app.addElement("comment");
                comment.addElement("userid").setText(userid);
                comment.addElement("time").setText(time);
                comment.addElement("score").setText(score);
                comment.addElement("review").setText(review);
                comment.addElement("agreecount").setText(agreecount);
            }
        } finally {
            // Closing the client shuts down its connection manager, so the
            // connection is released even if the request or parsing threw.
            client.close();
        }
    }

    /**
     * Runs the crawl for this page and reports the outcome on the console.
     * Timeouts are logged as a single line; other I/O errors print their
     * stack trace. Unchecked exceptions raised while parsing the response
     * are not caught here.
     */
    @Override
    public void run() {
        try {
            setAppXml();
            System.out.println(appId + "号app从" + start + "开始的评论抓取完毕");
        } catch (ConnectionPoolTimeoutException e) {
            System.err.println(appId + "号app从" + start + "开始的评论发生-ConnectionPoolTimeoutException");
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (SocketTimeoutException e) {
            System.err.println(appId + "号app从" + start + "开始的评论发生-SocketTimeoutException");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Morty Proxy This is a proxified and sanitized view of the page, visit original site.