Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

 

History

History
History
124 lines (109 loc) · 4.01 KB

File metadata and controls

124 lines (109 loc) · 4.01 KB
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package com.crawl.comments;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.dom4j.Element;
import org.dom4j.io.XMLWriter;
import org.json.JSONObject;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.HashSet;
import java.util.Set;
/**
* Created by geekgao on 15-10-25.
*/
public class CrawlUtils {
/**
*
* @param id appid
* @return app名字
*/
public static String getAppName(String id) throws IOException {
CloseableHttpClient client = HttpClients.createDefault();
HttpGet get = new HttpGet("http://zhushou.360.cn/detail/index/soft_id/" + id);
CloseableHttpResponse response;
try {
response = client.execute(get);
} catch (java.net.UnknownHostException e) {
return null;
}
return EntityUtils.toString(response.getEntity()).split("<title>")[1].split("<")[0];
}
/**
*
* @param xml xml文档
* @param fileName 存储到这个地方
*/
public static void writeXmlToFile(Element xml,String fileName) throws IOException {
Writer fileWriter = new FileWriter(fileName);
XMLWriter xmlWriter = new XMLWriter(fileWriter);
xmlWriter.write(xml);
xmlWriter.close();
}
/**
* 获取需要下载的app的id
* @param uri app类别页
* @param limit 获取前limit个app的评论
* @return
*/
public static Set<String> getAppIds(String uri,int limit) throws IOException {
/*//因为根据网页源码每个appid会匹配到两次,所以获取limit个就必须获取2*limit次
limit = limit * 2;
Set<String> appIds = null;
//获取网页源码,得到appid
HttpGet get = new HttpGet(uri);
CloseableHttpResponse response = client.execute(get);
String html = EntityUtils.toString(response.getEntity());
Pattern getAppIdRegex = Pattern.compile("(?m)/detail/index/soft_id/(.*?)\"");
Matcher matcher = getAppIdRegex.matcher(html);
//至少有一个结果才new一个set
if (matcher.find()) {
appIds = new HashSet<String>();
} else {
return appIds;
}
//控制获取的appid个数
int count = 0;
//把所有匹配到的appid加入到结果中
do {
if (count < limit) {
appIds.add(matcher.group(1));
count++;
}
} while (matcher.find());
return appIds;*/
Set<String> s = new HashSet<String>();
// s.add("3581");
// s.add("778702");
// s.add("1586");
// s.add("6276");
// s.add("122437");
// s.add("5632");
// s.add("4107");
// s.add("98008");
// s.add("3100672");
// s.add("2345172");
// s.add("1343");
// s.add("3094256");
// s.add("101594");
// s.add("1840672");
// s.add("1643");
// s.add("893686");
// s.add("3032510");
s.add("1936882");
// s.add("7256");
// s.add("727030");
return s;
}
public static int getCommentCount(int appId) throws IOException {
CloseableHttpClient client = HttpClients.createDefault();
HttpGet getJson = new HttpGet("http://comment.mobilem.360.cn/comment/getComments?baike=" + appId + "&level=0&start=0&count=1&fm=home_jingjia_3&m=c1804fc5ca4ded8293acd1151efaf3db&m2=61f3c1e4d105b55aff323b20a8136c4e&v=3.2.50&re=1&nt=1&ch=493041&os=21&model=MX4+Pro&sn=4.66476154040931&cu=m76&ca1=armeabi-v7a&ca2=armeabi&ppi=1536x2560&cpc=1&startCount=4");
CloseableHttpResponse response = client.execute(getJson);
String json = EntityUtils.toString(response.getEntity());
JSONObject jsonObject = new JSONObject(json);
return jsonObject.getJSONObject("data").getInt("total");
}
}
Morty Proxy This is a proxified and sanitized view of the page, visit original site.