Skip to content

Commit 3bc2cd8

Browse files
authored
Iteration #11
Merge pull request #11 from IzzySoft/iteration
2 parents 407a2f9 + 832fe33 commit 3bc2cd8

2 files changed

Lines changed: 94 additions & 58 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ Array
137137
[votes] => 1,820
138138
[price] => 0
139139
[size] => Varies with device
140+
[ads] => 0
141+
[iap] => 0
140142
[success] => 1
141143
)
142144
```

google-play.php

Lines changed: 92 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
<?php
2-
/**
3-
*
4-
* @Name : GooglePlayWebServiceAPI/google-play.php
5-
* @Version : 0.3
6-
* @Programmer : Max & Izzy
7-
* @Date : 2020-10-19, 2020-10-25, 2020-10-29, 2020-10-30, 2020-12-05, 2020-12-06
8-
* @Released under : https://github.com/BaseMax/GooglePlayWebServiceAPI/blob/master/LICENSE
9-
* @Repository : https://github.com/BaseMax/GooglePlayWebServiceAPI
10-
*
11-
**/
2+
/** Crawl information of a specific application in the Google Play Store
3+
* @class GooglePlay
4+
* @version 0.3
5+
* @author Max & Izzy
6+
* @copyright MIT https://github.com/BaseMax/GooglePlayWebServiceAPI/blob/master/LICENSE
7+
* @log 2020-10-19 first release
8+
* @log 2020-12-07 recent version
9+
* @brief releases: 2020-10-19, 2020-10-25, 2020-10-29, 2020-10-30, 2020-12-05, 2020-12-06
10+
* @webpage repository https://github.com/BaseMax/GooglePlayWebServiceAPI
11+
**/
1212
class GooglePlay {
13-
private $debug=false;
13+
private $debug = false; // toggle debug output
14+
private $input = ''; // content retrieved from remote
15+
private $lastError = '';
1416

1517
/** Parse a given RegEx and return the match marked by '(?<content>)'
1618
* @method protected getRegVal
@@ -19,10 +21,27 @@ class GooglePlay {
1921
*/
2022
protected function getRegVal($regEx) {
  // apply the pattern to the page content currently held in $this->input
  $matches = [];
  preg_match($regEx, $this->input, $matches);
  // only the named group 'content' is of interest; no match (or no such group) yields null
  return isset($matches["content"]) ? trim($matches["content"]) : null;
}
2527

28+
/** Fetch app page from Google Play
 * On success the page content is stored in $this->input; on failure the
 * reason is stored in $this->lastError and $this->input is emptied.
 * @method protected getApplicationPage
 * @param string packageName identifier for the app, e.g. 'com.example.app'
 * @param optional string lang language for translations, sent as the 'hl' URL parameter. Default: en_US
 * @param optional string loc locale, mainly for currency, sent as the 'gl' URL parameter. Two-letter, uppercase. Default: US
 * @return bool success
 */
protected function getApplicationPage($packageName, $lang='en_US', $loc='US') {
  // URL-encode all caller-supplied parts so odd characters cannot break or extend the query string
  $link = "https://play.google.com/store/apps/details?id=" . urlencode($packageName)
        . "&hl=" . urlencode($lang) . "&gl=" . urlencode($loc);
  $page = @file_get_contents($link);
  if ( ! $page ) {
    $this->input = '';
    // $http_response_header is only populated when a response was actually received;
    // on connection-level failures it does not exist, so fall back to a generic message
    $this->lastError = isset($http_response_header[0]) ? $http_response_header[0] : 'network error';
    return false;
  }
  $this->input = $page;
  $this->lastError = ''; // do not leave a stale message from an earlier failed call
  return true;
}
44+
2645
/** Obtain details on a given app
2746
* @method public parseApplication
2847
* @param string packageName identifier for the app, e.g. 'com.example.app'
@@ -34,60 +53,59 @@ protected function getRegVal($regEx) {
3453
* Success is signaled by success=1, and details are given via the keys
3554
* packageName, name, developer, category, type (game, app, family), description,
3655
* icon, images (array of screenshot URLs), lastUpdated, versionName, minimumSDKVersion (min Android version),
37-
* install (number of installs), age, rating (float), votes, price, size
56+
* installs (number of installs), age, rating (float), votes, price, size,
57+
* ads (has ads: 0|1), iap (in-app-payment used: 0|1)
3858
* if not explicitly specified otherwise, values are strings
3959
*/
4060
public function parseApplication($packageName, $lang='en_US', $loc='US') {
41-
$link="https://play.google.com/store/apps/details?id=".$packageName."&hl=$lang&gl=$loc";
42-
if ( ! $this->input = @file_get_contents($link) ) {
43-
return ['success'=>0,'message'=>'Google returned: '.$http_response_header[0]];
61+
if ( ! $this->getApplicationPage($packageName, $lang, $loc) ) {
62+
return ['success'=>0,'message'=>$this->lastError];
4463
}
45-
$values=[];
46-
$values["packageName"]=$packageName;
64+
$values = [];
65+
$values["packageName"] = $packageName;
4766

4867
$values["name"] = strip_tags($this->getRegVal('/itemprop="name">(?<content>.*?)<\/h1>/'));
4968
if ($values["name"]==null) {
50-
return ['success'=>0,'message'=>'No app data found'];
69+
return ['success'=>0, 'message'=>'No app data found'];
5170
}
5271

5372
$values["developer"] = strip_tags($this->getRegVal('/href="\/store\/apps\/developer\?id=(?<id>[^\"]+)"([^\>]+|)>(?<content>[^\<]+)<\/a>/i'));
5473

5574
preg_match('/itemprop="genre" href="\/store\/apps\/category\/(?<id>[^\"]+)"([^\>]+|)>(?<content>[^\<]+)<\/a><\/span>/i', $this->input, $category);
56-
if(isset($category["id"], $category["content"])) {
57-
$values["category"]=trim(strip_tags($category["content"]));
58-
$catId=trim(strip_tags($category["id"]));
59-
if($catId=='GAME' || substr($catId,0,5)=='GAME_') $values["type"]="game";
60-
elseif($catId=='FAMILY' || substr($catId,0,7)=='FAMILY?') $values["type"]="family";
61-
else $values["type"]="app";
75+
if (isset($category["id"], $category["content"])) {
76+
$values["category"] = trim(strip_tags($category["content"]));
77+
$catId = trim(strip_tags($category["id"]));
78+
if ($catId=='GAME' || substr($catId,0,5)=='GAME_') $values["type"] = "game";
79+
elseif ($catId=='FAMILY' || substr($catId,0,7)=='FAMILY?') $values["type"] = "family";
80+
else $values["type"] = "app";
6281
} else {
63-
$values["category"]=null;
64-
$values["type"]=null;
82+
$values["category"] = null;
83+
$values["type"] = null;
6584
}
6685

67-
$proto = json_decode($this->getRegVal('/data:(?<content>\[\[\[.+?). sideChannel: .*?\);<\/script/ims'));
68-
$values["summary"] = $proto[0][10][1][1];
86+
$values["summary"] = '';
6987
$values["description"] = $this->getRegVal('/itemprop="description"><span jsslot><div jsname="sngebd">(?<content>.*?)<\/div><\/span><div/i');
7088
$values["icon"] = $this->getRegVal('/<div class="hkhL9e"><div class="xSyT2c"><img src="(?<content>[^\"]+)"/i');
71-
$values["featureGraphic"] = preg_replace('!(.*)=w\d+.*!i','$1',$this->getRegVal('/<meta name="twitter:image" content="(?<content>[^\"]+)"/i'));
89+
$values["featureGraphic"] = preg_replace('!(.*)=w\d+.*!i', '$1', $this->getRegVal('/<meta name="twitter:image" content="(?<content>[^\"]+)"/i'));
7290

7391
preg_match('/<div class="Rx5dXb"([^\>]+|)>(?<content>.*?)<c-data/i', $this->input, $image);
74-
if(isset($image["content"])) {
92+
if ( isset($image["content"]) ) {
7593
preg_match_all('/<img data-src="(?<content>[^\"]+)"/i', $image["content"], $images);
76-
if(isset($images["content"]) && !empty($images["content"])) {
77-
$values["images"]=$images["content"];
94+
if ( isset($images["content"]) && !empty($images["content"]) ) {
95+
$values["images"] = $images["content"];
7896
} else {
7997
preg_match_all('/<img src="[^"]*" srcset="(?<content>[^\s"]+)/i', $image["content"], $images);
80-
if(isset($images["content"])) {
81-
$values["images"]=$images["content"];
98+
if ( isset($images["content"]) ) {
99+
$values["images"] = $images["content"];
82100
} else {
83-
$values["images"]=null;
101+
$values["images"] = null;
84102
}
85103
}
86104
} else {
87-
$values["images"]=null;
105+
$values["images"] = null;
88106
}
89107

90-
if (substr(strtolower($lang),0,2)=='en') {
108+
if ( substr(strtolower($lang),0,2)=='en' ) {
91109
$values["lastUpdated"] = strip_tags($this->getRegVal('/<div class="BgcNfc">Updated<\/div><span class="htlgb"><div class="IQ1z0d"><span class="htlgb">(?<content>.*?)<\/span><\/div><\/span><\/div>/i'));
92110
$values["versionName"] = strip_tags($this->getRegVal('/<div class="BgcNfc">Current Version<\/div><span class="htlgb"><div class="IQ1z0d"><span class="htlgb">(?<content>.*?)<\/span><\/div><\/span><\/div>/i'));
93111
$values["minimumSDKVersion"] = strip_tags($this->getRegVal('/<div class="hAyfc"><div class="BgcNfc">Requires Android<\/div><span class="htlgb"><div class="IQ1z0d"><span class="htlgb">(?<content>.*?)<\/span><\/div><\/span><\/div>/i'));
@@ -96,14 +114,30 @@ public function parseApplication($packageName, $lang='en_US', $loc='US') {
96114
$values["size"] = $this->getRegVal('/<div class="BgcNfc">Size<\/div><span class="htlgb"><div class="IQ1z0d"><span class="htlgb">(?<content>[^<]+)<\/span>/i');
97115
} else {
98116
$envals = $this->parseApplication($packageName);
99-
foreach(["lastUpdated","versionName","minimumSDKVersion","installs","age","size"] as $val) $values[$val]=$envals[$val];
117+
foreach(["lastUpdated","versionName","minimumSDKVersion","installs","age","size"] as $val) $values[$val] = $envals[$val];
100118
}
101119

102120
$values["rating"] = $this->getRegVal('/<div class="BHMmbe"[^>]*>(?<content>[^<]+)<\/div>/i');
103121
$values["votes"] = $this->getRegVal('/<span class="AYi5wd TBRnV"><span[^>]*>(?<content>[^>]+)<\/span>/i');
104122
$values["price"] = $this->getRegVal('/<meta itemprop="price" content="(?<content>[^"]+)">/i');
123+
$test = $this->getRegVal('/<div class="bSIuKf">(?<content>[^<]+)<div/i'); // <div class="bSIuKf">Contains Ads<div
124+
(empty($test)) ? $values["ads"] = 0 : $values["ads"] = 1;
125+
$test = $this->getRegVal('/<div class="aEKMHc">&middot;<\/div>(?<content>[^<]+)</i'); // <div class="aEKMHc">&middot;</div>Offers in-app purchases</div>
126+
(empty($test)) ? $values["iap"] = 0 : $values["iap"] = 1;
127+
128+
$limit = 3;
129+
while ( empty($values["summary"]) && $limit > 0 ) { // sometimes protobuf is missing, but present again on subsequent call
130+
$proto = json_decode($this->getRegVal('/data:(?<content>\[\[\[.+?). sideChannel: .*?\);<\/script/ims'));
131+
if ( empty($proto[0][10]) ) {
132+
--$limit;
133+
$this->getApplicationPage($packageName, $lang, $loc);
134+
} else {
135+
$values["summary"] = $proto[0][10][1][1];
136+
break;
137+
}
138+
}
105139

106-
if($this->debug) {
140+
if ($this->debug) {
107141
print_r($values);
108142
}
109143
$values['success'] = 1;
@@ -117,18 +151,18 @@ public function parseApplication($packageName, $lang='en_US', $loc='US') {
117151
*/
118152
public function parse($link=null) {
119153
if($link == "" || $link == null) {
120-
$link="https://play.google.com/apps";
154+
$link = "https://play.google.com/apps";
121155
}
122-
$input=file_get_contents($link);
156+
$input = file_get_contents($link);
123157
preg_match_all('/href="\/store\/apps\/details\?id=(?<ids>[^\"]+)"/i', $input, $ids);
124-
if(isset($ids["ids"])) {
125-
$ids=$ids["ids"];
126-
$ids=array_values(array_unique($ids));
127-
$values=$ids;
158+
if ( isset($ids["ids"]) ) {
159+
$ids = $ids["ids"];
160+
$ids = array_values(array_unique($ids));
161+
$values = $ids;
128162
} else {
129-
$values=[];
163+
$values = [];
130164
}
131-
if($this->debug) {
165+
if ($this->debug) {
132166
print_r($values);
133167
}
134168
return $values;
@@ -152,29 +186,29 @@ public function parsePerms($packageName, $lang='en') {
152186
'method' => 'POST',
153187
'header' => 'Content-type: application/x-www-form-urlencoded;charset=utf-8'
154188
."\r\n".'Referer: https://play.google.com/',
155-
'content' => 'f.req=%5B%5B%5B%22xdSrCf%22%2C%22%5B%5Bnull%2C%5B%5C%22'.$packageName.'%5C%22%2C7%5D%2C%5B%5D%5D%5D%22%2Cnull%2C%221%22%5D%5D%5D',
189+
'content' => 'f.req=%5B%5B%5B%22xdSrCf%22%2C%22%5B%5Bnull%2C%5B%5C%22' . $packageName . '%5C%22%2C7%5D%2C%5B%5D%5D%5D%22%2Cnull%2C%221%22%5D%5D%5D',
156190
'ignore_errors' => TRUE
157191
)
158192
];
159193
$context = stream_context_create($opts);
160-
if ( $proto = @file_get_contents('https://play.google.com/_/PlayStoreUi/data/batchexecute?rpcids=xdSrCf&bl=boq_playuiserver_20201201.06_p0&hl='.$lang.'&authuser&soc-app=121&soc-platform=1&soc-device=1&rt=c&f.sid=-8792622157958052111&_reqid=257685', false, $context) ) { // raw proto_buf data
161-
preg_match("!HTTP/1\.\d\s+(\d{3})\s+(.+)$!i",$http_response_header[0],$match);
194+
if ( $proto = @file_get_contents('https://play.google.com/_/PlayStoreUi/data/batchexecute?rpcids=xdSrCf&bl=boq_playuiserver_20201201.06_p0&hl=' . $lang . '&authuser&soc-app=121&soc-platform=1&soc-device=1&rt=c&f.sid=-8792622157958052111&_reqid=257685', false, $context) ) { // raw proto_buf data
195+
preg_match("!HTTP/1\.\d\s+(\d{3})\s+(.+)$!i", $http_response_header[0], $match);
162196
$response_code = $match[1];
163197
switch ($response_code) {
164198
case "200" : // HTTP/1.0 200 OK
165199
break;
166200
case "400" : // echo "! No XHR for '$pkg'\n";
167201
case "404" : // app no longer on play
168202
default:
169-
return ['success'=>0,'message'=>$http_response_header[0]];
203+
return ['success'=>0, 'message'=>$http_response_header[0]];
170204
break;
171205
}
172206
} else { // network error (e.g. "failed to open stream: Connection timed out")
173-
return ['success'=>0,'message'=>'network error'];
207+
return ['success'=>0, 'message'=>'network error'];
174208
}
175209

176210
$perms = $perms_unique = [];
177-
$json = preg_replace('!.*?(\[.+?\])\s*\d.*!ims','$1',$proto);
211+
$json = preg_replace('!.*?(\[.+?\])\s*\d.*!ims', '$1', $proto);
178212
$arr = json_decode(json_decode($json)[0][2]);
179213
if (!empty($arr[0])) foreach ($arr[0] as $group) { // 0: group name, 1: group icon, 2: perms, 3: group_id
180214
if (empty($group)) continue;
@@ -191,7 +225,7 @@ public function parsePerms($packageName, $lang='en') {
191225
foreach($arr[2] as $perm) $perms_unique[] = $perm[1];
192226
}
193227

194-
return ['success'=>1,'grouped'=>$perms,'perms'=>array_unique($perms_unique)];
228+
return ['success'=>1, 'grouped'=>$perms, 'perms'=>array_unique($perms_unique)];
195229
}
196230

197231
/** Parse Play Store page for a given category and return package names
@@ -201,7 +235,7 @@ public function parsePerms($packageName, $lang='en') {
201235
* @return array array of package names
202236
*/
203237
public function parseCategory($category) {
  // a category listing is an ordinary Play Store page, so parse() extracts the package names from it
  return $this->parse("https://play.google.com/store/apps/category/" . $category);
}
207241

@@ -211,7 +245,7 @@ public function parseCategory($category) {
211245
*/
212246
public function parseCategories() {
  // category links are harvested from a well-known app page (Gmail), fetched in English/US
  $input = file_get_contents('https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en&gl=US');
  if ( $input === false ) {
    return []; // page could not be retrieved, so there is nothing to extract
  }
  preg_match_all('!href="/store/apps/category/([^"]+)"[^>]*>([^<]+)!i', $input, $cats);
  // array_unique() keeps the original keys, leaving gaps; reindex so the result is a
  // plain list, matching what parse() returns
  return array_values(array_unique($cats[1]));
}
217251

@@ -221,7 +255,7 @@ public function parseCategories() {
221255
* @return array array of package names
222256
*/
223257
public function parseSearch($query) {
  // the search term goes into the query string, so it must be URL-encoded:
  // unencoded spaces, '&' or '#' would otherwise corrupt or truncate the request
  $link = "https://play.google.com/store/search?q=" . urlencode($query) . "&c=apps";
  // the result page is scanned for app links by parse(), which returns the package names
  return $this->parse($link);
}
227261
}

0 commit comments

Comments
 (0)