11<?php
2- /**
3- *
4- * @Name : GooglePlayWebServiceAPI/google-play.php
5- * @Version : 0.3
6- * @Programmer : Max & Izzy
7- * @Date : 2020-10-19, 2020-10-25, 2020-10-29, 2020-10-30, 2020-12-05, 2020-12-06
8- * @Released under : https://github.com/BaseMax/GooglePlayWebServiceAPI/blob/master/LICENSE
9- * @Repository : https://github.com/BaseMax/GooglePlayWebServiceAPI
10- *
11- **/
2+ /** Crawl information of a specific application in the Google Play Store
3+ * @class GooglePlay
4+ * @version 0.3
5+ * @author Max & Izzy
6+ * @copyright MIT https://github.com/BaseMax/GooglePlayWebServiceAPI/blob/master/LICENSE
7+ * @log 2020-10-19 first release
8+ * @log 2020-12-07 recent version
9+ * @brief releases: 2020-10-19, 2020-10-25, 2020-10-29, 2020-10-30, 2020-12-05, 2020-12-06
10+ * @webpage repository https://github.com/BaseMax/GooglePlayWebServiceAPI
11+ **/
1212class GooglePlay {
private $debug = false; // toggle debug output (results are dumped via print_r when true)
private $input = ''; // content retrieved from remote
private $lastError = ''; // error text recorded by getApplicationPage() when a fetch fails
1416
/** Parse a given RegEx and return the match marked by '(?<content>)'
 * @method protected getRegVal
 * @param string regEx regular expression, applied to the content stored in $this->input;
 *        it has to define a named group 'content'
 * @return string|null trimmed text captured by the 'content' group, null if there was no match
 */
protected function getRegVal($regEx) {
    // preg_match() yields 1 only on a hit; 0 (no match) and false (PCRE error) both end up as null
    if (preg_match($regEx, $this->input, $res) === 1 && isset($res['content'])) {
        return trim($res['content']);
    }
    return null;
}
2527
/** Fetch app page from Google Play
 * @method protected getApplicationPage
 * @param string packageName identifier for the app, e.g. 'com.example.app'
 * @param optional string lang language for translations, sent as the 'hl' URL parameter. Default: en_US
 * @param optional string loc locale, mainly for currency, sent as the 'gl' URL parameter. Two-letter, uppercase. Default: US
 * @return bool success. On success the page is stored in $this->input; on failure $this->lastError
 *         holds the HTTP status line, or 'network error' if no response was received at all
 */
protected function getApplicationPage($packageName, $lang='en_US', $loc='US') {
    // encode every user-supplied part so it cannot break or extend the query string
    $link = 'https://play.google.com/store/apps/details?id=' . urlencode($packageName)
          . '&hl=' . urlencode($lang) . '&gl=' . urlencode($loc);
    $page = @file_get_contents($link);
    if ($page === false || $page === '') {
        $this->input = '';
        // $http_response_header only exists if the server answered; it is not set on
        // connection failures (DNS, timeout, ...), so it must not be read blindly
        $this->lastError = isset($http_response_header[0]) ? $http_response_header[0] : 'network error';
        return false;
    }
    $this->input = $page;
    $this->lastError = ''; // do not leave a stale message from an earlier failed call
    return true;
}
44+
2645 /** Obtain details on a given app
2746 * @method public parseApplication
2847 * @param string packageName identifier for the app, e.g. 'com.example.app'
@@ -34,60 +53,59 @@ protected function getRegVal($regEx) {
3453 * Success is signaled by success=1, and details are given via the keys
3554 * packageName, name, developer, category, type (game, app, family), description,
3655 * icon, images (array of screenshot URLs), updated, version, require (min Android version),
37- * install (number of installs), age, rating (float), votes, price, size
56+ * install (number of installs), age, rating (float), votes, price, size,
57+ * ads (has ads: 0|1), iap (in-app-payment used: 0|1)
3858 * if not explicitly specified otherwise, values are strings
3959 */
4060 public function parseApplication ($ packageName , $ lang ='en_US ' , $ loc ='US ' ) {
41- $ link ="https://play.google.com/store/apps/details?id= " .$ packageName ."&hl= $ lang&gl= $ loc " ;
42- if ( ! $ this ->input = @file_get_contents ($ link ) ) {
43- return ['success ' =>0 ,'message ' =>'Google returned: ' .$ http_response_header [0 ]];
61+ if ( ! $ this ->getApplicationPage ($ packageName , $ lang , $ loc ) ) {
62+ return ['success ' =>0 ,'message ' =>$ this ->lastError ];
4463 }
45- $ values= [];
46- $ values ["packageName " ]= $ packageName ;
64+ $ values = [];
65+ $ values ["packageName " ] = $ packageName ;
4766
4867 $ values ["name " ] = strip_tags ($ this ->getRegVal ('/itemprop="name">(?<content>.*?)<\/h1>/ ' ));
4968 if ($ values ["name " ]==null ) {
50- return ['success ' =>0 ,'message ' =>'No app data found ' ];
69+ return ['success ' =>0 , 'message ' =>'No app data found ' ];
5170 }
5271
5372 $ values ["developer " ] = strip_tags ($ this ->getRegVal ('/href="\/store\/apps\/developer\?id=(?<id>[^\"]+)"([^\>]+|)>(?<content>[^\<]+)<\/a>/i ' ));
5473
5574 preg_match ('/itemprop="genre" href="\/store\/apps\/category\/(?<id>[^\"]+)"([^\>]+|)>(?<content>[^\<]+)<\/a><\/span>/i ' , $ this ->input , $ category );
56- if (isset ($ category ["id " ], $ category ["content " ])) {
57- $ values ["category " ]= trim (strip_tags ($ category ["content " ]));
58- $ catId= trim (strip_tags ($ category ["id " ]));
59- if ($ catId =='GAME ' || substr ($ catId ,0 ,5 )=='GAME_ ' ) $ values ["type " ]= "game " ;
60- elseif ($ catId =='FAMILY ' || substr ($ catId ,0 ,7 )=='FAMILY? ' ) $ values ["type " ]= "family " ;
61- else $ values ["type " ]= "app " ;
75+ if (isset ($ category ["id " ], $ category ["content " ])) {
76+ $ values ["category " ] = trim (strip_tags ($ category ["content " ]));
77+ $ catId = trim (strip_tags ($ category ["id " ]));
78+ if ($ catId =='GAME ' || substr ($ catId ,0 ,5 )=='GAME_ ' ) $ values ["type " ] = "game " ;
79+ elseif ($ catId =='FAMILY ' || substr ($ catId ,0 ,7 )=='FAMILY? ' ) $ values ["type " ] = "family " ;
80+ else $ values ["type " ] = "app " ;
6281 } else {
63- $ values ["category " ]= null ;
64- $ values ["type " ]= null ;
82+ $ values ["category " ] = null ;
83+ $ values ["type " ] = null ;
6584 }
6685
67- $ proto = json_decode ($ this ->getRegVal ('/data:(?<content>\[\[\[.+?). sideChannel: .*?\);<\/script/ims ' ));
68- $ values ["summary " ] = $ proto [0 ][10 ][1 ][1 ];
86+ $ values ["summary " ] = '' ;
6987 $ values ["description " ] = $ this ->getRegVal ('/itemprop="description"><span jsslot><div jsname="sngebd">(?<content>.*?)<\/div><\/span><div/i ' );
7088 $ values ["icon " ] = $ this ->getRegVal ('/<div class="hkhL9e"><div class="xSyT2c"><img src="(?<content>[^\"]+)"/i ' );
71- $ values ["featureGraphic " ] = preg_replace ('!(.*)=w\d+.*!i ' ,'$1 ' ,$ this ->getRegVal ('/<meta name="twitter:image" content="(?<content>[^\"]+)"/i ' ));
89+ $ values ["featureGraphic " ] = preg_replace ('!(.*)=w\d+.*!i ' , '$1 ' , $ this ->getRegVal ('/<meta name="twitter:image" content="(?<content>[^\"]+)"/i ' ));
7290
7391 preg_match ('/<div class="Rx5dXb"([^\>]+|)>(?<content>.*?)<c-data/i ' , $ this ->input , $ image );
74- if ( isset ($ image ["content " ])) {
92+ if ( isset ($ image ["content " ]) ) {
7593 preg_match_all ('/<img data-src="(?<content>[^\"]+)"/i ' , $ image ["content " ], $ images );
76- if ( isset ($ images ["content " ]) && !empty ($ images ["content " ])) {
77- $ values ["images " ]= $ images ["content " ];
94+ if ( isset ($ images ["content " ]) && !empty ($ images ["content " ]) ) {
95+ $ values ["images " ] = $ images ["content " ];
7896 } else {
7997 preg_match_all ('/<img src="[^"]*" srcset="(?<content>[^\s"]+)/i ' , $ image ["content " ], $ images );
80- if ( isset ($ images ["content " ])) {
81- $ values ["images " ]= $ images ["content " ];
98+ if ( isset ($ images ["content " ]) ) {
99+ $ values ["images " ] = $ images ["content " ];
82100 } else {
83- $ values ["images " ]= null ;
101+ $ values ["images " ] = null ;
84102 }
85103 }
86104 } else {
87- $ values ["images " ]= null ;
105+ $ values ["images " ] = null ;
88106 }
89107
90- if (substr (strtolower ($ lang ),0 ,2 )=='en ' ) {
108+ if ( substr (strtolower ($ lang ),0 ,2 )=='en ' ) {
91109 $ values ["lastUpdated " ] = strip_tags ($ this ->getRegVal ('/<div class="BgcNfc">Updated<\/div><span class="htlgb"><div class="IQ1z0d"><span class="htlgb">(?<content>.*?)<\/span><\/div><\/span><\/div>/i ' ));
92110 $ values ["versionName " ] = strip_tags ($ this ->getRegVal ('/<div class="BgcNfc">Current Version<\/div><span class="htlgb"><div class="IQ1z0d"><span class="htlgb">(?<content>.*?)<\/span><\/div><\/span><\/div>/i ' ));
93111 $ values ["minimumSDKVersion " ] = strip_tags ($ this ->getRegVal ('/<div class="hAyfc"><div class="BgcNfc">Requires Android<\/div><span class="htlgb"><div class="IQ1z0d"><span class="htlgb">(?<content>.*?)<\/span><\/div><\/span><\/div>/i ' ));
@@ -96,14 +114,30 @@ public function parseApplication($packageName, $lang='en_US', $loc='US') {
96114 $ values ["size " ] = $ this ->getRegVal ('/<div class="BgcNfc">Size<\/div><span class="htlgb"><div class="IQ1z0d"><span class="htlgb">(?<content>[^<]+)<\/span>/i ' );
97115 } else {
98116 $ envals = $ this ->parseApplication ($ packageName );
99- foreach (["lastUpdated " ,"versionName " ,"minimumSDKVersion " ,"installs " ,"age " ,"size " ] as $ val ) $ values [$ val ]= $ envals [$ val ];
117+ foreach (["lastUpdated " ,"versionName " ,"minimumSDKVersion " ,"installs " ,"age " ,"size " ] as $ val ) $ values [$ val ] = $ envals [$ val ];
100118 }
101119
102120 $ values ["rating " ] = $ this ->getRegVal ('/<div class="BHMmbe"[^>]*>(?<content>[^<]+)<\/div>/i ' );
103121 $ values ["votes " ] = $ this ->getRegVal ('/<span class="AYi5wd TBRnV"><span[^>]*>(?<content>[^>]+)<\/span>/i ' );
104122 $ values ["price " ] = $ this ->getRegVal ('/<meta itemprop="price" content="(?<content>[^"]+)">/i ' );
123+ $ test = $ this ->getRegVal ('/<div class="bSIuKf">(?<content>[^<]+)<div/i ' ); // <div class="bSIuKf">Contains Ads<div
124+ (empty ($ test )) ? $ values ["ads " ] = 0 : $ values ["ads " ] = 1 ;
125+ $ test = $ this ->getRegVal ('/<div class="aEKMHc">·<\/div>(?<content>[^<]+)</i ' ); // <div class="aEKMHc">·</div>Offers in-app purchases</div>
126+ (empty ($ test )) ? $ values ["iap " ] = 0 : $ values ["iap " ] = 1 ;
127+
128+ $ limit = 3 ;
129+ while ( empty ($ values ["summary " ]) && $ limit > 0 ) { // sometimes protobuf is missing, but present again on subsequent call
130+ $ proto = json_decode ($ this ->getRegVal ('/data:(?<content>\[\[\[.+?). sideChannel: .*?\);<\/script/ims ' ));
131+ if ( empty ($ proto [0 ][10 ]) ) {
132+ --$ limit ;
133+ $ this ->getApplicationPage ($ packageName , $ lang , $ loc );
134+ } else {
135+ $ values ["summary " ] = $ proto [0 ][10 ][1 ][1 ];
136+ break ;
137+ }
138+ }
105139
106- if ($ this ->debug ) {
140+ if ($ this ->debug ) {
107141 print_r ($ values );
108142 }
109143 $ values ['success ' ] = 1 ;
@@ -117,18 +151,18 @@ public function parseApplication($packageName, $lang='en_US', $loc='US') {
117151 */
118152 public function parse ($ link =null ) {
119153 if ($ link == "" || $ link == null ) {
120- $ link= "https://play.google.com/apps " ;
154+ $ link = "https://play.google.com/apps " ;
121155 }
122- $ input= file_get_contents ($ link );
156+ $ input = file_get_contents ($ link );
123157 preg_match_all ('/href="\/store\/apps\/details\?id=(?<ids>[^\"]+)"/i ' , $ input , $ ids );
124- if ( isset ($ ids ["ids " ])) {
125- $ ids= $ ids ["ids " ];
126- $ ids= array_values (array_unique ($ ids ));
127- $ values= $ ids ;
158+ if ( isset ($ ids ["ids " ]) ) {
159+ $ ids = $ ids ["ids " ];
160+ $ ids = array_values (array_unique ($ ids ));
161+ $ values = $ ids ;
128162 } else {
129- $ values= [];
163+ $ values = [];
130164 }
131- if ($ this ->debug ) {
165+ if ($ this ->debug ) {
132166 print_r ($ values );
133167 }
134168 return $ values ;
@@ -152,29 +186,29 @@ public function parsePerms($packageName, $lang='en') {
152186 'method ' => 'POST ' ,
153187 'header ' => 'Content-type: application/x-www-form-urlencoded;charset=utf-8 '
154188 ."\r\n" .'Referer: https://play.google.com/ ' ,
155- 'content ' => 'f.req=%5B%5B%5B%22xdSrCf%22%2C%22%5B%5Bnull%2C%5B%5C%22 ' . $ packageName. '%5C%22%2C7%5D%2C%5B%5D%5D%5D%22%2Cnull%2C%221%22%5D%5D%5D ' ,
189+ 'content ' => 'f.req=%5B%5B%5B%22xdSrCf%22%2C%22%5B%5Bnull%2C%5B%5C%22 ' . $ packageName . '%5C%22%2C7%5D%2C%5B%5D%5D%5D%22%2Cnull%2C%221%22%5D%5D%5D ' ,
156190 'ignore_errors ' => TRUE
157191 )
158192 ];
159193 $ context = stream_context_create ($ opts );
160- if ( $ proto = @file_get_contents ('https://play.google.com/_/PlayStoreUi/data/batchexecute?rpcids=xdSrCf&bl=boq_playuiserver_20201201.06_p0&hl= ' . $ lang. '&authuser&soc-app=121&soc-platform=1&soc-device=1&rt=c&f.sid=-8792622157958052111&_reqid=257685 ' , false , $ context ) ) { // raw proto_buf data
161- preg_match ("!HTTP/1\.\d\s+(\d{3})\s+(.+)$!i " ,$ http_response_header [0 ],$ match );
194+ if ( $ proto = @file_get_contents ('https://play.google.com/_/PlayStoreUi/data/batchexecute?rpcids=xdSrCf&bl=boq_playuiserver_20201201.06_p0&hl= ' . $ lang . '&authuser&soc-app=121&soc-platform=1&soc-device=1&rt=c&f.sid=-8792622157958052111&_reqid=257685 ' , false , $ context ) ) { // raw proto_buf data
195+ preg_match ("!HTTP/1\.\d\s+(\d{3})\s+(.+)$!i " , $ http_response_header [0 ], $ match );
162196 $ response_code = $ match [1 ];
163197 switch ($ response_code ) {
164198 case "200 " : // HTTP/1.0 200 OK
165199 break ;
166200 case "400 " : // echo "! No XHR for '$pkg'\n";
167201 case "404 " : // app no longer on play
168202 default :
169- return ['success ' =>0 ,'message ' =>$ http_response_header [0 ]];
203+ return ['success ' =>0 , 'message ' =>$ http_response_header [0 ]];
170204 break ;
171205 }
172206 } else { // network error (e.g. "failed to open stream: Connection timed out")
173- return ['success ' =>0 ,'message ' =>'network error ' ];
207+ return ['success ' =>0 , 'message ' =>'network error ' ];
174208 }
175209
176210 $ perms = $ perms_unique = [];
177- $ json = preg_replace ('!.*?(\[.+?\])\s*\d.*!ims ' ,'$1 ' ,$ proto );
211+ $ json = preg_replace ('!.*?(\[.+?\])\s*\d.*!ims ' , '$1 ' , $ proto );
178212 $ arr = json_decode (json_decode ($ json )[0 ][2 ]);
179213 if (!empty ($ arr [0 ])) foreach ($ arr [0 ] as $ group ) { // 0: group name, 1: group icon, 2: perms, 3: group_id
180214 if (empty ($ group )) continue ;
@@ -191,7 +225,7 @@ public function parsePerms($packageName, $lang='en') {
191225 foreach ($ arr [2 ] as $ perm ) $ perms_unique [] = $ perm [1 ];
192226 }
193227
194- return ['success ' =>1 ,'grouped ' =>$ perms ,'perms ' =>array_unique ($ perms_unique )];
228+ return ['success ' =>1 , 'grouped ' =>$ perms , 'perms ' =>array_unique ($ perms_unique )];
195229 }
196230
/** Parse Play Store page for a given category and return package names
 * @method public parseCategory
 * @param string category category identifier, appended as-is to the Play Store category URL
 * @return array array of package names
 */
public function parseCategory($category) {
    // the value is deliberately left unencoded so whatever callers pass reaches the URL path unchanged
    $link = 'https://play.google.com/store/apps/category/' . $category;
    return $this->parse($link);
}
207241
@@ -211,7 +245,7 @@ public function parseCategory($category) {
211245 */
public function parseCategories() {
    // Category links are scraped from the details page of one fixed app (com.google.android.gm).
    $input = file_get_contents('https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en&gl=US');
    if ($input === false) {
        // page could not be fetched, so there is nothing to extract
        return [];
    }
    // $cats[1] receives the category ids taken from the link targets, $cats[2] the link texts (unused)
    preg_match_all('!href="/store/apps/category/([^"]+)"[^>]*>([^<]+)!i', $input, $cats);
    // array_unique() keeps the original keys; re-index so the result is a gap-free list, as parse() does
    return array_values(array_unique($cats[1]));
}
217251
@@ -221,7 +255,7 @@ public function parseCategories() {
221255 * @return array array of package names
222256 */
public function parseSearch($query) {
    // Build the Play Store search URL (restricted to apps via c=apps) and hand it to parse(),
    // which collects the package names linked from that page.
    // The query is URL-encoded so spaces, '&', '#' and the like cannot break or alter the request.
    $link = 'https://play.google.com/store/search?q=' . urlencode($query) . '&c=apps';
    return $this->parse($link);
}
227261}
0 commit comments