Skip to content

Commit 6367518

Browse files
committed
更新數據插入
1 parent 84e0109 commit 6367518

7 files changed

Lines changed: 249 additions & 23 deletions

File tree

AuthorizeManage.dll

7 KB
Binary file not shown.

facebook/AuthorizeManage.dll

7 KB
Binary file not shown.

facebook/FB_win.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ async def on_search(self):
394394
'search_content': search_content,
395395
'search_count': search_count,
396396
'crawl_count': crawl_count,
397-
'device_id': device_id,
397+
'device': device_id,
398398
'combo_value': combo_value,
399399
'action': 'search',
400400
'types': 'facebook'

facebook/FBmain.py

Lines changed: 116 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,14 @@
1010
import shutil # 适用于Linux和macOS
1111
import sys
1212
import re
13-
import aiohttp
14-
import requests
13+
import datetime
1514
from urllib.parse import urlparse, parse_qs
1615
from PyQt5.QtWidgets import QApplication
1716
from FB_loginwin import win_main
1817
from playwright.async_api import async_playwright
1918
from FB_status import StatusWindow
2019
from database_manager import db_manager
21-
20+
from concurrent.futures import ThreadPoolExecutor
2221

2322
class Crawler:
2423
def __init__(self, cookies, params):
@@ -40,6 +39,8 @@ def __init__(self, cookies, params):
4039
self.supportId = ""
4140
self.post_user_cunt = 0
4241
self.post_name = ""
42+
self.eq_type = None
43+
self.device = params.get('device')
4344

4445
async def safe_update_status(self, text):
4546
"""安全的异步状态更新"""
@@ -527,7 +528,7 @@ async def getusers(self):
527528
if not addresses:
528529
print("没有提供地址列表")
529530
return
530-
531+
self.eq_type = 1
531532
for i in range(len(addresses)):
532533
url = addresses[i].strip()
533534
# 创建一个新的CSV文件名
@@ -545,6 +546,8 @@ async def getusers(self):
545546
groups_num_selector = '//span[@dir="auto"]/div//a[@role="link" and contains(text(), "成員")]'
546547
groups_num_function = await self.page.wait_for_selector(groups_num_selector, timeout=10000)
547548
self.groups_num = await groups_num_function.inner_text()
549+
day_match = re.search(r'\d+', self.groups_num)
550+
self.groups_num = day_match.group()
548551
print(self.groups_num)
549552
await self.robust_update_status(f"社團名:{self.groups_name} 社團人數:{self.groups_num}")
550553
except Exception as e:
@@ -590,7 +593,7 @@ async def getusers(self):
590593
'name': text.strip(),
591594
'user_id': user_id
592595
})
593-
print(f"{user_counter}{user_id} {text.strip()}")
596+
# print(f"{user_counter}:{user_id} {text.strip()}")
594597
await self.robust_update_status(f"{user_counter}{user_id} {text.strip()}")
595598
in_csv_data.append([user_id, text.strip(), self.extract_group_id(url)])
596599

@@ -616,20 +619,42 @@ async def getusers(self):
616619
if scroll_attempts >= 3: # 连续3次没有新用户就停止
617620
print("已加载所有用户")
618621
break
619-
# 将数据写入CSV文件
620-
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
621-
csv_writer = csv.writer(csvfile)
622-
csv_writer.writerow(['userid', 'username', 'societiesid']) # 写入表头
623-
csv_writer.writerows(in_csv_data) # 写入数据
624-
print(f"爬取完成,共获取 {user_counter} 个用户信息")
622+
if len(users) > 0:
623+
# 将数据写入CSV文件
624+
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
625+
csv_writer = csv.writer(csvfile)
626+
csv_writer.writerow(['userid', 'username', 'societiesid']) # 写入表头
627+
csv_writer.writerows(in_csv_data) # 写入数据
628+
print(f"爬取完成,共获取 {user_counter} 个用户信息")
629+
# 使用线程池执行数据库提交(避免阻塞主线程)
630+
loop = asyncio.get_event_loop()
631+
with ThreadPoolExecutor() as executor:
632+
await loop.run_in_executor(
633+
executor,
634+
submit_data_to_database,
635+
csv_filename,
636+
i,
637+
self.eq_type,
638+
self.extract_group_id(url), # 社团ID
639+
self.groups_name, # 社团名称
640+
self.groups_num, # 总成员数
641+
user_counter # 实际获取数量
642+
)
643+
644+
await self.robust_update_status(f"{csv_filename}数据提交完成")
645+
upend_time = datetime.datetime.now()
646+
db_manager.update_updata_table(self.device, len(users), upend_time)
647+
else:
648+
print("無用戶")
649+
625650
# return users
626651

627652
async def getusers_fans(self):
628653
addresses = self.params.get('addresses', [])
629654
if not addresses:
630655
print("没有提供地址列表")
631656
return
632-
657+
self.eq_type = 2
633658
for i in range(len(addresses)):
634659
url = addresses[i].strip()
635660
# 创建一个新的CSV文件名
@@ -703,7 +728,7 @@ async def getusers_fans(self):
703728
'name': text.strip(),
704729
'user_id': user_id
705730
})
706-
print(f"{user_counter}{user_id} {text.strip()}")
731+
# print(f"{user_counter}:{user_id} {text.strip()}")
707732
await self.robust_update_status(f"{user_counter}{user_id} {text.strip()}")
708733
in_csv_data.append([user_id, text.strip(), await self.extract_facebook_identifier(url)])
709734

@@ -729,12 +754,32 @@ async def getusers_fans(self):
729754
if scroll_attempts >= 3: # 连续3次没有新用户就停止
730755
print("已加载所有用户")
731756
break
732-
# 将数据写入CSV文件
733-
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
734-
csv_writer = csv.writer(csvfile)
735-
csv_writer.writerow(['userid', 'username', 'societiesid']) # 写入表头
736-
csv_writer.writerows(in_csv_data) # 写入数据
737-
print(f"爬取完成,共获取 {user_counter} 个用户信息")
757+
if len(users) > 0:
758+
# 将数据写入CSV文件
759+
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
760+
csv_writer = csv.writer(csvfile)
761+
csv_writer.writerow(['userid', 'username', 'societiesid']) # 写入表头
762+
csv_writer.writerows(in_csv_data) # 写入数据
763+
print(f"爬取完成,共获取 {user_counter} 个用户信息")
764+
# 使用线程池执行数据库提交(避免阻塞主线程)
765+
loop = asyncio.get_event_loop()
766+
with ThreadPoolExecutor() as executor:
767+
await loop.run_in_executor(
768+
executor,
769+
submit_data_to_database,
770+
csv_filename,
771+
i,
772+
self.eq_type,
773+
self.extract_fans_id(url), # 粉丝专页ID
774+
self.groups_name, # 粉丝专页名称
775+
self.groups_num, # 总粉丝数
776+
user_counter # 实际获取数量
777+
)
778+
await self.robust_update_status(f"{csv_filename}数据提交完成")
779+
upend_time = datetime.datetime.now()
780+
db_manager.update_updata_table(self.device, len(users), upend_time)
781+
else:
782+
print("無用戶")
738783

739784
async def getusers_like(self):
740785
await self.page.goto(url="https://www.facebook.com/", wait_until='load', timeout=50000)
@@ -987,6 +1032,8 @@ async def get_sponsor_user(self):
9871032
}
9881033
print('提交数据', post_info)
9891034
db_manager.insert_post_info(post_info)
1035+
upend_time = datetime.datetime.now()
1036+
db_manager.update_updata_table(self.device, len(users), upend_time)
9901037
else:
9911038
print('数据为空不提交')
9921039
await self.robust_update_status(f"沒有成員跳過~")
@@ -1206,6 +1253,56 @@ async def force_minimize_browser(self):
12061253
self.minimize_browser_window()
12071254

12081255

1256+
def submit_data_to_database(csv_filename, batch_number, eq_type, societies_url_id, societies_name, total_number,
                            getnum, batch_size=80):
    """Upload the rows of a crawl CSV to the database in batches.

    The CSV is read in full (its first row is treated as a header and
    skipped), then sent to ``db_manager`` in slices of ``batch_size`` rows.
    After the row batches, one summary record is written. The CSV file is
    deleted only when every batch and the summary insert succeeded, so rows
    that failed to upload are never lost with the file.

    Args:
        csv_filename: Path of the CSV file to upload.
        batch_number: Label used in the progress messages.
        eq_type: ``1`` routes rows to the group-member insert and the group
            summary insert; any other value routes to the fan equivalents.
        societies_url_id: Identifier stored in the summary record.
        societies_name: Name stored in the summary record.
        total_number: Total member/fan count stored in the summary record.
        getnum: Number of users actually collected, stored in the summary.
        batch_size: Rows per insert call (default 80).

    Returns:
        True when all inserts succeeded and the CSV was removed; False when
        any insert failed (CSV kept) or an exception was raised.
    """
    try:
        with open(csv_filename, 'r', newline='', encoding='utf-8') as csvfile:
            csv_reader = csv.reader(csvfile)
            next(csv_reader)  # skip the header row
            data = list(csv_reader)
        print(f"批次 {batch_number}: 开始提交数据...")

        # Pick the row-insert routine once instead of re-testing per batch.
        if eq_type == 1:
            insert_batch = db_manager.insert_societies_user_batch
        else:
            insert_batch = db_manager.insert_fans_user_batch

        failed_batches = 0
        total_rows = len(data)
        for start in range(0, total_rows, batch_size):
            batch_index = start // batch_size + 1

            if not insert_batch(data[start:start + batch_size]):
                print(f"批次 {batch_number}{batch_index} 批数据插入失败")
                failed_batches += 1
                continue

            print(f"批次 {batch_number}{batch_index} 批数据插入成功")

            # Throttle between batches only; there is nothing to wait for
            # once the final batch has been sent.
            if start + batch_size < total_rows:
                time.sleep(random.uniform(8, 14))

        print(f"批次 {batch_number}: 提交完成,共 {total_rows} 条数据")

        # Write the summary record after the row batches.
        if eq_type == 1:
            summary_ok = db_manager.insert_societies_inf(societies_url_id, societies_name, total_number, getnum)
        else:
            summary_ok = db_manager.insert_fans_inf(societies_url_id, societies_name, total_number, getnum)

        if failed_batches or not summary_ok:
            # Keep the CSV so the rows that did not reach the database can
            # still be re-submitted.
            print(f"批次 {batch_number}: 提交未完全成功,保留 {csv_filename} 以便重试")
            return False

        # Everything is stored; the local copy is no longer needed.
        os.remove(csv_filename)
        print(f"完成: {csv_filename}")
        return True

    except Exception as e:
        print(f"提交数据到数据库时出错: {str(e)}")
        return False
12091306
def parse_bool(type_data):
    """Interpret a loosely-typed flag value as a boolean.

    The value is converted with ``str()``, lower-cased and stripped of
    surrounding whitespace; it is truthy only when the result is one of
    ``'true'``, ``'1'`` or ``'yes'``.

    Args:
        type_data: Any value (string, number, bool, None, ...).

    Returns:
        True for the accepted spellings above, False for everything else.
    """
    type_data = str(type_data).lower().strip()
    # The original tuple listed 'yes' twice; the duplicate had no effect.
    return type_data in ('true', '1', 'yes')

facebook/authorization.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def check_authorization():
151151
connection_string = (
152152
r"Driver={SQL Server};"
153153
r"Server=dbs.kydb.vip;"
154-
r"Database=LINEGroupSend;"
154+
r"Database=DeviceAuthData;"
155155
r"UID=sa;"
156156
r"PWD=Yunsin@#861123823_shp4;"
157157
r"timeout=35;"
@@ -163,7 +163,7 @@ def check_authorization():
163163

164164
query = """
165165
SELECT PCCoded, installDate, ExpiryDate
166-
FROM FBUserData
166+
FROM UserData
167167
WHERE PCCoded = ? \
168168
"""
169169
cursor.execute(query, (machine_code,))

facebook/database_manager.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,5 +230,134 @@ def insert_post_info(self, post_info: any) -> bool:
230230
pass
231231
return False
232232

233+
def insert_societies_user_batch(self, data):
    """Bulk-insert group-member rows into the ``societiesUser`` table.

    Args:
        data: Sequence of ``(userid, username, societiesid)`` rows.

    Returns:
        True when the rows were committed (or there was nothing to insert);
        False when any step raised. The transaction is rolled back if a
        connection had been obtained.
    """
    if not data:
        # Nothing to insert; skip the round trip (some drivers reject an
        # empty executemany parameter list).
        return True

    # Bind both names before the try block so the except/finally clauses
    # can test them even when get_connection() or cursor() itself raises.
    conn = None
    cursor = None
    try:
        conn = self.get_connection()
        cursor = conn.cursor()

        # One executemany call inserts the whole batch.
        cursor.executemany(
            "INSERT INTO societiesUser (userid, username, societiesid) VALUES (?, ?, ?)",
            data
        )
        conn.commit()
        print(f"成功插入 {len(data)} 条社团用户数据")
        return True
    except Exception as e:
        print(f"插入社团用户数据时出错: {e}")
        if conn is not None:
            conn.rollback()
        return False
    finally:
        if cursor is not None:
            cursor.close()
255+
256+
def insert_fans_user_batch(self, data):
    """Bulk-insert fan-page user rows into the ``FansUser`` table.

    Args:
        data: Sequence of ``(userid, username, societiesid)`` rows.

    Returns:
        True when the rows were committed (or there was nothing to insert);
        False when any step raised. The transaction is rolled back if a
        connection had been obtained.
    """
    if not data:
        # Nothing to insert; skip the round trip (some drivers reject an
        # empty executemany parameter list).
        return True

    # Bind both names before the try block so the except/finally clauses
    # can test them even when get_connection() or cursor() itself raises.
    conn = None
    cursor = None
    try:
        conn = self.get_connection()
        cursor = conn.cursor()

        # One executemany call inserts the whole batch.
        cursor.executemany(
            "INSERT INTO FansUser (userid, username, societiesid) VALUES (?, ?, ?)",
            data
        )
        conn.commit()
        print(f"成功插入 {len(data)} 条粉丝用户数据")
        return True
    except Exception as e:
        print(f"插入粉丝用户数据时出错: {e}")
        if conn is not None:
            conn.rollback()
        return False
    finally:
        if cursor is not None:
            cursor.close()
278+
279+
280+
def insert_societies_inf(self, societiesid, societiesname, number, getnum):
    """Insert one group summary row into the ``societiesInf`` table.

    Args:
        societiesid: Value for the ``societiesid`` column.
        societiesname: Value for the ``societiesname`` column.
        number: Value for the ``number`` column.
        getnum: Value for the ``getnum`` column.

    Returns:
        True when the row was committed; False when any step raised. The
        transaction is rolled back if a connection had been obtained.
    """
    # Bind both names before the try block so the except/finally clauses
    # can test them even when get_connection() or cursor() itself raises.
    conn = None
    cursor = None
    try:
        conn = self.get_connection()
        cursor = conn.cursor()

        # Parameters go in as one sequence, the DB-API 2.0 calling form.
        cursor.execute(
            "INSERT INTO societiesInf (societiesid, societiesname, number, getnum) VALUES (?, ?, ?, ?)",
            (societiesid, societiesname, number, getnum)
        )
        conn.commit()
        print(f"成功插入社团信息: {societiesid} - {societiesname}")
        return True
    except Exception as e:
        print(f"插入社团信息失败: {e}")
        if conn is not None:
            conn.rollback()
        return False
    finally:
        if cursor is not None:
            cursor.close()
301+
302+
303+
def insert_fans_inf(self, userid, fansname, number, getnum):
    """Insert one fan-page summary row into the ``FansInf`` table.

    Args:
        userid: Value for the ``userid`` column.
        fansname: Value for the ``fansname`` column.
        number: Value for the ``number`` column.
        getnum: Value for the ``getnum`` column.

    Returns:
        True when the row was committed; False when any step raised. The
        transaction is rolled back if a connection had been obtained.
    """
    # Bind both names before the try block so the except/finally clauses
    # can test them even when get_connection() or cursor() itself raises.
    conn = None
    cursor = None
    try:
        conn = self.get_connection()
        cursor = conn.cursor()

        # Parameters go in as one sequence, the DB-API 2.0 calling form.
        cursor.execute(
            "INSERT INTO FansInf (userid, fansname, number, getnum) VALUES (?, ?, ?, ?)",
            (userid, fansname, number, getnum)
        )
        conn.commit()
        print(f"成功插入粉丝专页信息: {userid} - {fansname}")
        return True
    except Exception as e:
        print(f"插入粉丝专页信息失败: {e}")
        if conn is not None:
            conn.rollback()
        return False
    finally:
        if cursor is not None:
            cursor.close()
324+
325+
def update_updata_table(self, device_name, updatanumber, uptime):
    """Upsert the per-device daily counter in the ``upData`` table.

    Issues a single MERGE statement: when a row already exists for the same
    ``deviceName`` whose ``uptime`` falls on the same calendar date as the
    supplied ``uptime``, ``updatanumber`` is added to the stored count and
    ``uptime`` is overwritten; otherwise a new row is inserted.

    Args:
        device_name: Device name matched against ``deviceName``.
        updatanumber: Number of records to add (or the initial count).
        uptime: Timestamp of this update.

    Returns:
        True when the statement was committed; False when any step raised.
        The transaction is rolled back if a connection had been obtained.
    """
    # Bind both names before the try block so the except/finally clauses
    # can test them even when get_connection() or cursor() itself raises.
    conn = None
    cursor = None
    try:
        conn = self.get_connection()
        cursor = conn.cursor()

        # Parameters go in as one sequence, the DB-API 2.0 calling form.
        cursor.execute("""
            MERGE INTO upData AS target
            USING (VALUES (?, ?, ?)) AS source(deviceName, updatanumber, uptime)
            ON target.deviceName = source.deviceName AND CAST(target.uptime AS DATE) = CAST(source.uptime AS DATE)
            WHEN MATCHED THEN
                UPDATE SET target.updatanumber = target.updatanumber + source.updatanumber,
                           target.uptime = source.uptime
            WHEN NOT MATCHED THEN
                INSERT (deviceName, updatanumber, uptime)
                VALUES (source.deviceName, source.updatanumber, source.uptime);
        """, (device_name, updatanumber, uptime))

        conn.commit()
        print(f"成功更新 upData 表: {device_name} - {updatanumber} - {uptime}")
        return True
    except Exception as e:
        print(f"更新 upData 表失败: {e}")
        if conn is not None:
            conn.rollback()
        return False
    finally:
        if cursor is not None:
            cursor.close()
233362
# 创建全局实例
234363
db_manager = DatabaseManager()

facebook/layout.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def _init_ui(self):
8787
version_label.setAlignment(Qt.AlignHCenter)
8888
nav_layout.addWidget(version_label)
8989

90-
day_label = QLabel(f"剩余天數: {self.day}")
90+
day_label = QLabel(f"剩余: {self.day}")
9191
day_label.setFont(QFont("微軟雅黑", 8))
9292
day_label.setStyleSheet("color: #999;")
9393
day_label.setAlignment(Qt.AlignHCenter)

0 commit comments

Comments
 (0)