https://h5.threeperson.com/#/ 是一个群二维码平台,每天更新 300–500 个群二维码,采集来源主要是抖音。
脚本如下。
// dy.js - Douyin automation crawler (core features, JS version)

// Directory that is created (if missing) the first time the browser session is set up.
const saveDir = 'C:/Users/40650/Desktop/qrcode';
// Text file that is truncated when the browser session is set up and appended to by saveInfo().
const infoTxt = 'qrcode_info.txt';
/**
 * Fetch an image and hand back its bytes only when it is recognised as a
 * WeChat QR code.
 *
 * The request is sent with browser-like headers and a Douyin referer. The
 * response body is passed to isWeixinQRCode(); nothing is written to disk.
 *
 * @param {string} url - Address of the image to fetch.
 * @returns {Promise<Buffer|null>} The image bytes when the HTTP status is 200
 *   and isWeixinQRCode() reports a match; null for any other status, for a
 *   non-matching image, or when fetching/recognition throws (the error is logged).
 */
async function downloadImage(url) {
  const requestOptions = {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      'Accept': 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8',
      'Accept-Language': 'zh-CN,zh;q=0.9',
      'Referer': 'https://www.douyin.com/'
    }
  };

  try {
    const response = await fetch(url, requestOptions);

    // Anything other than a plain 200 is treated as a failed download.
    if (response.status !== 200) {
      console.log('❌ 下载失败,状态码:', response.status);
      return null;
    }

    const imageBytes = Buffer.from(await response.arrayBuffer());
    console.log('准备识别图片:', url, 'buffer长度:', imageBytes.length);

    // Only images recognised as WeChat QR codes are returned to the caller.
    if (await isWeixinQRCode(imageBytes)) {
      return imageBytes;
    }
    console.log('❌ 非微信2维码图片,跳过提交');
    return null;
  } catch (e) {
    console.log('❌ 下载出错:', e);
    return null;
  }
}
/**
 * Decide whether an image contains a QR code whose payload looks WeChat-related.
 *
 * The image is decoded with sharp and scanned with @zxing/library under four
 * pre-processing variants (as-is, greyscale, negated, greyscale + negated).
 * The first variant that yields a QR payload decides the result: true when the
 * lower-cased text contains "weixin", "wx" or "wechat", false otherwise.
 *
 * @param {Buffer} buffer - Encoded image bytes.
 * @returns {Promise<boolean>} true only for a decoded, WeChat-looking QR code.
 *   Never rejects: any failure, including a dependency that cannot be
 *   imported, is logged and reported as false.
 */
async function isWeixinQRCode(buffer) {
  console.log('开始识别2维码...');
  try {
    // Both libraries are loaded lazily with import() (works for ESM-only builds).
    // The imports live inside the try so that a load failure is logged by the
    // catch-all below instead of rejecting.
    const sharpModule = await import('sharp');
    const sharp = sharpModule.default || sharpModule;
    const {
      MultiFormatReader,
      BarcodeFormat,
      RGBLuminanceSource,
      BinaryBitmap,
      HybridBinarizer,
      DecodeHintType,
    } = await import('@zxing/library');

    // Restrict decoding to QR codes. The hints are passed to decode() directly:
    // MultiFormatReader.decode(image, hints) applies its own hints argument, so
    // hints installed beforehand via setHints() would be replaced.
    const hints = new Map();
    hints.set(DecodeHintType.POSSIBLE_FORMATS, [BarcodeFormat.QR_CODE]);
    const reader = new MultiFormatReader();

    // Pre-processing variants: original, greyscale, negated, greyscale + negated.
    // Every pass starts from a fresh sharp(buffer) pipeline.
    const preprocessList = [
      (img) => img,
      (img) => img.greyscale(),
      (img) => img.negate(),
      (img) => img.greyscale().negate(),
    ];

    for (const preprocess of preprocessList) {
      const pipeline = preprocess(sharp(buffer));
      // Use the dimensions and channel count of the raw output actually produced
      // rather than assuming the input metadata and a fixed 4-byte pixel stride.
      const { data, info } = await pipeline
        .ensureAlpha()
        .raw()
        .toBuffer({ resolveWithObject: true });
      const { width, height, channels } = info;
      const pixelCount = width * height;

      const luminances = new Uint8ClampedArray(pixelCount);
      for (let i = 0; i < pixelCount; i++) {
        const offset = i * channels;
        // luminance = R*0.299 + G*0.587 + B*0.114; with fewer than three colour
        // channels the first channel is already the grey value.
        luminances[i] = channels >= 3
          ? 0.299 * data[offset] + 0.587 * data[offset + 1] + 0.114 * data[offset + 2]
          : data[offset];
      }

      const source = new RGBLuminanceSource(luminances, width, height);
      const bitmap = new BinaryBitmap(new HybridBinarizer(source));

      let result;
      try {
        result = reader.decode(bitmap, hints);
      } catch (err) {
        // Nothing decoded with this variant; try the next one.
        continue;
      }

      const qrText = result.getText().toLowerCase();
      console.log('✅ 检测图片内容:', qrText);
      if (qrText.includes('weixin') || qrText.includes('wx') || qrText.includes('wechat')) {
        console.log('✅ 检测到微信二维码:', qrText);
        return true;
      }
      console.log('❌ 检测到2维码,但不是微信2维码:', qrText);
      return false;
    }

    console.log('❌ ZXing 所有预处理均未识别2维码');
    return false;
  } catch (e) {
    console.log('❌ isWeixinQRCode 整体异常:', e);
    return false;
  }
}
/**
 * Append one record describing a collected QR code to the info text file.
 *
 * The record is a single line of the form
 * `{title:<title>, expire:<expire>, url:<imageUrl>}` followed by a newline.
 *
 * @param {*} title - Value written after `title:` (converted to a string).
 * @param {*} expire - Value written after `expire:` (converted to a string).
 * @param {*} imageUrl - Value written after `url:` (converted to a string).
 * @returns {Promise<void>} Settles once the line has been appended.
 */
async function saveInfo(title, expire, imageUrl) {
  const record = '{title:'.concat(title, ', expire:', expire, ', url:', imageUrl, '}\n');
  await fs.appendFile(infoTxt, record, 'utf8');
}
(async () => {
fetch = (await import('node-fetch')).default;
// 读取关键词文件
const keywordsPath = path.join(__dirname, 'keywords.txt');
let keywords = [];
try {
const raw = await fs.readFile(keywordsPath, 'utf8');
keywords = raw.split(/\r?\n/).map(x => x.trim()).filter(x => x);
} catch (e) {
console.log('未找到 keywords.txt 或读取失败:', e);
return;
}
for (const kw of keywords) {
// 中文转 URL 编码
const encoded = encodeURIComponent(kw);
const searchUrl = `https://www.douyin.com/search/${encoded}?aid=68376e3e-b0ff-489d-993a-b3f20675db5b&type=general`;
console.log('开始处理关键词:', kw, searchUrl);
await runDouyinSpiderWithUrl(searchUrl);
}
/**
 * Run one Douyin search in the shared browser page.
 *
 * The page is created on the first call and reused afterwards. After
 * navigating to the search URL the function hovers the "筛选" (filter) button,
 * clicks the "一天内" (within one day) option and scrolls the result list so
 * that further search responses are loaded. The matching responses are
 * processed by the response listener registered when the page is created.
 *
 * @param {string} searchUrl - Full Douyin search URL for one keyword.
 * @returns {Promise<void>} Resolves when navigation/filtering/scrolling has
 *   finished or failed; navigation and filtering errors are logged, not thrown.
 *   Errors while creating the browser page are not caught here.
 */
async function runDouyinSpiderWithUrl(searchUrl) {
  const page = await ensureDouyinPage();

  // Switching keywords only navigates the existing page; the browser stays open.
  try {
    await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
  } catch (e) {
    console.log('页面打开失败,跳过该关键词:', searchUrl, e);
    return;
  }

  try {
    // Two hover passes over the filter button are made even when the first one
    // succeeds. Handles are looked up again on every pass so that stale
    // ElementHandles are never reused.
    for (let pass = 0; pass < 2; pass++) {
      await page.waitForSelector('div[tabindex="0"] span', { timeout: 10000 });
      const hovered = await hoverFilterButton(page);
      if (!hovered) {
        console.log('未找到筛选按钮,重试...');
        await hoverFilterButton(page);
        await pauseFor(1000);
      }
    }

    if (await clickOneDayFilter(page)) {
      console.log('已自动筛选"一天内",开始自动滑动...');
      await scrollSearchResults(page);
      console.log('滑动结束,页面已到底或无新内容。');
    } else {
      console.log('自动筛选失败,可手动筛选。');
    }
  } catch (e) {
    console.log('自动筛选失败,可手动筛选。', e);
  }
}

// Resolve after `ms` milliseconds (page.waitForTimeout is not available in current Puppeteer).
function pauseFor(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// Return the shared page, launching the browser and registering the response listener on first use.
async function ensureDouyinPage() {
  if (global.__douyinPage) {
    return global.__douyinPage;
  }

  await fs.writeFile(infoTxt, '', 'utf8');
  await fs.ensureDir(saveDir);
  global.__douyinBrowser = await puppeteer.launch({
    headless: false,
    args: ['--start-maximized'],
    defaultViewport: null,
    executablePath: process.env.CHROME_PATH || 'C:/Program Files/Google/Chrome/Application/chrome.exe'
  });

  const [page] = await global.__douyinBrowser.pages();
  await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

  // Maximise the window again through CDP in case --start-maximized had no effect.
  const session = await page.target().createCDPSession();
  const { windowId } = await session.send('Browser.getWindowForTarget');
  await session.send('Browser.setWindowBounds', { windowId, bounds: { windowState: 'maximized' } });
  await page.setRequestInterception(false);

  // Registered exactly once, because the page is created exactly once.
  page.on('response', handleSearchResponse);

  global.__douyinPage = page;
  return page;
}

// Process one network response: for search-result payloads, check each recent video cover and submit WeChat QR codes.
async function handleSearchResponse(response) {
  if (!response.url().startsWith('https://www.douyin.com/aweme/v1/web/general/search/single')) {
    return;
  }
  try {
    const json = JSON.parse(await response.text());
    if (!Array.isArray(json.data)) {
      return;
    }
    const dayAgo = Math.floor(Date.now() / 1000) - 24 * 60 * 60;

    for (const item of json.data) {
      const aweme = item.aweme_info;
      if (!aweme) {
        continue;
      }
      // Skip entries whose create_time is more than 24 hours in the past.
      if ((aweme.create_time || 0) < dayAgo) {
        continue;
      }
      const coverUrl = aweme.video?.cover?.url_list?.[0];
      if (!coverUrl) {
        continue;
      }

      // downloadImage() returns bytes only for images recognised as WeChat QR codes.
      const buffer = await downloadImage(coverUrl);
      if (buffer) {
        await submitQrCode(aweme.desc || '', coverUrl);
      }
      // Pause between cover downloads.
      await pauseFor(1000);
    }
  } catch (e) {
    // Best effort: a body that cannot be read or parsed is logged and skipped.
    console.log('处理搜索响应失败:', e);
  }
}

// POST one recognised QR code (title derived from the description plus the cover URL) to the collection endpoint.
async function submitQrCode(desc, coverUrl) {
  const title = desc.slice(0, 30).replace(/\n|\r/g, ' ').trim() || '未知群名称';
  // The payload carries only the image address, under both keys.
  const data = {
    title,
    qrcode: coverUrl,
    images: coverUrl
  };
  try {
    const resp = await fetch('https://h5.threeperson.com/xxxxx', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(data)
    });
    const result = await resp.text();
    console.log('已提交:', data, '返回:', result);
  } catch (err) {
    console.log('提交失败:', err);
  }
}

// Hover the first div[tabindex="0"] whose first span contains "筛选"; resolve to true when a hover succeeded.
async function hoverFilterButton(page) {
  const divs = await page.$$('div[tabindex="0"]');
  console.log('-----divs.length=', divs.length);
  for (const div of divs) {
    const span = await div.$('span');
    if (!span) {
      continue;
    }
    const text = await page.evaluate((el) => el.textContent, span);
    if (!text || !text.includes('筛选')) {
      continue;
    }
    try {
      await span.hover();
    } catch (err) {
      console.log('筛选按钮点击失败,重试...', err);
      continue;
    }
    await pauseFor(2000);
    return true;
  }
  return false;
}

// Click the "一天内" option, retrying up to three times; resolve to true when the click succeeded.
async function clickOneDayFilter(page) {
  for (let retry = 0; retry < 3; retry++) {
    await pauseFor(1000); // avoid polling too quickly
    // NOTE(review): 'eXMmo3JR' looks like a generated class name and may change
    // when the site is redeployed — confirm it is still current.
    const timeElements = await page.$$('span.eXMmo3JR');
    for (const el of timeElements) {
      let text;
      try {
        text = await page.evaluate((node) => node.textContent, el);
      } catch (err) {
        // The handle is no longer valid; skip it.
        continue;
      }
      if (!text || !text.includes('一天内')) {
        continue;
      }
      try {
        await el.click();
      } catch (err) {
        console.log('"一天内"按钮点击失败,重试...', err);
        continue;
      }
      await pauseFor(2000);
      return true;
    }
    console.log('未检测到"一天内"按钮,5秒后重试...');
    await pauseFor(5000);
  }
  return false;
}

// Scroll to the bottom up to five times, stopping early after two consecutive rounds without height change.
async function scrollSearchResults(page) {
  const maxScrolls = 5;
  let lastHeight = await page.evaluate('document.body.scrollHeight');
  let unchangedRounds = 0;
  for (let i = 0; i < maxScrolls; i++) {
    await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
    await pauseFor(3000); // stay 3 seconds after each scroll
    const newHeight = await page.evaluate('document.body.scrollHeight');
    if (newHeight !== lastHeight) {
      unchangedRounds = 0;
      lastHeight = newHeight;
      continue;
    }
    unchangedRounds++;
    if (unchangedRounds >= 2) {
      console.log('页面无新数据,提前停止滑动。');
      break;
    }
  }
}
})();