Add HTML structure debugging script
- Create debug-html.ts to save actual HTML from pages
- Add npm run debug command to package.json
- Helps identify correct CSS selectors for crawlers
- Saves HTML to debug-ruliweb.html and debug-arcalive.html

Usage: npm run debug
This commit is contained in:
@@ -8,7 +8,8 @@
|
||||
"dev": "tsx watch src/index.ts",
|
||||
"build": "tsc",
|
||||
"start": "node dist/index.js",
|
||||
"test": "tsx src/test.ts"
|
||||
"test": "tsx src/test.ts",
|
||||
"debug": "tsx src/debug-html.ts"
|
||||
},
|
||||
"keywords": [
|
||||
"crawler",
|
||||
|
||||
47
crawler/src/debug-html.ts
Normal file
47
crawler/src/debug-html.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { PuppeteerFetcher } from './utils/puppeteer-fetcher.js';
|
||||
import { Logger } from './utils/logger.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
/**
 * Fetches each target board page and dumps the returned HTML to disk so the
 * real markup can be inspected when working out CSS selectors.
 *
 * For every entry in the hard-coded target list the page is fetched through
 * `PuppeteerFetcher.fetchHTML`. A truthy result is written as
 * `debug-<lowercased name>.html` in the directory containing this module
 * (`__dirname`) and a 500-character preview is logged; a falsy result is
 * logged as a failure and the loop moves on to the next target.
 *
 * The browser is closed after all targets have been processed. If anything
 * throws before that point, closing the browser is left to the caller.
 *
 * @returns {Promise<void>} Resolves once every target has been processed and
 *   the browser has been closed.
 */
async function debugHTML() {
  Logger.info('========== HTML Structure Debug ==========');

  const urls = [
    { name: 'Ruliweb', url: 'https://bbs.ruliweb.com/community/board/300143' },
    { name: 'Arcalive', url: 'https://arca.live/b/breaking' },
  ];

  for (const { name, url } of urls) {
    Logger.info(`Fetching ${name}: ${url}`);

    const html = await PuppeteerFetcher.fetchHTML(url);

    if (html) {
      // Save the HTML to a file next to this module.
      const filename = `debug-${name.toLowerCase()}.html`;
      const filepath = path.join(__dirname, filename);
      await fs.writeFile(filepath, html, 'utf-8');

      // Report the size of what was actually written: `html.length` counts
      // UTF-16 code units, which under-reports the UTF-8 byte size as soon as
      // the page contains non-ASCII text.
      const byteCount = Buffer.byteLength(html, 'utf-8');
      Logger.success(`Saved HTML to ${filename} (${byteCount} bytes)`);

      // HTML preview (first 500 characters).
      Logger.info(`HTML Preview:\n${html.substring(0, 500)}...`);
    } else {
      Logger.error(`Failed to fetch ${name}`);
    }

    // NOTE(review): presumably a politeness pause between requests; confirm
    // against PuppeteerFetcher.delay.
    await PuppeteerFetcher.delay();
  }

  await PuppeteerFetcher.closeBrowser();
  Logger.info('========== Debug completed ==========');
  Logger.info('Check debug-ruliweb.html and debug-arcalive.html files');
}
|
||||
|
||||
/**
 * Failure path for the script: log the error, then close the browser and
 * terminate with exit code 1 whether or not the close itself succeeds.
 */
const handleDebugFailure = (error) => {
  Logger.error('Debug failed', error);

  const exitWithFailure = () => process.exit(1);
  const browserClosed = PuppeteerFetcher.closeBrowser();
  browserClosed.finally(exitWithFailure);
};

// Script entry point: run the debug routine and route any rejection to the
// failure handler above.
debugHTML().catch(handleDebugFailure);
|
||||
Reference in New Issue
Block a user