From dd4345267a03edd4ee4ee51ef2aefefff4bdca4b Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 15 Nov 2025 17:57:35 +0000 Subject: [PATCH] Add HTML structure debugging script - Create debug-html.ts to save actual HTML from pages - Add npm run debug command to package.json - Helps identify correct CSS selectors for crawlers - Saves HTML to debug-ruliweb.html and debug-arcalive.html Usage: npm run debug --- crawler/package.json | 3 ++- crawler/src/debug-html.ts | 47 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 crawler/src/debug-html.ts diff --git a/crawler/package.json b/crawler/package.json index 27ea2b4..f682428 100644 --- a/crawler/package.json +++ b/crawler/package.json @@ -8,7 +8,8 @@ "dev": "tsx watch src/index.ts", "build": "tsc", "start": "node dist/index.js", - "test": "tsx src/test.ts" + "test": "tsx src/test.ts", + "debug": "tsx src/debug-html.ts" }, "keywords": [ "crawler", diff --git a/crawler/src/debug-html.ts b/crawler/src/debug-html.ts new file mode 100644 index 0000000..bbecccc --- /dev/null +++ b/crawler/src/debug-html.ts @@ -0,0 +1,47 @@ +import { promises as fs } from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; +import { PuppeteerFetcher } from './utils/puppeteer-fetcher.js'; +import { Logger } from './utils/logger.js'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +async function debugHTML() { + Logger.info('========== HTML Structure Debug =========='); + + const urls = [ + { name: 'Ruliweb', url: 'https://bbs.ruliweb.com/community/board/300143' }, + { name: 'Arcalive', url: 'https://arca.live/b/breaking' }, + ]; + + for (const { name, url } of urls) { + Logger.info(`Fetching ${name}: ${url}`); + + const html = await PuppeteerFetcher.fetchHTML(url); + + if (html) { + // Save the HTML to a file + const filename = `debug-${name.toLowerCase()}.html`; + const filepath = path.join(__dirname, filename); + await 
fs.writeFile(filepath, html, 'utf-8'); + Logger.success(`Saved HTML to ${filename} (${html.length} bytes)`); + + // HTML preview (first 500 characters) + Logger.info(`HTML Preview:\n${html.substring(0, 500)}...`); + } else { + Logger.error(`Failed to fetch ${name}`); + } + + await PuppeteerFetcher.delay(); + } + + await PuppeteerFetcher.closeBrowser(); + Logger.info('========== Debug completed =========='); + Logger.info('Check debug-ruliweb.html and debug-arcalive.html files'); +} + +debugHTML().catch((error) => { + Logger.error('Debug failed', error); + PuppeteerFetcher.closeBrowser().finally(() => process.exit(1)); +});