Files
community-crawler/crawler/test-search.js
Claude d62867e0cb Add URL endpoint testing script
- Test different Ruliweb URLs (search, board, best, main)
- Result: All endpoints return 403 "Access denied"
- Confirms that Ruliweb blocks all bot requests
- Validates that Puppeteer/Selenium is required
2025-11-15 17:34:10 +00:00

35 lines
1014 B
JavaScript

import axios from 'axios';
// User-Agent header value sent with every probe request: a desktop
// Chrome 120 / Windows 10 (x64) browser signature, assembled from its
// space-separated product tokens for readability.
const USER_AGENT = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
  'AppleWebKit/537.36 (KHTML, like Gecko)',
  'Chrome/120.0.0.0',
  'Safari/537.36',
].join(' ');
// Endpoints probed when testUrls() is called without arguments.
const DEFAULT_TEST_URLS = Object.freeze([
  'https://bbs.ruliweb.com/search?q=test',
  'https://bbs.ruliweb.com/community/board/300143',
  'https://bbs.ruliweb.com/best/selection',
  'https://bbs.ruliweb.com/',
]);

/**
 * Converts a response payload into text so its size and a preview can be
 * reported regardless of how the HTTP client decoded it.
 *
 * @param {*} data - Response payload (string, parsed object, or nothing).
 * @returns {string} The payload as text; empty string for null/undefined.
 */
function bodyToText(data) {
  if (typeof data === 'string') {
    return data;
  }
  if (data === null || data === undefined) {
    return '';
  }
  try {
    // JSON.stringify returns undefined for values such as functions.
    const json = JSON.stringify(data);
    return json === undefined ? String(data) : json;
  } catch (error) {
    // Circular structures or BigInt values cannot be serialized as JSON.
    return String(data);
  }
}

/**
 * Requests each URL in turn with a browser-like User-Agent and logs the
 * outcome: HTTP status and body length on success, status and the first
 * 50 characters of the body on an HTTP error response, or the error
 * message when no response was received. Failures never abort the loop.
 *
 * @param {Iterable<string>} [urls] - URLs to probe; defaults to the
 *   built-in Ruliweb endpoint list.
 * @param {{get: Function}} [httpClient] - Object exposing an axios-style
 *   `get(url, config)` method; defaults to the imported `axios`.
 * @returns {Promise<void>} Resolves once every URL has been probed.
 */
async function testUrls(urls = DEFAULT_TEST_URLS, httpClient = axios) {
  const requestConfig = {
    headers: {
      'User-Agent': USER_AGENT,
    },
    timeout: 10000,
  };

  // Sequential on purpose: one request at a time keeps the log readable.
  for (const url of urls) {
    try {
      console.log(`\nTesting: ${url}`);
      const response = await httpClient.get(url, requestConfig);
      // The payload may already be a parsed object, which has no `.length`;
      // measure its text form instead.
      const length = bodyToText(response.data).length;
      console.log(`✅ SUCCESS - Status: ${response.status}, Length: ${length}`);
    } catch (error) {
      if (error.response) {
        // The server answered with an error status: show a short body preview.
        const body = bodyToText(error.response.data).substring(0, 50);
        console.log(`❌ FAILED - Status: ${error.response.status}, Body: ${body}`);
      } else {
        // No response at all (timeout, DNS failure, connection refused, ...).
        console.log(`❌ ERROR - ${error.message}`);
      }
    }
  }
}
// Run the probe. testUrls() reports per-URL failures itself, so this handler
// only fires on an unexpected error; it avoids an unhandled rejection and
// marks the process as failed.
testUrls().catch((error) => {
  const reason = error instanceof Error ? error.message : String(error);
  console.error(`Unexpected failure while testing URLs: ${reason}`);
  process.exitCode = 1;
});