Add URL endpoint testing script
- Test different Ruliweb URLs (search, board, best, main)
- Result: All endpoints return 403 "Access denied"
- Confirms that Ruliweb blocks all bot requests
- Validates that Puppeteer/Selenium is required
This commit is contained in:
34
crawler/test-search.js
Normal file
34
crawler/test-search.js
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
// Browser-style User-Agent string (Chrome 120 on Windows 10) that testUrls sends as the 'User-Agent' request header.
const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||||
|
|
||||||
|
/**
 * Requests a fixed list of Ruliweb URLs one at a time and logs the outcome of
 * each request to the console.
 *
 * For every URL exactly one result line is printed:
 *   - on success: the HTTP status and the length of the response body text;
 *   - on an HTTP error response: the status and the first 50 characters of the body;
 *   - on any other failure (no response received): the error message.
 *
 * Request failures are caught and logged, so one failing URL does not stop
 * the remaining URLs from being tested.
 *
 * @returns {Promise<void>} Resolves once every URL has been tried.
 */
async function testUrls() {
  const urls = [
    'https://bbs.ruliweb.com/search?q=test',
    'https://bbs.ruliweb.com/community/board/300143',
    'https://bbs.ruliweb.com/best/selection',
    'https://bbs.ruliweb.com/',
  ];

  // Axios may deliver an already-parsed object (e.g. for JSON responses)
  // instead of a string. Normalize to text so that `.length` and the body
  // preview are meaningful rather than `undefined` / "[object Object]".
  const toText = (data) => {
    if (typeof data === 'string') {
      return data;
    }
    try {
      const serialized = JSON.stringify(data);
      // JSON.stringify yields undefined for values it cannot represent
      // (e.g. undefined itself); fall back to plain string coercion.
      return serialized === undefined ? String(data) : serialized;
    } catch (serializeError) {
      // Cyclic structures or BigInt values make JSON.stringify throw.
      return String(data);
    }
  };

  // Sequential on purpose: each request is awaited before the next starts.
  for (const url of urls) {
    try {
      console.log(`\nTesting: ${url}`);
      const response = await axios.get(url, {
        headers: {
          'User-Agent': USER_AGENT,
        },
        timeout: 10000,
      });
      console.log(`✅ SUCCESS - Status: ${response.status}, Length: ${toText(response.data).length}`);
    } catch (error) {
      if (error.response) {
        // The server answered with an error status; show a short body preview.
        const body = toText(error.response.data).substring(0, 50);
        console.log(`❌ FAILED - Status: ${error.response.status}, Body: ${body}`);
      } else {
        // No response object is attached to the error; only the message is available.
        console.log(`❌ ERROR - ${error.message}`);
      }
    }
  }
}
|
||||||
|
|
||||||
|
// Run the endpoint checks when this script is executed; testUrls catches and logs request failures itself.
testUrls();
|
||||||
Reference in New Issue
Block a user