feat/match_collector: batch match import
This commit is contained in:
+12
-8
@@ -1,21 +1,25 @@
|
||||
services:
|
||||
# Development MongoDB with performance optimizations
|
||||
# Development MongoDB with memory optimizations
|
||||
mongodb:
|
||||
image: mongo:latest
|
||||
image: mongo:8.3.4
|
||||
container_name: buildpath-mongodb
|
||||
ports:
|
||||
- "27017:27017"
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: ${MONGO_USER:-root}
|
||||
MONGO_INITDB_ROOT_PASSWORD: ${MONGO_PASS:-password}
|
||||
GLIBC_TUNABLES: glibc.pthread.rseq=1
|
||||
volumes:
|
||||
- ./data/db:/data/db
|
||||
command: mongod --wiredTigerCacheSizeGB 4 --quiet
|
||||
healthcheck:
|
||||
test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
retries: 30
|
||||
# Reduced cache size to leave more RAM for the import script
|
||||
# WiredTiger cache is now 2GB (was 4GB) to prevent OOM during large imports
|
||||
command: mongod --wiredTigerCacheSizeGB 2 --quiet
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 4G
|
||||
reservations:
|
||||
memory: 2G
|
||||
|
||||
mongo-express:
|
||||
image: mongo-express
|
||||
|
||||
@@ -31,19 +31,29 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
|
||||
const collection = db.collection(collectionName);
|
||||
|
||||
try {
|
||||
// Create indexes first for better performance
|
||||
await collection.createIndex({ "metadata.matchId": 1 }, { unique: true });
|
||||
await collection.createIndex({ "info.gameDuration": 1 });
|
||||
await collection.createIndex({ "info.participants.championId": 1 });
|
||||
await collection.createIndex({ "info.participants.win": 1 });
|
||||
|
||||
// Check file size
|
||||
// Check file size first
|
||||
const fileStats = fs.statSync(filePath);
|
||||
const fileSize = (fileStats.size / (1024 * 1024 * 1024)).toFixed(2);
|
||||
console.log(` 📊 File size: ${fileSize} GB`);
|
||||
|
||||
// Defer index creation to after import to reduce memory pressure
|
||||
// Only create the unique matchId index before import to prevent duplicates
|
||||
console.log(` 📇 Creating unique matchId index...`);
|
||||
await collection.createIndex({ "metadata.matchId": 1 }, { unique: true, background: false });
|
||||
|
||||
await processLineDelimitedFormat(filePath, collection, batchSize, startTime);
|
||||
|
||||
// Create additional indexes after import to reduce memory pressure
|
||||
console.log(`\n 📇 Creating additional indexes (this may take a while)...`);
|
||||
try {
|
||||
await collection.createIndex({ "info.gameDuration": 1 }, { background: true });
|
||||
await collection.createIndex({ "info.participants.championId": 1 }, { background: true });
|
||||
await collection.createIndex({ "info.participants.win": 1 }, { background: true });
|
||||
console.log(` ✅ Indexes created successfully`);
|
||||
} catch (indexError) {
|
||||
console.log(` ⚠️ Warning: Could not create additional indexes: ${indexError.message}`);
|
||||
}
|
||||
|
||||
const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
console.log(`🎉 Import complete in ${totalTime} seconds`);
|
||||
console.log(`✅ Processed: ${processed.toLocaleString()} matches`);
|
||||
@@ -66,6 +76,7 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
|
||||
|
||||
let batch = [];
|
||||
let lineCount = 0;
|
||||
let batchCount = 0;
|
||||
|
||||
for await (const line of rl) {
|
||||
lineCount++;
|
||||
@@ -88,9 +99,16 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
|
||||
batch.push(match);
|
||||
|
||||
if (batch.length >= batchSize) {
|
||||
process.stdout.write(`\r Inserting batch into MongoDB... `);
|
||||
batchCount++;
|
||||
process.stdout.write(`\r Inserting batch #${batchCount} (${batch.length} matches)... `);
|
||||
await insertBatch(batch, collection);
|
||||
batch = [];
|
||||
|
||||
// Yield to the event loop every 10 batches (via setImmediate) so pending I/O callbacks can run; this does not force garbage collection
|
||||
// This helps reduce memory pressure when processing large files
|
||||
if (batchCount % 10 === 0) {
|
||||
await new Promise(resolve => setImmediate(resolve));
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
skipped++;
|
||||
@@ -99,8 +117,11 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
|
||||
|
||||
// Insert remaining matches
|
||||
if (batch.length > 0) {
|
||||
process.stdout.write(`\r Inserting final batch (${batch.length} matches)... `);
|
||||
await insertBatch(batch, collection);
|
||||
}
|
||||
|
||||
console.log(`\n 📊 Total batches inserted: ${batchCount + 1}`);
|
||||
}
|
||||
|
||||
async function insertBatch(batch, collection) {
|
||||
|
||||
Reference in New Issue
Block a user