feat/match_collector: batch match import
pipeline / lint-and-format (push) Successful in 4m22s
pipeline / build-and-push-images (push) Successful in 25s

This commit is contained in:
2026-06-27 12:33:36 +02:00
parent e6ddc27d5c
commit d878af6d1a
3 changed files with 118 additions and 23 deletions
+12 -8
View File
@@ -1,21 +1,25 @@
services:
# Development MongoDB with performance optimizations
# Development MongoDB with memory optimizations
mongodb:
image: mongo:latest
image: mongo:8.3.4
container_name: buildpath-mongodb
ports:
- "27017:27017"
environment:
MONGO_INITDB_ROOT_USERNAME: ${MONGO_USER:-root}
MONGO_INITDB_ROOT_PASSWORD: ${MONGO_PASS:-password}
GLIBC_TUNABLES: glibc.pthread.rseq=1
volumes:
- ./data/db:/data/db
command: mongod --wiredTigerCacheSizeGB 4 --quiet
healthcheck:
test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
interval: 5s
timeout: 2s
retries: 30
# Reduced cache size to leave more RAM for the import script
# WiredTiger cache is now 2GB (was 4GB) to prevent OOM during large imports
command: mongod --wiredTigerCacheSizeGB 2 --quiet
deploy:
resources:
limits:
memory: 4G
reservations:
memory: 2G
mongo-express:
image: mongo-express
+29 -8
View File
@@ -31,19 +31,29 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
const collection = db.collection(collectionName);
try {
// Create indexes first for better performance
await collection.createIndex({ "metadata.matchId": 1 }, { unique: true });
await collection.createIndex({ "info.gameDuration": 1 });
await collection.createIndex({ "info.participants.championId": 1 });
await collection.createIndex({ "info.participants.win": 1 });
// Check file size
// Check file size first
const fileStats = fs.statSync(filePath);
const fileSize = (fileStats.size / (1024 * 1024 * 1024)).toFixed(2);
console.log(` 📊 File size: ${fileSize} GB`);
// Defer the secondary indexes until after the import to reduce memory pressure.
// Create only the unique matchId index up front so duplicate matches are rejected during the import.
console.log(` 📇 Creating unique matchId index...`);
await collection.createIndex({ "metadata.matchId": 1 }, { unique: true, background: false });
await processLineDelimitedFormat(filePath, collection, batchSize, startTime);
// Create additional indexes after import to reduce memory pressure
console.log(`\n 📇 Creating additional indexes (this may take a while)...`);
try {
await collection.createIndex({ "info.gameDuration": 1 }, { background: true });
await collection.createIndex({ "info.participants.championId": 1 }, { background: true });
await collection.createIndex({ "info.participants.win": 1 }, { background: true });
console.log(` ✅ Indexes created successfully`);
} catch (indexError) {
console.log(` ⚠️ Warning: Could not create additional indexes: ${indexError.message}`);
}
const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
console.log(`🎉 Import complete in ${totalTime} seconds`);
console.log(`✅ Processed: ${processed.toLocaleString()} matches`);
@@ -66,6 +76,7 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
let batch = [];
let lineCount = 0;
let batchCount = 0;
for await (const line of rl) {
lineCount++;
@@ -88,9 +99,16 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
batch.push(match);
if (batch.length >= batchSize) {
process.stdout.write(`\r Inserting batch into MongoDB... `);
batchCount++;
process.stdout.write(`\r Inserting batch #${batchCount} (${batch.length} matches)... `);
await insertBatch(batch, collection);
batch = [];
// Yield to the event loop every 10 batches so other pending callbacks get a chance to run.
// NOTE(review): this does not force or hint garbage collection; any memory benefit is indirect — confirm it is still needed.
if (batchCount % 10 === 0) {
await new Promise(resolve => setImmediate(resolve));
}
}
} catch (error) {
skipped++;
@@ -99,8 +117,11 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
// Insert remaining matches
if (batch.length > 0) {
process.stdout.write(`\r Inserting final batch (${batch.length} matches)... `);
await insertBatch(batch, collection);
}
console.log(`\n 📊 Total batches inserted: ${batchCount + 1}`);
}
async function insertBatch(batch, collection) {