#!/usr/bin/env node

/**
 * Confluence to DynamoDB migration tool.
 *
 * Reads filament rows either from a Confluence page (when the CONFLUENCE_*
 * environment variables are set) or from a local JSON file, then writes them
 * to the DynamoDB table named by DYNAMODB_TABLE_NAME.
 *
 * Usage: node migrate-with-parser.js [--clear]
 */

require('dotenv').config({ path: '.env.local' });
const axios = require('axios');
const AWS = require('aws-sdk');
const { v4: uuidv4 } = require('uuid');
const cheerio = require('cheerio');

// Configure AWS
AWS.config.update({
  region: process.env.AWS_REGION || 'eu-central-1'
});

const dynamodb = new AWS.DynamoDB.DocumentClient();
const TABLE_NAME = process.env.DYNAMODB_TABLE_NAME || 'filamenteka-filaments';

// Confluence configuration
const CONFLUENCE_API_URL = process.env.CONFLUENCE_API_URL;
const CONFLUENCE_TOKEN = process.env.CONFLUENCE_TOKEN;
const CONFLUENCE_PAGE_ID = process.env.CONFLUENCE_PAGE_ID;

// DynamoDB batchWrite supports max 25 items per request
const BATCH_WRITE_LIMIT = 25;
// How many times a batch is re-sent when DynamoDB reports unprocessed items
const MAX_BATCH_RETRIES = 5;
// Base delay for the exponential backoff between those retries
const RETRY_BASE_DELAY_MS = 100;

/**
 * Resolves after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Sends one batch of write requests to TABLE_NAME and keeps re-sending
 * whatever DynamoDB returns in `UnprocessedItems` until nothing is left.
 *
 * A successful batchWrite response may still contain unprocessed requests
 * (for example when the table is throttled), so ignoring that field would
 * silently drop writes.
 *
 * @param {Array<Object>} requests - PutRequest/DeleteRequest entries (at most 25).
 * @returns {Promise<void>}
 * @throws {Error} When requests are still unprocessed after MAX_BATCH_RETRIES retries,
 *   or when the underlying batchWrite call rejects.
 */
async function batchWriteWithRetry(requests) {
  let requestItems = { [TABLE_NAME]: requests };

  for (let attempt = 0; ; attempt++) {
    const result = await dynamodb.batchWrite({ RequestItems: requestItems }).promise();
    const unprocessed = result.UnprocessedItems || {};

    if (Object.keys(unprocessed).length === 0) {
      return;
    }

    if (attempt >= MAX_BATCH_RETRIES) {
      const remaining = Object.values(unprocessed)
        .reduce((sum, list) => sum + list.length, 0);
      throw new Error(
        `${remaining} write requests were still unprocessed after ${MAX_BATCH_RETRIES} retries`
      );
    }

    // Back off before retrying so a throttled table gets time to recover
    await sleep(RETRY_BASE_DELAY_MS * 2 ** attempt);
    requestItems = unprocessed;
  }
}

/**
 * Scans TABLE_NAME page by page, following `LastEvaluatedKey`, and hands each
 * page to `onPage`.
 *
 * A single scan call returns only one page of results, so the loop is needed
 * to see the whole table.
 *
 * @param {Object} baseParams - Scan parameters other than TableName/ExclusiveStartKey.
 * @param {function(Object): void} onPage - Called with every scan response.
 * @returns {Promise<void>}
 */
async function scanAllPages(baseParams, onPage) {
  let startKey;

  do {
    const params = { TableName: TABLE_NAME, ...baseParams };
    if (startKey) {
      params.ExclusiveStartKey = startKey;
    }

    const page = await dynamodb.scan(params).promise();
    onPage(page);
    startKey = page.LastEvaluatedKey;
  } while (startKey);
}

/**
 * Downloads the configured Confluence page and extracts the filament rows
 * from its storage-format HTML body.
 *
 * @returns {Promise<Array<Object>>} Parsed filament rows.
 * @throws Re-throws any error from the HTTP request or from parsing.
 */
async function fetchConfluenceData() {
  try {
    console.log('Fetching data from Confluence...');

    const response = await axios.get(
      `${CONFLUENCE_API_URL}/wiki/rest/api/content/${CONFLUENCE_PAGE_ID}?expand=body.storage`,
      {
        headers: {
          // The token is base64-encoded as-is; presumably it is already in
          // `user:api-token` form -- TODO confirm against the .env.local contents.
          'Authorization': `Basic ${Buffer.from(CONFLUENCE_TOKEN).toString('base64')}`,
          'Accept': 'application/json'
        }
      }
    );

    const htmlContent = response.data.body.storage.value;
    return parseConfluenceTable(htmlContent);
  } catch (error) {
    console.error('Error fetching from Confluence:', error.message);
    throw error;
  }
}

/**
 * Extracts filament records from the table rows found in an HTML document.
 *
 * The first `tr` matched is treated as the header and skipped. Rows with
 * fewer than nine `td` cells are ignored, as are rows where both `brand`
 * and `boja` are empty. Cell order is fixed: brand, tip, finish, boja,
 * refill, vakum, otvoreno, kolicina, cena.
 *
 * @param {string} html - HTML containing the table.
 * @returns {Array<Object>} One object per accepted row, all values trimmed strings.
 */
function parseConfluenceTable(html) {
  const $ = cheerio.load(html);
  const filaments = [];

  // Find the table and iterate through rows
  $('table').find('tr').each((index, row) => {
    // Skip header row
    if (index === 0) return;

    const cells = $(row).find('td');
    if (cells.length >= 9) {
      const filament = {
        brand: $(cells[0]).text().trim(),
        tip: $(cells[1]).text().trim(),
        finish: $(cells[2]).text().trim(),
        boja: $(cells[3]).text().trim(),
        refill: $(cells[4]).text().trim(),
        vakum: $(cells[5]).text().trim(),
        otvoreno: $(cells[6]).text().trim(),
        kolicina: $(cells[7]).text().trim(),
        cena: $(cells[8]).text().trim()
      };

      // Only add if row has valid data
      if (filament.brand || filament.boja) {
        filaments.push(filament);
      }
    }
  });

  return filaments;
}

/**
 * Deletes every item from TABLE_NAME.
 *
 * All scan pages are read first (only the `id` attribute is projected), then
 * the keys are deleted in batches of up to 25, retrying unprocessed deletes.
 *
 * @returns {Promise<void>}
 * @throws Re-throws any DynamoDB error after logging it.
 */
async function clearDynamoTable() {
  console.log('Clearing existing data from DynamoDB...');

  try {
    // Scan all items, following pagination so nothing is left behind
    const deleteRequests = [];
    await scanAllPages({ ProjectionExpression: 'id' }, (page) => {
      for (const item of page.Items) {
        deleteRequests.push({
          DeleteRequest: {
            Key: { id: item.id }
          }
        });
      }
    });

    if (deleteRequests.length === 0) {
      console.log('Table is already empty');
      return;
    }

    // Delete in batches
    for (let i = 0; i < deleteRequests.length; i += BATCH_WRITE_LIMIT) {
      const batch = deleteRequests.slice(i, i + BATCH_WRITE_LIMIT);
      await batchWriteWithRetry(batch);
      console.log(`Deleted ${batch.length} items`);
    }

    console.log('Table cleared successfully');
  } catch (error) {
    console.error('Error clearing table:', error);
    throw error;
  }
}

/**
 * Writes the given filaments to TABLE_NAME.
 *
 * Exits the process with code 1 when the table does not exist. Each item gets
 * a generated `id` plus `createdAt`/`updatedAt` timestamps; properties of the
 * input item are spread after `id`, so an input that already carries an `id`
 * keeps its own value.
 *
 * @param {Array<Object>} filaments - Records to insert.
 * @returns {Promise<number>} Number of items written.
 * @throws Re-throws any DynamoDB error other than a missing table.
 */
async function migrateToDynamoDB(filaments) {
  console.log(`Migrating ${filaments.length} filaments to DynamoDB...`);

  // Check if table exists
  try {
    const dynamo = new AWS.DynamoDB();
    await dynamo.describeTable({ TableName: TABLE_NAME }).promise();
    console.log(`Table ${TABLE_NAME} exists`);
  } catch (error) {
    if (error.code === 'ResourceNotFoundException') {
      console.error(`Table ${TABLE_NAME} does not exist. Please run Terraform first.`);
      process.exit(1);
    }
    throw error;
  }

  // Add IDs and timestamps; one timestamp per item so that createdAt and
  // updatedAt are guaranteed to be equal on a freshly inserted record
  const itemsToInsert = filaments.map((item) => {
    const now = new Date().toISOString();
    return {
      id: uuidv4(),
      ...item,
      createdAt: now,
      updatedAt: now
    };
  });

  // Batch write items (max 25 per batch)
  let totalMigrated = 0;
  for (let i = 0; i < itemsToInsert.length; i += BATCH_WRITE_LIMIT) {
    const chunk = itemsToInsert.slice(i, i + BATCH_WRITE_LIMIT);
    const putRequests = chunk.map((item) => ({
      PutRequest: { Item: item }
    }));

    try {
      await batchWriteWithRetry(putRequests);
      totalMigrated += chunk.length;
      console.log(`Migrated ${totalMigrated}/${itemsToInsert.length} items`);
    } catch (error) {
      console.error('Error writing batch:', error);
      throw error;
    }
  }

  console.log('Migration completed successfully!');
  return totalMigrated;
}

/**
 * Entry point: optionally clears the table, loads the filaments from
 * Confluence or the local JSON file, migrates them, and prints a
 * verification count and a sample item. Exits with code 1 on any failure.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    let filaments;

    // Check for --clear flag
    const shouldClear = process.argv.includes('--clear');
    if (shouldClear) {
      await clearDynamoTable();
    }

    if (CONFLUENCE_API_URL && CONFLUENCE_TOKEN && CONFLUENCE_PAGE_ID) {
      // Fetch from Confluence
      console.log('Using Confluence as data source');
      filaments = await fetchConfluenceData();
    } else {
      console.log('Confluence credentials not found, using local mock data...');
      const fs = require('fs');
      // NOTE(review): this path is resolved against the current working
      // directory, not this script's location -- confirm how the tool is run.
      const data = JSON.parse(fs.readFileSync('../public/data.json', 'utf8'));
      filaments = data;
    }

    console.log(`Found ${filaments.length} filaments to migrate`);

    // Show sample data
    if (filaments.length > 0) {
      console.log('\nSample data:');
      console.log(JSON.stringify(filaments[0], null, 2));
    }

    // Migrate to DynamoDB
    await migrateToDynamoDB(filaments);

    // Verify migration; the count is summed over every scan page because a
    // single COUNT scan only covers one page of the table
    let totalCount = 0;
    await scanAllPages({ Select: 'COUNT' }, (page) => {
      totalCount += page.Count;
    });
    console.log(`\nVerification: ${totalCount} total items now in DynamoDB`);

    // Show sample from DynamoDB
    const sampleParams = {
      TableName: TABLE_NAME,
      Limit: 1
    };
    const sampleResult = await dynamodb.scan(sampleParams).promise();
    if (sampleResult.Items.length > 0) {
      console.log('\nSample from DynamoDB:');
      console.log(JSON.stringify(sampleResult.Items[0], null, 2));
    }
  } catch (error) {
    console.error('Migration failed:', error);
    process.exit(1);
  }
}

// Run migration
if (require.main === module) {
  console.log('Confluence to DynamoDB Migration Tool');
  console.log('=====================================');
  console.log('Usage: node migrate-with-parser.js [--clear]');
  console.log('  --clear: Clear existing data before migration\n');

  main();
}