fix(sqlite): improve parser to handle tables without column types and fix column detection (#914)

This commit is contained in:
Jonathan Fishner
2025-09-15 20:50:44 +03:00
committed by GitHub
parent e6783a89cc
commit d3dbf41894
3 changed files with 362 additions and 92 deletions

View File

@@ -43,8 +43,8 @@ const DDLInstructionsMap: Record<DatabaseType, DDLInstruction[]> = {
},
{
text: 'Execute the following command in your terminal:',
code: `sqlite3 <database_file_path>\n.dump > <output_file_path>`,
example: `sqlite3 my_db.db\n.dump > schema_export.sql`,
code: `sqlite3 <database_file_path>\n".schema" > <output_file_path>`,
example: `sqlite3 my_db.db\n".schema" > schema_export.sql`,
},
{
text: 'Open the exported SQL file, copy its contents, and paste them here.',

View File

@@ -0,0 +1,178 @@
import { describe, it, expect } from 'vitest';
import { fromSQLite } from '../sqlite';
describe('SQLite Import Tests', () => {
it('should parse SQLite script with sqlite_sequence table and all relationships', async () => {
const sql = `
CREATE TABLE users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT,
age INTEGER
);
CREATE TABLE sqlite_sequence(name,seq);
CREATE TABLE products (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT,
price REAL
);
CREATE TABLE user_products (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL,
product_id INTEGER NOT NULL,
purchased_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id),
FOREIGN KEY (product_id) REFERENCES products(id)
);
`;
const result = await fromSQLite(sql);
// ============= CHECK TOTAL COUNTS =============
// Should have exactly 4 tables
expect(result.tables).toHaveLength(4);
// Should have exactly 2 foreign key relationships
expect(result.relationships).toHaveLength(2);
// ============= CHECK USERS TABLE =============
const usersTable = result.tables.find((t) => t.name === 'users');
expect(usersTable).toBeDefined();
expect(usersTable?.columns).toHaveLength(3); // id, name, age
// Check each column in users table
expect(usersTable?.columns[0]).toMatchObject({
name: 'id',
type: 'INTEGER',
primaryKey: true,
increment: true,
nullable: false,
});
expect(usersTable?.columns[1]).toMatchObject({
name: 'name',
type: 'TEXT',
primaryKey: false,
nullable: true,
});
expect(usersTable?.columns[2]).toMatchObject({
name: 'age',
type: 'INTEGER',
primaryKey: false,
nullable: true,
});
// ============= CHECK SQLITE_SEQUENCE TABLE =============
const sqliteSequenceTable = result.tables.find(
(t) => t.name === 'sqlite_sequence'
);
expect(sqliteSequenceTable).toBeDefined();
expect(sqliteSequenceTable?.columns).toHaveLength(2); // name, seq
// Check columns in sqlite_sequence table
expect(sqliteSequenceTable?.columns[0]).toMatchObject({
name: 'name',
type: 'TEXT', // Should default to TEXT when no type specified
primaryKey: false,
nullable: true,
});
expect(sqliteSequenceTable?.columns[1]).toMatchObject({
name: 'seq',
type: 'TEXT', // Should default to TEXT when no type specified
primaryKey: false,
nullable: true,
});
// ============= CHECK PRODUCTS TABLE =============
const productsTable = result.tables.find((t) => t.name === 'products');
expect(productsTable).toBeDefined();
expect(productsTable?.columns).toHaveLength(3); // id, name, price
// Check each column in products table
expect(productsTable?.columns[0]).toMatchObject({
name: 'id',
type: 'INTEGER',
primaryKey: true,
increment: true,
nullable: false,
});
expect(productsTable?.columns[1]).toMatchObject({
name: 'name',
type: 'TEXT',
primaryKey: false,
nullable: true,
});
expect(productsTable?.columns[2]).toMatchObject({
name: 'price',
type: 'REAL',
primaryKey: false,
nullable: true,
});
// ============= CHECK USER_PRODUCTS TABLE =============
const userProductsTable = result.tables.find(
(t) => t.name === 'user_products'
);
expect(userProductsTable).toBeDefined();
expect(userProductsTable?.columns).toHaveLength(4); // id, user_id, product_id, purchased_at
// Check each column in user_products table
expect(userProductsTable?.columns[0]).toMatchObject({
name: 'id',
type: 'INTEGER',
primaryKey: true,
increment: true,
nullable: false,
});
expect(userProductsTable?.columns[1]).toMatchObject({
name: 'user_id',
type: 'INTEGER',
primaryKey: false,
nullable: false, // NOT NULL constraint
});
expect(userProductsTable?.columns[2]).toMatchObject({
name: 'product_id',
type: 'INTEGER',
primaryKey: false,
nullable: false, // NOT NULL constraint
});
expect(userProductsTable?.columns[3]).toMatchObject({
name: 'purchased_at',
type: 'TIMESTAMP', // DATETIME should map to TIMESTAMP
primaryKey: false,
nullable: true,
default: 'CURRENT_TIMESTAMP',
});
// ============= CHECK FOREIGN KEY RELATIONSHIPS =============
// FK 1: user_products.user_id -> users.id
const userIdFK = result.relationships.find(
(r) =>
r.sourceTable === 'user_products' &&
r.sourceColumn === 'user_id' &&
r.targetTable === 'users' &&
r.targetColumn === 'id'
);
expect(userIdFK).toBeDefined();
expect(userIdFK).toMatchObject({
sourceTable: 'user_products',
sourceColumn: 'user_id',
targetTable: 'users',
targetColumn: 'id',
});
// FK 2: user_products.product_id -> products.id
const productIdFK = result.relationships.find(
(r) =>
r.sourceTable === 'user_products' &&
r.sourceColumn === 'product_id' &&
r.targetTable === 'products' &&
r.targetColumn === 'id'
);
expect(productIdFK).toBeDefined();
expect(productIdFK).toMatchObject({
sourceTable: 'user_products',
sourceColumn: 'product_id',
targetTable: 'products',
targetColumn: 'id',
});
});
});

View File

@@ -32,11 +32,11 @@ export async function fromSQLite(sqlContent: string): Promise<SQLParserResult> {
const tableMap: Record<string, string> = {}; // Maps table name to its ID
try {
// SPECIAL HANDLING: Direct line-by-line parser for SQLite DDL
// This ensures we preserve the exact data types from the original DDL
// SPECIAL HANDLING: Direct regex-based parser for SQLite DDL
// This ensures we handle all SQLite-specific syntax including tables without types
const directlyParsedTables = parseCreateTableStatements(sqlContent);
// Check if we successfully parsed tables directly
// Always try direct parsing first as it's more reliable for SQLite
if (directlyParsedTables.length > 0) {
// Map the direct parsing results to the expected SQLParserResult format
directlyParsedTables.forEach((table) => {
@@ -56,8 +56,19 @@ export async function fromSQLite(sqlContent: string): Promise<SQLParserResult> {
// Process foreign keys using the regex approach
findForeignKeysUsingRegex(sqlContent, tableMap, relationships);
// Return the result
return { tables, relationships };
// Create placeholder tables for any missing referenced tables
addPlaceholderTablesForFKReferences(
tables,
relationships,
tableMap
);
// Filter out any invalid relationships
const validRelationships = relationships.filter((rel) => {
return isValidForeignKeyRelationship(rel, tables);
});
return { tables, relationships: validRelationships };
}
// Preprocess SQL to handle SQLite quoted identifiers
@@ -130,101 +141,182 @@ function parseCreateTableStatements(sqlContent: string): {
columns: SQLColumn[];
}[] = [];
// Split SQL content into lines
const lines = sqlContent.split('\n');
let currentTable: { name: string; columns: SQLColumn[] } | null = null;
let inCreateTable = false;
// Process each line
for (let i = 0; i < lines.length; i++) {
const line = lines[i].trim();
// Skip empty lines and comments
if (!line || line.startsWith('--')) {
continue;
}
// Check for CREATE TABLE statement
if (line.toUpperCase().startsWith('CREATE TABLE')) {
// Extract table name
const tableNameMatch =
/CREATE\s+TABLE\s+(?:if\s+not\s+exists\s+)?["'`]?(\w+)["'`]?/i.exec(
line
);
if (tableNameMatch && tableNameMatch[1]) {
inCreateTable = true;
currentTable = {
name: tableNameMatch[1],
columns: [],
};
// Remove comments before processing
const cleanedSQL = sqlContent
.split('\n')
.map((line) => {
const commentIndex = line.indexOf('--');
if (commentIndex >= 0) {
return line.substring(0, commentIndex);
}
}
// Check for end of CREATE TABLE statement
else if (inCreateTable && line.includes(');')) {
if (currentTable) {
tables.push(currentTable);
return line;
})
.join('\n');
// Match all CREATE TABLE statements including those without column definitions
const createTableRegex =
/CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?["'`]?(\w+)["'`]?\s*\(([^;]+?)\)\s*;/gis;
let match;
while ((match = createTableRegex.exec(cleanedSQL)) !== null) {
const tableName = match[1];
const tableBody = match[2].trim();
const table: { name: string; columns: SQLColumn[] } = {
name: tableName,
columns: [],
};
// Special case: sqlite_sequence or tables with columns but no types
if (tableName === 'sqlite_sequence' || !tableBody.includes(' ')) {
// Parse simple column list without types (e.g., "name,seq")
const simpleColumns = tableBody.split(',').map((col) => col.trim());
for (const colName of simpleColumns) {
if (
colName &&
!colName.toUpperCase().startsWith('FOREIGN KEY') &&
!colName.toUpperCase().startsWith('PRIMARY KEY') &&
!colName.toUpperCase().startsWith('UNIQUE') &&
!colName.toUpperCase().startsWith('CHECK') &&
!colName.toUpperCase().startsWith('CONSTRAINT')
) {
table.columns.push({
name: colName.replace(/["'`]/g, ''),
type: 'TEXT', // Default to TEXT for untyped columns
nullable: true,
primaryKey: false,
unique: false,
default: '',
increment: false,
});
}
}
inCreateTable = false;
currentTable = null;
}
// Process column definitions inside CREATE TABLE
else if (inCreateTable && currentTable && line.includes('"')) {
// Column line pattern optimized for user's DDL format
const columnPattern = /\s*["'`](\w+)["'`]\s+([A-Za-z0-9_]+)(.+)?/i;
const match = columnPattern.exec(line);
} else {
// Parse normal table with typed columns
// Split by commas not inside parentheses
const columnDefs = [];
let current = '';
let parenDepth = 0;
if (match) {
const columnName = match[1];
const rawType = match[2].toUpperCase();
const restOfLine = match[3] || '';
for (let i = 0; i < tableBody.length; i++) {
const char = tableBody[i];
if (char === '(') parenDepth++;
else if (char === ')') parenDepth--;
else if (char === ',' && parenDepth === 0) {
columnDefs.push(current.trim());
current = '';
continue;
}
current += char;
}
if (current.trim()) {
columnDefs.push(current.trim());
}
// Determine column properties
const isPrimaryKey = restOfLine
.toUpperCase()
.includes('PRIMARY KEY');
const isNotNull = restOfLine.toUpperCase().includes('NOT NULL');
const isUnique = restOfLine.toUpperCase().includes('UNIQUE');
for (const columnDef of columnDefs) {
const line = columnDef.trim();
// Extract default value
let defaultValue = '';
const defaultMatch = /DEFAULT\s+([^,\s)]+)/i.exec(restOfLine);
if (defaultMatch) {
defaultValue = defaultMatch[1];
// Skip constraints
if (
line.toUpperCase().startsWith('FOREIGN KEY') ||
line.toUpperCase().startsWith('PRIMARY KEY') ||
line.toUpperCase().startsWith('UNIQUE') ||
line.toUpperCase().startsWith('CHECK') ||
line.toUpperCase().startsWith('CONSTRAINT')
) {
continue;
}
// Map to appropriate SQLite storage class
let columnType = rawType;
if (rawType === 'INTEGER' || rawType === 'INT') {
columnType = 'INTEGER';
} else if (
['REAL', 'FLOAT', 'DOUBLE', 'NUMERIC', 'DECIMAL'].includes(
rawType
)
) {
columnType = 'REAL';
} else if (rawType === 'BLOB' || rawType === 'BINARY') {
columnType = 'BLOB';
} else if (
['TIMESTAMP', 'DATETIME', 'DATE'].includes(rawType)
) {
columnType = 'TIMESTAMP';
} else {
columnType = 'TEXT';
}
// Parse column: handle both quoted and unquoted identifiers
// Pattern: [quotes]columnName[quotes] dataType [constraints]
const columnPattern = /^["'`]?([\w]+)["'`]?\s+(\w+)(.*)$/i;
const columnMatch = columnPattern.exec(line);
// Add column to the table
currentTable.columns.push({
name: columnName,
type: columnType,
nullable: !isNotNull,
primaryKey: isPrimaryKey,
unique: isUnique || isPrimaryKey,
default: defaultValue,
increment: isPrimaryKey && columnType === 'INTEGER',
});
if (columnMatch) {
const columnName = columnMatch[1];
const rawType = columnMatch[2].toUpperCase();
const restOfLine = columnMatch[3] || '';
const upperRest = restOfLine.toUpperCase();
// Determine column properties
const isPrimaryKey = upperRest.includes('PRIMARY KEY');
const isAutoIncrement = upperRest.includes('AUTOINCREMENT');
const isNotNull =
upperRest.includes('NOT NULL') || isPrimaryKey;
const isUnique =
upperRest.includes('UNIQUE') || isPrimaryKey;
// Extract default value
let defaultValue = '';
const defaultMatch = /DEFAULT\s+([^,)]+)/i.exec(restOfLine);
if (defaultMatch) {
defaultValue = defaultMatch[1].trim();
// Remove quotes if present
if (
(defaultValue.startsWith("'") &&
defaultValue.endsWith("'")) ||
(defaultValue.startsWith('"') &&
defaultValue.endsWith('"'))
) {
defaultValue = defaultValue.slice(1, -1);
}
}
// Map to appropriate SQLite storage class
let columnType = rawType;
if (rawType === 'INTEGER' || rawType === 'INT') {
columnType = 'INTEGER';
} else if (
[
'REAL',
'FLOAT',
'DOUBLE',
'NUMERIC',
'DECIMAL',
].includes(rawType)
) {
columnType = 'REAL';
} else if (rawType === 'BLOB' || rawType === 'BINARY') {
columnType = 'BLOB';
} else if (
['TIMESTAMP', 'DATETIME', 'DATE', 'TIME'].includes(
rawType
)
) {
columnType = 'TIMESTAMP';
} else if (
['TEXT', 'VARCHAR', 'CHAR', 'CLOB', 'STRING'].includes(
rawType
) ||
rawType.startsWith('VARCHAR') ||
rawType.startsWith('CHAR')
) {
columnType = 'TEXT';
} else {
// Default to TEXT for unknown types
columnType = 'TEXT';
}
// Add column to the table
table.columns.push({
name: columnName,
type: columnType,
nullable: !isNotNull,
primaryKey: isPrimaryKey,
unique: isUnique,
default: defaultValue,
increment:
isPrimaryKey &&
isAutoIncrement &&
columnType === 'INTEGER',
});
}
}
}
if (table.columns.length > 0 || tableName === 'sqlite_sequence') {
tables.push(table);
}
}
return tables;