Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/Database/Adapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -729,12 +729,13 @@ abstract public function createDocument(Document $collection, Document $document
*
* @param Document $collection
* @param array<Document> $documents
* @param bool $ignore If true, silently ignore duplicate documents instead of throwing
*
* @return array<Document>
*
* @throws DatabaseException
*/
abstract public function createDocuments(Document $collection, array $documents): array;
abstract public function createDocuments(Document $collection, array $documents, bool $ignore = false): array;

/**
* Update Document
Expand Down
40 changes: 38 additions & 2 deletions src/Database/Adapter/Mongo.php
Original file line number Diff line number Diff line change
Expand Up @@ -1460,11 +1460,11 @@ public function castingBefore(Document $collection, Document $document): Documen
* @throws DuplicateException
* @throws DatabaseException
*/
public function createDocuments(Document $collection, array $documents): array
public function createDocuments(Document $collection, array $documents, bool $ignore = false): array
{
$name = $this->getNamespace() . '_' . $this->filter($collection->getId());

$options = $this->getTransactionOptions();

$records = [];
$hasSequence = null;
$documents = \array_map(fn ($doc) => clone $doc, $documents);
Expand All @@ -1487,6 +1487,42 @@ public function createDocuments(Document $collection, array $documents): array
$records[] = $record;
}

// In ignore mode, pre-filter duplicates within the same session to avoid
// BulkWriteException which would abort the transaction.
if ($ignore && !empty($records)) {
$uids = \array_filter(\array_map(fn ($r) => $r['_uid'] ?? null, $records));
if (!empty($uids)) {
$findOptions = $this->getTransactionOptions(['projection' => ['_uid' => 1]]);
$filters = ['_uid' => ['$in' => \array_values($uids)]];
if ($this->sharedTables) {
$filters['_tenant'] = $this->getTenantFilters($collection->getId());
}
$result = $this->client->find($name, $filters, $findOptions);
$existingUids = [];
foreach ($result->cursor->firstBatch ?? [] as $doc) {
$existingUids[$doc->_uid] = true;
}

if (!empty($existingUids)) {
$filteredRecords = [];
$filteredDocuments = [];
foreach ($records as $i => $record) {
$uid = $record['_uid'] ?? '';
if (!isset($existingUids[$uid])) {
$filteredRecords[] = $record;
$filteredDocuments[] = $documents[$i];
}
}
$records = $filteredRecords;
$documents = $filteredDocuments;
}
}

if (empty($records)) {
return [];
}
}

try {
$documents = $this->client->insertMany($name, $records, $options);
} catch (MongoException $e) {
Expand Down
2 changes: 1 addition & 1 deletion src/Database/Adapter/Pool.php
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ public function createDocument(Document $collection, Document $document): Docume
return $this->delegate(__FUNCTION__, \func_get_args());
}

public function createDocuments(Document $collection, array $documents): array
public function createDocuments(Document $collection, array $documents, bool $ignore = false): array
{
return $this->delegate(__FUNCTION__, \func_get_args());
}
Expand Down
29 changes: 29 additions & 0 deletions src/Database/Adapter/Postgres.php
Original file line number Diff line number Diff line change
Expand Up @@ -1365,6 +1365,35 @@ public function updateDocument(Document $collection, string $id, Document $docum
return $document;
}

protected function getInsertKeyword(bool $ignore): string
{
return 'INSERT INTO';
}

protected function getInsertSuffix(bool $ignore, string $table): string
{
if (!$ignore) {
return '';
}

$conflictTarget = $this->sharedTables ? '("_uid", "_tenant")' : '("_uid")';

return "ON CONFLICT {$conflictTarget} DO NOTHING";
}

protected function getInsertPermissionsSuffix(bool $ignore): string
{
if (!$ignore) {
return '';
}

$conflictTarget = $this->sharedTables
? '("_type", "_permission", "_document", "_tenant")'
: '("_type", "_permission", "_document")';

return "ON CONFLICT {$conflictTarget} DO NOTHING";
}
Comment on lines +1373 to +1395
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 PostgreSQL ON CONFLICT conflict targets missing COLLATE utf8_ci_ai

Both getInsertSuffix and getInsertPermissionsSuffix produce ON CONFLICT clauses whose conflict targets do not match the actual unique indexes created by the Postgres adapter. PostgreSQL uses inference to find the unique index — when a unique index was built with a non-default collation (here utf8_ci_ai, which is deterministic = false), that same collation must be specified in the conflict target, otherwise PostgreSQL will throw:

there is no unique or exclusion constraint matching the ON CONFLICT specification

Actual unique indexes vs. conflict targets:

Table Actual index Conflict target in PR Issue
main (non-shared) ("_uid" COLLATE utf8_ci_ai) ("_uid") ❌ missing COLLATE
main (shared) ("_uid" COLLATE utf8_ci_ai, "_tenant") ("_uid", "_tenant") ❌ missing COLLATE on _uid
perms (non-shared) (_document COLLATE utf8_ci_ai, _type, _permission) ("_type", "_permission", "_document") ❌ missing COLLATE on _document
perms (shared) (_tenant, _document, _type, _permission) ("_type", "_permission", "_document", "_tenant") ✅ no collation in index

The fixes:

// getInsertSuffix – main document table
protected function getInsertSuffix(bool $ignore, string $table): string
{
    if (!$ignore) {
        return '';
    }

    $conflictTarget = $this->sharedTables
        ? '("_uid" COLLATE utf8_ci_ai, "_tenant")'
        : '("_uid" COLLATE utf8_ci_ai)';

    return "ON CONFLICT {$conflictTarget} DO NOTHING";
}

// getInsertPermissionsSuffix – permissions table
protected function getInsertPermissionsSuffix(bool $ignore): string
{
    if (!$ignore) {
        return '';
    }

    $conflictTarget = $this->sharedTables
        ? '("_type", "_permission", "_document", "_tenant")'
        : '("_document" COLLATE utf8_ci_ai, "_type", "_permission")';

    return "ON CONFLICT {$conflictTarget} DO NOTHING";
}

Without this fix, createDocuments(..., ignore: true) will always fail at the PostgreSQL level for non-shared tables (and shared tables' main document insert), raising a PDOException rather than silently ignoring duplicates.

Copy link
Copy Markdown
Contributor Author

@premtsd-code premtsd-code Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The existing upsertDocuments at [Postgres.php:1462] uses ("_uid") without COLLATE and works in production

https://github.com/utopia-php/database/blob/csv-import-upsert-v2/src/Database/Adapter/Postgres.php#L1462


/**
* @param string $tableName
* @param string $columns
Expand Down
85 changes: 81 additions & 4 deletions src/Database/Adapter/SQL.php
Original file line number Diff line number Diff line change
Expand Up @@ -2471,11 +2471,59 @@ protected function execute(mixed $stmt): bool
* @throws DuplicateException
* @throws \Throwable
*/
public function createDocuments(Document $collection, array $documents): array
public function createDocuments(Document $collection, array $documents, bool $ignore = false): array
{
if (empty($documents)) {
return $documents;
}

// Pre-filter duplicates inside the transaction to prevent race conditions.
// Query which UIDs already exist and remove them from the batch.
if ($ignore) {
$collectionId = $collection->getId();
$name = $this->filter($collectionId);
$uids = \array_filter(\array_map(fn (Document $doc) => $doc->getId(), $documents));

if (!empty($uids)) {
$placeholders = [];
$binds = [];
foreach (\array_values(\array_unique($uids)) as $i => $uid) {
$key = ':_dup_uid_' . $i;
$placeholders[] = $key;
$binds[$key] = $uid;
}

$tenantFilter = '';
if ($this->sharedTables) {
$tenantFilter = ' AND _tenant = :_dup_tenant';
$binds[':_dup_tenant'] = $this->getTenant();
}
Comment on lines +2496 to +2500
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 tenantPerDocument pre-filter uses session tenant, misses cross-tenant duplicates

When tenantPerDocument is enabled, a single createDocuments call may contain documents for different tenants (each document->getTenant() can differ). The pre-filter query uses AND _tenant = :_dup_tenant bound to $this->getTenant() — the session-level tenant — so it never finds existing documents whose _tenant differs from the session tenant.

Those undetected duplicates are passed to INSERT IGNORE/ON CONFLICT DO NOTHING, which correctly discards them at the DB layer. However, SQL::createDocuments still returns the original $documents array (line 2658), so Database::createDocuments calls $onNext and increments $modified for documents that were silently skipped — an overcount identical to the known race-condition overcount.

The minimal fix is to filter by (_uid, _tenant) pairs instead of a single session tenant, or restrict this pre-check to non-tenantPerDocument mode and rely solely on the DB-level ignore keyword for that case.


$sql = 'SELECT _uid FROM ' . $this->getSQLTable($name)
. ' WHERE _uid IN (' . \implode(', ', $placeholders) . ')'
. $tenantFilter;

$stmt = $this->getPDO()->prepare($sql);
foreach ($binds as $k => $v) {
$stmt->bindValue($k, $v, $this->getPDOType($v));
}
$stmt->execute();
$existingUids = \array_flip(\array_column($stmt->fetchAll(), '_uid'));
$stmt->closeCursor();

if (!empty($existingUids)) {
$documents = \array_values(\array_filter(
$documents,
fn (Document $doc) => !isset($existingUids[$doc->getId()])
));
}
}

if (empty($documents)) {
return [];
}
}

$spatialAttributes = $this->getSpatialAttributes($collection);
$collection = $collection->getId();
try {
Expand Down Expand Up @@ -2573,8 +2621,9 @@ public function createDocuments(Document $collection, array $documents): array
$batchKeys = \implode(', ', $batchKeys);

$stmt = $this->getPDO()->prepare("
INSERT INTO {$this->getSQLTable($name)} {$columns}
{$this->getInsertKeyword($ignore)} {$this->getSQLTable($name)} {$columns}
VALUES {$batchKeys}
{$this->getInsertSuffix($ignore, $name)}
");

foreach ($bindValues as $key => $value) {
Expand All @@ -2588,8 +2637,9 @@ public function createDocuments(Document $collection, array $documents): array
$permissions = \implode(', ', $permissions);

$sqlPermissions = "
INSERT INTO {$this->getSQLTable($name . '_perms')} (_type, _permission, _document {$tenantColumn})
VALUES {$permissions};
{$this->getInsertKeyword($ignore)} {$this->getSQLTable($name . '_perms')} (_type, _permission, _document {$tenantColumn})
VALUES {$permissions}
{$this->getInsertPermissionsSuffix($ignore)}
";

$stmtPermissions = $this->getPDO()->prepare($sqlPermissions);
Expand All @@ -2608,6 +2658,33 @@ public function createDocuments(Document $collection, array $documents): array
return $documents;
}

/**
* Returns the INSERT keyword, optionally with IGNORE for duplicate handling.
* Override in adapter subclasses for DB-specific syntax.
*/
protected function getInsertKeyword(bool $ignore): string
{
return $ignore ? 'INSERT IGNORE INTO' : 'INSERT INTO';
}

/**
* Returns a suffix appended after VALUES clause for duplicate handling.
* Override in adapter subclasses (e.g., Postgres uses ON CONFLICT DO NOTHING).
*/
protected function getInsertSuffix(bool $ignore, string $table): string
{
return '';
}

/**
* Returns a suffix for the permissions INSERT statement when ignoring duplicates.
* Override in adapter subclasses for DB-specific syntax.
*/
protected function getInsertPermissionsSuffix(bool $ignore): string
{
return '';
}

/**
* @param Document $collection
* @param string $attribute
Expand Down
5 changes: 5 additions & 0 deletions src/Database/Adapter/SQLite.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@
*/
class SQLite extends MariaDB
{
protected function getInsertKeyword(bool $ignore): string
{
return $ignore ? 'INSERT OR IGNORE INTO' : 'INSERT INTO';
}

/**
* @inheritDoc
*/
Expand Down
Loading
Loading