@@ -5621,6 +5621,7 @@ public function createDocument(string $collection, Document $document): Document
56215621 * @param int $batchSize
56225622 * @param (callable(Document): void)|null $onNext
56235623 * @param (callable(Throwable): void)|null $onError
5624+ * @param bool $ignore If true, silently ignore duplicate documents instead of throwing
56245625 * @return int
56255626 * @throws AuthorizationException
56265627 * @throws StructureException
@@ -5633,6 +5634,7 @@ public function createDocuments(
56335634 int $ batchSize = self ::INSERT_BATCH_SIZE ,
56345635 ?callable $ onNext = null ,
56355636 ?callable $ onError = null ,
5637+ bool $ ignore = false ,
56365638 ): int {
56375639 if (!$ this ->adapter ->getSharedTables () && $ this ->adapter ->getTenantPerDocument ()) {
56385640 throw new DatabaseException ('Shared tables must be enabled if tenant per document is enabled. ' );
@@ -5653,6 +5655,71 @@ public function createDocuments(
56535655 $ time = DateTime::now ();
56545656 $ modified = 0 ;
56555657
5658+ // Deduplicate intra-batch documents by ID when ignore mode is on.
5659+ // Keeps the first occurrence, mirrors upsertDocuments' seenIds check.
5660+ if ($ ignore ) {
5661+ $ seenIds = [];
5662+ $ deduplicated = [];
5663+ foreach ($ documents as $ document ) {
5664+ $ docId = $ document ->getId ();
5665+ if ($ docId !== '' && isset ($ seenIds [$ docId ])) {
5666+ continue ;
5667+ }
5668+ if ($ docId !== '' ) {
5669+ $ seenIds [$ docId ] = true ;
5670+ }
5671+ $ deduplicated [] = $ document ;
5672+ }
5673+ $ documents = $ deduplicated ;
5674+ }
5675+
5676+ // When ignore mode is on, pre-fetch the IDs of documents that already exist
5677+ // so duplicates are skipped for both relationship writes and the batch insert
5678+ $ preExistingIds = [];
5679+ $ tenantPerDocument = $ this ->adapter ->getSharedTables () && $ this ->adapter ->getTenantPerDocument ();
5680+ if ($ ignore ) {
5681+ if ($ tenantPerDocument ) {
5682+ $ idsByTenant = [];
5683+ foreach ($ documents as $ doc ) {
5684+ $ idsByTenant [$ doc ->getTenant ()][] = $ doc ->getId ();
5685+ }
5686+ foreach ($ idsByTenant as $ tenant => $ tenantIds ) {
5687+ $ tenantIds = \array_values (\array_unique ($ tenantIds ));
5688+ foreach (\array_chunk ($ tenantIds , \max (1 , $ this ->maxQueryValues )) as $ idChunk ) {
5689+ $ existing = $ this ->authorization ->skip (fn () => $ this ->withTenant ($ tenant , fn () => $ this ->silent (fn () => $ this ->find (
5690+ $ collection ->getId (),
5691+ [
5692+ Query::equal ('$id ' , $ idChunk ),
5693+ Query::select (['$id ' ]),
5694+ Query::limit (\count ($ idChunk )),
5695+ ]
5696+ ))));
5697+ foreach ($ existing as $ doc ) {
5698+ $ preExistingIds [$ tenant . ': ' . $ doc ->getId ()] = true ;
5699+ }
5700+ }
5701+ }
5702+ } else {
5703+ $ inputIds = \array_values (\array_unique (\array_filter (
5704+ \array_map (fn (Document $ doc ) => $ doc ->getId (), $ documents )
5705+ )));
5706+
5707+ foreach (\array_chunk ($ inputIds , \max (1 , $ this ->maxQueryValues )) as $ idChunk ) {
5708+ $ existing = $ this ->authorization ->skip (fn () => $ this ->silent (fn () => $ this ->find (
5709+ $ collection ->getId (),
5710+ [
5711+ Query::equal ('$id ' , $ idChunk ),
5712+ Query::select (['$id ' ]),
5713+ Query::limit (\count ($ idChunk )),
5714+ ]
5715+ )));
5716+ foreach ($ existing as $ doc ) {
5717+ $ preExistingIds [$ doc ->getId ()] = true ;
5718+ }
5719+ }
5720+ }
5721+ }
5722+
56565723 foreach ($ documents as $ document ) {
56575724 $ createdAt = $ document ->getCreatedAt ();
56585725 $ updatedAt = $ document ->getUpdatedAt ();
@@ -5693,15 +5760,33 @@ public function createDocuments(
56935760 }
56945761
56955762 if ($ this ->resolveRelationships ) {
5696- $ document = $ this ->silent (fn () => $ this ->createDocumentRelationships ($ collection , $ document ));
5763+ $ preExistKey = $ tenantPerDocument
5764+ ? $ document ->getTenant () . ': ' . $ document ->getId ()
5765+ : $ document ->getId ();
5766+
5767+ if (!isset ($ preExistingIds [$ preExistKey ])) {
5768+ $ document = $ this ->silent (fn () => $ this ->createDocumentRelationships ($ collection , $ document ));
5769+ }
56975770 }
56985771
56995772 $ document = $ this ->adapter ->castingBefore ($ collection , $ document );
57005773 }
57015774
57025775 foreach (\array_chunk ($ documents , $ batchSize ) as $ chunk ) {
5703- $ batch = $ this ->withTransaction (function () use ($ collection , $ chunk ) {
5704- return $ this ->adapter ->createDocuments ($ collection , $ chunk );
5776+ if ($ ignore && !empty ($ preExistingIds )) {
5777+ $ chunk = \array_values (\array_filter ($ chunk , function (Document $ doc ) use ($ preExistingIds , $ tenantPerDocument ) {
5778+ $ key = $ tenantPerDocument
5779+ ? $ doc ->getTenant () . ': ' . $ doc ->getId ()
5780+ : $ doc ->getId ();
5781+ return !isset ($ preExistingIds [$ key ]);
5782+ }));
5783+ if (empty ($ chunk )) {
5784+ continue ;
5785+ }
5786+ }
5787+
5788+ $ batch = $ this ->withTransaction (function () use ($ collection , $ chunk , $ ignore ) {
5789+ return $ this ->adapter ->createDocuments ($ collection , $ chunk , $ ignore );
57055790 });
57065791
57075792 $ batch = $ this ->adapter ->getSequences ($ collection ->getId (), $ batch );
@@ -7116,18 +7201,53 @@ public function upsertDocumentsWithIncrease(
71167201 $ created = 0 ;
71177202 $ updated = 0 ;
71187203 $ seenIds = [];
7119- foreach ($ documents as $ key => $ document ) {
7120- if ($ this ->getSharedTables () && $ this ->getTenantPerDocument ()) {
7121- $ old = $ this ->authorization ->skip (fn () => $ this ->withTenant ($ document ->getTenant (), fn () => $ this ->silent (fn () => $ this ->getDocument (
7122- $ collection ->getId (),
7123- $ document ->getId (),
7124- ))));
7204+
7205+ // Batch-fetch existing documents with chunked find() queries (grouped per tenant when tenant-per-document is on) instead of N individual getDocument() calls
7206+ $ ids = \array_filter (\array_map (fn ($ doc ) => $ doc ->getId (), $ documents ));
7207+ $ existingDocs = [];
7208+ $ upsertTenantPerDocument = $ this ->getSharedTables () && $ this ->getTenantPerDocument ();
7209+
7210+ if (!empty ($ ids )) {
7211+ $ uniqueIds = \array_values (\array_unique ($ ids ));
7212+
7213+ if ($ upsertTenantPerDocument ) {
7214+ // Group IDs by tenant and fetch each group separately
7215+ // Use composite key tenant:id to avoid cross-tenant collisions
7216+ $ idsByTenant = [];
7217+ foreach ($ documents as $ doc ) {
7218+ $ tenant = $ doc ->getTenant ();
7219+ $ idsByTenant [$ tenant ][] = $ doc ->getId ();
7220+ }
7221+ foreach ($ idsByTenant as $ tenant => $ tenantIds ) {
7222+ $ tenantIds = \array_values (\array_unique ($ tenantIds ));
7223+ foreach (\array_chunk ($ tenantIds , \max (1 , $ this ->maxQueryValues )) as $ idChunk ) {
7224+ $ fetched = $ this ->authorization ->skip (fn () => $ this ->withTenant ($ tenant , fn () => $ this ->silent (fn () => $ this ->find (
7225+ $ collection ->getId (),
7226+ [Query::equal ('$id ' , $ idChunk ), Query::limit (\count ($ idChunk ))],
7227+ ))));
7228+ foreach ($ fetched as $ doc ) {
7229+ $ existingDocs [$ tenant . ': ' . $ doc ->getId ()] = $ doc ;
7230+ }
7231+ }
7232+ }
71257233 } else {
7126- $ old = $ this ->authorization ->skip (fn () => $ this ->silent (fn () => $ this ->getDocument (
7127- $ collection ->getId (),
7128- $ document ->getId (),
7129- )));
7234+ foreach (\array_chunk ($ uniqueIds , \max (1 , $ this ->maxQueryValues )) as $ idChunk ) {
7235+ $ fetched = $ this ->authorization ->skip (fn () => $ this ->silent (fn () => $ this ->find (
7236+ $ collection ->getId (),
7237+ [Query::equal ('$id ' , $ idChunk ), Query::limit (\count ($ idChunk ))],
7238+ )));
7239+ foreach ($ fetched as $ doc ) {
7240+ $ existingDocs [$ doc ->getId ()] = $ doc ;
7241+ }
7242+ }
71307243 }
7244+ }
7245+
7246+ foreach ($ documents as $ key => $ document ) {
7247+ $ lookupKey = $ upsertTenantPerDocument
7248+ ? $ document ->getTenant () . ': ' . $ document ->getId ()
7249+ : $ document ->getId ();
7250+ $ old = $ existingDocs [$ lookupKey ] ?? new Document ();
71317251
71327252 // Extract operators early to avoid comparison issues
71337253 $ documentArray = $ document ->getArrayCopy ();
0 commit comments