The query below already has the indexes suggested by its execution plan, but it still underperforms on larger datasets. Are the additional NULL checks needed, and why? Do you see anything that can be done to reduce the overall execution time? The execution plan shows clustered index seeks, and no additional indexes are recommended.
-- Records exact duplicates of messages within one group chat in
-- [ced].[MessageDuplicateStatus].
--
-- Parameters:
--   @GrpChatId   - only messages with this [GrpChat_Id] are compared.
--   @RequestGuid - written to [Request_Guid] on every inserted row.
--
-- Why the "(a IS NULL AND b IS NULL) OR a = b" pattern is needed:
--   in SQL, NULL = NULL evaluates to UNKNOWN rather than TRUE, so a plain
--   "a = b" would reject two messages that both have no value in a column.
--   The extra IS NULL branch makes "both missing" count as equal.
--
-- NOTE(review): L2EDWithSubDupes is referenced twice (once by L2ED, once by
--   its NOT EXISTS). If the engine does not materialize CTEs, the whole
--   self-join may be evaluated twice; consider loading it into a temp table
--   first and reading that twice - confirm against the actual plan.
-- NOTE(review): if a message can have more than one row in
--   [ced].[GrpAttachmentName_Hash], the same (original, duplicate) pair can be
--   produced several times - confirm whether that is intended.
WITH L2EDWithSubDupes
AS
(
    -- Every ordered pair (m1 = earlier/original, m2 = later/potential duplicate)
    -- of not-yet-classified messages in the chat whose compared columns match.
    SELECT
        m1.[Id] AS [OriginalEvent_Id]
        , m2.[Id] AS [PotentiallyDuplicateEvent_Id]
        , CASE
            WHEN (m1.[GrpMessageViewers_Hash] IS NULL AND m2.[GrpMessageViewers_Hash] IS NULL)
                OR (m1.[GrpMessageViewers_Hash] = m2.[GrpMessageViewers_Hash])
            THEN 1
            ELSE 0
          END AS [ViewersEqual]
        , gc.[HasHardEvidence]
    FROM
        [ced].[Message] m1 -- m1: master/original
        INNER JOIN [ced].[Message] m2 ON -- m2: potential duplicate
            -- The timestamp is the only plain equality between m1 and m2; the
            -- remaining comparisons are NULL-safe equalities (see header).
            m1.[UtcTimestamp] = m2.[UtcTimestamp]
            AND ((m1.[Sender_GrpParticipant_Id] IS NULL AND m2.[Sender_GrpParticipant_Id] IS NULL)
                OR (m1.[Sender_GrpParticipant_Id] = m2.[Sender_GrpParticipant_Id]))
            AND ((m1.[MessageText_Hash] IS NULL AND m2.[MessageText_Hash] IS NULL)
                OR (m1.[MessageText_Hash] = m2.[MessageText_Hash]))
            AND ((m1.[SystemMessageType] IS NULL AND m2.[SystemMessageType] IS NULL)
                OR (m1.[SystemMessageType] = m2.[SystemMessageType]))
        INNER JOIN [ced].[GrpChat] gc ON
            m1.[GrpChat_Id] = gc.[Id]
        -- Optional per-message details; a missing row on both sides compares as
        -- "equal" through the NULL-safe checks in the WHERE clause.
        LEFT JOIN [ced].[GrpAttachmentName_Hash] anh1 ON
            m1.[Id] = anh1.[Message_Id]
        LEFT JOIN [ced].[GrpAttachmentName_Hash] anh2 ON
            m2.[Id] = anh2.[Message_Id]
        LEFT JOIN [ced].[MessageThread] mt1 ON
            m1.[Id] = mt1.[Id]
        LEFT JOIN [ced].[MessageThread] mt2 ON
            m2.[Id] = mt2.[Id]
        LEFT JOIN [ced].[MessageChange] mc1 ON
            m1.[Id] = mc1.[Id]
        LEFT JOIN [ced].[MessageChange] mc2 ON
            m2.[Id] = mc2.[Id]
    WHERE
        -- Row filters live here rather than in the join's ON clause; for an
        -- INNER JOIN the result is the same.
        m1.[GrpChat_Id] = @GrpChatId
        AND m2.[GrpChat_Id] = @GrpChatId
        -- m1 must sort strictly before m2 on (SourceItem_Id, OrdinalPosition, Id).
        -- A row can never sort strictly before itself, so no separate
        -- "m1.[Id] <> m2.[Id]" test is needed
        -- (assumes [Id] is never NULL - TODO confirm).
        AND (
            m1.[SourceItem_Id] < m2.[SourceItem_Id]
            OR (
                m1.[SourceItem_Id] = m2.[SourceItem_Id]
                AND (
                    m1.[OrdinalPosition] < m2.[OrdinalPosition]
                    OR (
                        m1.[OrdinalPosition] = m2.[OrdinalPosition]
                        AND m1.[Id] < m2.[Id]
                    )
                )
            )
        )
        -- Skip messages that already have a duplicate-status row.
        AND NOT EXISTS (SELECT 1 FROM [ced].[MessageDuplicateStatus] (NOLOCK) WHERE Id = m1.[Id])
        AND NOT EXISTS (SELECT 1 FROM [ced].[MessageDuplicateStatus] (NOLOCK) WHERE Id = m2.[Id])
        AND ((mt1.[ReplyTo_MessageThread_Hash] IS NULL AND mt2.[ReplyTo_MessageThread_Hash] IS NULL)
            OR (mt1.[ReplyTo_MessageThread_Hash] = mt2.[ReplyTo_MessageThread_Hash]))
        AND ((mc1.[SourceVersionID] IS NULL AND mc2.[SourceVersionID] IS NULL)
            OR (mc1.[SourceVersionID] = mc2.[SourceVersionID]))
        AND ((anh1.[GrpAttachmentName_Hash] IS NULL AND anh2.[GrpAttachmentName_Hash] IS NULL)
            OR (anh1.[GrpAttachmentName_Hash] = anh2.[GrpAttachmentName_Hash]))
)
,
L2ED -- ensure that only first message is listed in the OriginalEvent_Id column
     -- in case we have multiple duplicates
AS
(
    SELECT
        l2sd.[PotentiallyDuplicateEvent_Id]
        , l2sd.[OriginalEvent_Id]
        , l2sd.[ViewersEqual]
        , l2sd.[HasHardEvidence]
    FROM
        L2EDWithSubDupes l2sd
    WHERE
        -- Drop pairs whose "original" is itself a duplicate of something else.
        -- NOT EXISTS instead of NOT IN: same result for non-NULL ids, and it
        -- cannot collapse to zero rows if the subquery ever yields a NULL.
        NOT EXISTS
        (
            SELECT 1
            FROM
                L2EDWithSubDupes l2sd1
            WHERE
                l2sd1.[PotentiallyDuplicateEvent_Id] = l2sd.[OriginalEvent_Id]
        )
)
INSERT INTO [ced].[MessageDuplicateStatus] (
    [Id]
    , [DuplicateOf_Message_id]
    , [Type]
    , [Request_Guid]
)
SELECT
    t.[PotentiallyDuplicateEvent_Id]
    , t.[OriginalEvent_Id]
    , 1 AS [Type]
    , @RequestGuid AS [Request_Guid]
FROM
    L2ED t
WHERE
    -- A pair is recorded only when the viewers match or the chat has hard evidence.
    t.[ViewersEqual] = 1
    OR t.[HasHardEvidence] = 1
OPTION (Label = 'DA_AdoCaseRepository_SQL_L2DExact')
"[a query] already has indexes based on its execution plan."