Saturday, June 18, 2011

[NOT] IN, [NOT] EXISTS, JOINS with NULL values



-- Run the whole demo inside tempdb so that no user database is touched.
USE tempdb;
GO

-- Set up the first demo table: dbo.TestTable holds the non-nullable values 1..4
-- that are probed against TableWithNull in the queries further down.
--
-- Drop any copy left behind by an earlier (possibly aborted) run first, so the
-- script can be executed repeatedly. The drop lives in its own batch so that the
-- CREATE/INSERT batch below is always compiled against a clean slate.
IF OBJECT_ID('dbo.TestTable', 'U') IS NOT NULL
    DROP TABLE dbo.TestTable;
GO

CREATE TABLE dbo.TestTable (AID int NOT NULL);

INSERT INTO dbo.TestTable (AID) VALUES (1);
INSERT INTO dbo.TestTable (AID) VALUES (2);
INSERT INTO dbo.TestTable (AID) VALUES (3);
INSERT INTO dbo.TestTable (AID) VALUES (4);
GO

-- Set up the second demo table: TableWithNull has a nullable column and
-- deliberately contains one NULL row next to 0, 1 and 2.
--
-- Drop any copy left behind by an earlier (possibly aborted) run first, so the
-- script can be executed repeatedly. The table name is left unqualified, as it is
-- everywhere else in this script, so the guard and the CREATE resolve the same way.
IF OBJECT_ID('TableWithNull', 'U') IS NOT NULL
    DROP TABLE TableWithNull;
GO

CREATE TABLE TableWithNull (BID int NULL DEFAULT (NULL));

INSERT INTO TableWithNull (BID) VALUES (NULL);
INSERT INTO TableWithNull (BID) VALUES (0);
INSERT INTO TableWithNull (BID) VALUES (1);
INSERT INTO TableWithNull (BID) VALUES (2);
GO

-- Sanity check: list the contents of both tables before running the comparisons.
SELECT BID FROM TableWithNull; -- NULL, 0, 1, 2
SELECT AID FROM TestTable;     -- 1, 2, 3, 4

-- IN versus NOT IN when the subquery returns a NULL.

-- IN behaves as expected: a row qualifies as soon as one comparison is TRUE.
SELECT AID
FROM TestTable
WHERE AID IN (SELECT BID FROM TableWithNull); -- 1, 2

-- NOT IN returns nothing, although 3 and 4 have no match in TableWithNull.
SELECT AID
FROM TestTable
WHERE AID NOT IN (SELECT BID FROM TableWithNull); -- 0 rows (one might expect 3, 4)

-- Why: "AID NOT IN (NULL, 0, 1, 2)" is evaluated as
-- "AID <> NULL AND AID <> 0 AND AID <> 1 AND AID <> 2". The comparison with NULL
-- is UNKNOWN, so the whole predicate can never be TRUE and every row is rejected.
-- Either filter the NULLs out of the subquery, as below, or use NOT EXISTS.
SELECT AID
FROM TestTable
WHERE AID NOT IN (
    SELECT BID
    FROM TableWithNull
    WHERE BID IS NOT NULL
); -- 3, 4 (expected result)
GO

-- EXISTS versus NOT EXISTS with a correlated subquery.
-- EXISTS only tests whether the subquery yields a row, so the NULL row in
-- TableWithNull (which never satisfies the equality) does not affect either result.
SELECT t.AID
FROM TestTable AS t
WHERE EXISTS (
    SELECT 1
    FROM TableWithNull AS n
    WHERE n.BID = t.AID
); -- 1, 2

SELECT t.AID
FROM TestTable AS t
WHERE NOT EXISTS (
    SELECT 1
    FROM TableWithNull AS n
    WHERE n.BID = t.AID
); -- 3, 4
GO

-- The EXISTS keyword is a good way to test sub-queries: the sub-query references the outer query's columns, which makes it a correlated sub-query.
-- With IN you can compare only one column, but with EXISTS you can compare multiple columns between the outer and inner queries (sub-queries or derived queries).

-- The same two questions answered with joins.

-- Inner join: keeps only the TestTable rows that have a matching BID.
SELECT t.AID -- 1, 2
FROM TestTable AS t
INNER JOIN TableWithNull AS n
    ON n.BID = t.AID;

-- Anti-join: the left join keeps every TestTable row, and the IS NULL filter then
-- keeps only those for which no TableWithNull row matched. The NULL row stored in
-- TableWithNull never satisfies the ON condition, so it cannot produce a match.
SELECT t.AID -- 3, 4
FROM TestTable AS t
LEFT JOIN TableWithNull AS n
    ON n.BID = t.AID
WHERE n.BID IS NULL;
GO

-- Another catch: an unqualified column name inside a subquery.
-- [AID] does not exist in [TableWithNull], yet the statement below raises no error:
-- the name silently resolves to the OUTER table's column (TestTable.AID), turning
-- the subquery into a correlated one. Every row is then compared with its own
-- value, so with TableWithNull being non-empty the query returns 0 rows.
select * from TestTable where AID not in (SELECT AID FROM TableWithNull) -- Query works (no error), 0 rows
go

-- So it is always advised to qualify columns with a table alias; the mistake is
-- then reported instead of being silently accepted.
-- This statement is EXPECTED to fail. It is kept in a batch of its own because
-- "Invalid column name" is a compile-time error that rejects the entire batch:
-- sharing a batch with the query above or with the cleanup below would prevent
-- both from ever executing.
select * from TestTable A where A.AID not in (SELECT B.AID FROM TableWithNull B) -- Invalid column name 'AID'.
go

-- Final Cleanup
drop table TableWithNull
drop table TestTable
go



No comments: