The NLP semantic analysis engine can list which sources are similar to a specified source. Similarity between sources is determined by the number of entities that appear in both sources (the overlap), and the percentage of the source contents that contain overlap.
For each similar source, NLP returns a list of elements with the following format:
srcId,extId,percentageMatched,percentageNew,nbOfEntsInRefSrc,nbOfEntsInCommon,nbOfEntsInSimSrc,score
The following example demonstrates the listing of similar sources. It first limits the set of test sources to those that may describe an engine failure incident, by using GetByEntities() to select sources that contain entities from a list of appropriate entities. It then uses GetSimilar() to find sources similar to these test sources, which may indicate a pattern of similar incidents. GetSimilar() takes the default similarity algorithm ($$$SIMSRCSIMPLE) and its default algorithm parameter (“ent”). The program displays only those similar sources with a high similarity score (>.33). The similarity display omits the source external IDs:
#include %IKPublic
DomainCreateOrOpen
    // Open the domain named in dname if it already exists; otherwise create
    // and save it. Leaves the domain object reference in domoref.
    // Control then transfers to DeleteOldData (existing domain) or
    // ListerAndLoader (newly created domain).
    SET dname="mydomain"
    IF (##class(%iKnow.Domain).NameIndexExists(dname)) {
        WRITE "The ",dname," domain already exists",!
        SET domoref=##class(%iKnow.Domain).NameIndexOpen(dname)
        GOTO DeleteOldData
    }
    ELSE {
        WRITE "The ",dname," domain does not exist",!
        SET domoref=##class(%iKnow.Domain).%New(dname)
        // %Save() returns a status; check it so a failed save is not
        // reported as a successful creation.
        SET stat=domoref.%Save()
        IF stat '= 1 {
            WRITE "The ",dname," domain could not be saved: ",$System.Status.GetErrorText(stat),!
            QUIT
        }
        WRITE "Created the ",dname," domain with domain ID ",domoref.Id,!
        GOTO ListerAndLoader
    }
DeleteOldData
    // Drop the data from the domain referenced by domoref, then continue to
    // ListerAndLoader. On failure, report the error and stop.
    SET stat=domoref.DropData()
    IF stat {
        WRITE "Deleted the data from the ",dname," domain",!!
        GOTO ListerAndLoader
    }
    ELSE {
        // GetErrorText() returns the message as a string. DisplayError()
        // writes the message itself and returns a status, which WRITE would
        // then print as a stray trailing value.
        WRITE "DropData error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
ListerAndLoader
    // Record the domain ID, then build the SQL lister and the loader objects
    // for that domain. Later sections use domId, flister and myloader.
    SET domId=domoref.Id
    SET flister=##class(%iKnow.Source.SQL.Lister).%New(domId),myloader=##class(%iKnow.Source.Loader).%New(domId)
QueryBuild
    // Define the lister inputs: the SQL query text, the name of the ID field,
    // the name of the group field, and the list of data fields.
    SET idfld="UniqueVal",grpfld="Type"
    SET dataflds=$LISTBUILD("NarrativeFull")
    SET myquery="SELECT TOP 100 ID AS UniqueVal,Type,NarrativeFull FROM Aviation.Event"
UseLister
    // Add the query to the lister's batch, using the query text and field
    // names set up in QueryBuild. On failure, report the error and stop.
    SET stat=flister.AddListToBatch(myquery,idfld,grpfld,dataflds)
    IF stat '= 1 {
        // GetErrorText() returns the message as a string; DisplayError()
        // would print the message itself and leave WRITE to emit its
        // returned status as a stray trailing value.
        WRITE "The lister failed: ",$System.Status.GetErrorText(stat),!
        QUIT
    }
UseLoader
    // Process the batch with the loader. On failure, report the error and
    // stop.
    SET stat=myloader.ProcessBatch()
    IF stat '= 1 {
        // GetErrorText() returns the message as a string; DisplayError()
        // would print the message itself and leave WRITE to emit its
        // returned status as a stray trailing value.
        WRITE "The loader failed: ",$System.Status.GetErrorText(stat),!
        QUIT
    }
SourceCountQuery
    // Fetch the number of sources in the domain and display it. totsrc is
    // reused later as the page size for GetByEntities().
    SET totsrc=##class(%iKnow.Queries.SourceAPI).GetCountByDomain(domId)
    WRITE totsrc
    WRITE " total sources",!
SimiarSourcesQuery
    // For every source that contains one of the engine-related entities,
    // list the similar sources whose similarity score exceeds .33.
    // Each similar-source row has the elements:
    //   srcId,extId,percentageMatched,percentageNew,nbOfEntsInRefSrc,
    //   nbOfEntsInCommon,nbOfEntsInSimSrc,score
    // NOTE(review): the label spelling ("Simiar") is kept as-is so that any
    // reference to it from outside this section keeps working.
    SET engineents = $LB("engine","engine failure","engine power","loss of power","carburetor","crankshaft","piston")
    SET stat = ##class(%iKnow.Queries.SourceAPI).GetByEntities(.result,domId,engineents,1,totsrc)
    IF stat '= 1 {
        WRITE "GetByEntities failed: ",$System.Status.GetErrorText(stat),!
        QUIT
    }
    SET i=1
    WHILE $DATA(result(i)) {
        // Read the source ID straight from the list element instead of
        // flattening the row to a comma-delimited string first.
        SET srcId = $LISTGET(result(i),1)
        WRITE "Source ",srcId," contains an engine incident",!
        SET stat = ##class(%iKnow.Queries.SourceAPI).GetSimilar(.sim,domId,srcId,1,50,"",$$$SIMSRCSIMPLE,$LB("ent"))
        IF stat '= 1 {
            // Do not walk sim() after a failed call: it may still hold the
            // rows of a previous source.
            WRITE " GetSimilar failed: ",$System.Status.GetErrorText(stat),!
        }
        ELSE {
            SET j=1
            WHILE $DATA(sim(j)) {
                SET simrow = sim(j)
                // Element 8 is the score. Using list functions keeps the
                // element positions correct even if the external ID
                // (element 2) contains a comma.
                IF $LISTGET(simrow,8) > .33 {
                    WRITE " similar to source ",$LISTGET(simrow,1),": "
                    // Elements 3-8 only: the external ID is omitted.
                    WRITE $LISTTOSTRING($LIST(simrow,3,8)),!
                }
                SET j=j+1
            }
        }
        SET i=i+1
    }