Counting Sources and Sentences
To count the number of sources loaded, you can use the GetCountByDomain() method of the %iKnow.Queries.SourceAPI class.
To count the sentences in all of the sources loaded, you can use the GetCountByDomain() method of the %iKnow.Queries.SentenceAPI class. To count the sentences in a single source, you can use the GetCountBySource() method of the same class.
The following example uses data loaded from .txt files (such as source1.txt, source2.txt, etc.) in the mytextfiles directory to demonstrate these sentence count methods. The default Configuration is used:
DomainCreateOrOpen
    // Open the "mydomain" domain if it already exists; otherwise create and save it.
    // Either way the domain object ends up in domoref. An existing domain is sent to
    // DeleteOldData first; a newly created one goes straight to ListerAndLoader.
    SET dname="mydomain"
    IF ##class(%iKnow.Domain).NameIndexExists(dname) {
        SET domoref=##class(%iKnow.Domain).NameIndexOpen(dname)
        GOTO DeleteOldData
    }
    SET domoref=##class(%iKnow.Domain).%New(dname)
    // %Save() returns a status value; stop here instead of continuing with a
    // domain that could not be saved.
    SET stat=domoref.%Save()
    IF 'stat {
        WRITE "Domain save error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
    GOTO ListerAndLoader
DeleteOldData
    // Empty the existing domain with DropData() before loading, then continue
    // with ListerAndLoader. On failure, report the error and stop.
    SET stat=domoref.DropData()
    IF 'stat {
        // GetErrorText() returns the message as a string. DisplayError() writes the
        // message itself and returns a status, which WRITE would print as a stray value.
        WRITE "DropData error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
    GOTO ListerAndLoader
ListerAndLoader
    // Create a file lister and a loader for the domain, attach the lister to the
    // loader, and load the .txt files from the mytextfiles directory.
    SET domId=domoref.Id
    SET mylister=##class(%iKnow.Source.File.Lister).%New(domId)
    SET myloader=##class(%iKnow.Source.Loader).%New(domId)
    // Check this status before it is overwritten by the ProcessList() call below.
    SET stat=myloader.SetLister(mylister)
    IF 'stat {
        WRITE "Loader error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
    // NOTE(review): the backslash-separated path assumes a Windows installation - confirm.
    SET install=$SYSTEM.Util.DataDirectory()
    SET dirpath=install_"mgr\Temp\iknow\mytextfiles"
    // Arguments after the path: the list of file extensions to load, then 0 and "" -
    // presumably the recurse-subdirectories flag and a file filter; verify against
    // the %iKnow.Source.File.Lister reference.
    SET stat=myloader.ProcessList(dirpath,$LB("txt"),0,"")
    IF 'stat {
        // GetErrorText() returns the message as a string. DisplayError() writes the
        // message itself and returns a status, which WRITE would print as a stray value.
        WRITE "Loader error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
SourceSentenceQueries
    // Report the number of sources and sentences in the whole domain, then the
    // sentence count of each source returned by GetByDomain().
    SET numSrcD=##class(%iKnow.Queries.SourceAPI).GetCountByDomain(domId)
    WRITE "The domain contains ",numSrcD," sources",!
    SET numSentD=##class(%iKnow.Queries.SentenceAPI).GetCountByDomain(domId)
    WRITE "These sources contain ",numSentD," sentences",!!
    // Page 1 with a page size of 20: at most 20 sources are listed below.
    SET stat=##class(%iKnow.Queries.SourceAPI).GetByDomain(.result,domId,1,20)
    IF 'stat {
        WRITE "GetByDomain error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
    SET i=1
    WHILE $DATA(result(i)) {
        // Each result row is a list: source ID first, external ID second.
        SET srcId=$LISTGET(result(i),1)
        SET extId=$LISTGET(result(i),2)
        // Keep colon-delimited pieces 3-4 of the external ID (the file reference,
        // including a drive-letter colon), then take its last backslash-delimited
        // piece as the file name.
        SET fullref=$PIECE(extId,":",3,4)
        SET fname=$PIECE(fullref,"\",$LENGTH(fullref,"\"))
        // Pass only the source ID; the full row also carries the external ID,
        // which is not a source ID.
        SET numSentS=##class(%iKnow.Queries.SentenceAPI).GetCountBySource(domId,$LISTBUILD(srcId))
        WRITE fname," has ",numSentS," sentences",!
        SET i=i+1
    }
The following example uses data loaded from a field of the Aviation.Event SQL table to demonstrate these sentence count methods. In this example only a sample of 10 data records (TOP 10) is loaded:
DomainCreateOrOpen
    // Open the "mydomain" domain if it already exists; otherwise create and save it,
    // reporting each step. Either way the domain object ends up in domoref. An existing
    // domain is sent to DeleteOldData first; a new one goes straight to ListerAndLoader.
    SET dname="mydomain"
    IF ##class(%iKnow.Domain).NameIndexExists(dname) {
        WRITE "The ",dname," domain already exists",!
        SET domoref=##class(%iKnow.Domain).NameIndexOpen(dname)
        GOTO DeleteOldData
    }
    WRITE "The ",dname," domain does not exist",!
    SET domoref=##class(%iKnow.Domain).%New(dname)
    // %Save() returns a status value; only report the domain as created when the
    // save actually succeeded.
    SET stat=domoref.%Save()
    IF 'stat {
        WRITE "Domain save error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
    WRITE "Created the ",dname," domain with domain ID ",domoref.Id,!
    GOTO ListerAndLoader
DeleteOldData
    // Empty the existing domain with DropData() before loading, then continue
    // with ListerAndLoader. On failure, report the error and stop.
    SET stat=domoref.DropData()
    IF 'stat {
        // GetErrorText() returns the message as a string. DisplayError() writes the
        // message itself and returns a status, which WRITE would print as a stray value.
        WRITE "DropData error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
    WRITE "Deleted the data from the ",dname," domain",!!
    GOTO ListerAndLoader
ListerAndLoader
    // Create the two objects used for the load, both constructed with the domain ID:
    // an SQL lister (flister) and a loader (myloader).
    SET domId=domoref.Id
    SET flister=##class(%iKnow.Source.SQL.Lister).%New(domId),myloader=##class(%iKnow.Source.Loader).%New(domId)
QueryBuild
    // Set up the four values handed to the lister in UseLister: the query text,
    // the ID field name, the group field name, and the list of data field names.
    // The query selects 10 rows from Aviation.Event.
    SET myquery="SELECT Top 10 ID AS UniqueVal,Type,NarrativeFull FROM Aviation.Event"
    SET idfld="UniqueVal",grpfld="Type"
    SET dataflds=$LISTBUILD("NarrativeFull")
UseLister
    // Hand the query, ID field, group field and data fields to the SQL lister so
    // that the query's rows are added to the batch. On failure, report and stop.
    SET stat=flister.AddListToBatch(myquery,idfld,grpfld,dataflds)
    IF 'stat {
        // GetErrorText() returns the message as a string. DisplayError() writes the
        // message itself and returns a status, which WRITE would print as a stray value.
        WRITE "The lister failed: ",$System.Status.GetErrorText(stat),!
        QUIT
    }
UseLoader
    // Process the batch that the lister prepared. On failure, report and stop.
    SET stat=myloader.ProcessBatch()
    IF 'stat {
        // GetErrorText() returns the message as a string. DisplayError() writes the
        // message itself and returns a status, which WRITE would print as a stray value.
        WRITE "The loader failed: ",$System.Status.GetErrorText(stat),!
        QUIT
    }
SourceSentenceQueries
    // Report the number of sources and sentences in the whole domain, then the
    // sentence count of each source returned by GetByDomain().
    // The SourceAPI/SentenceAPI classes are used throughout, matching the calls
    // in the loop below.
    SET numSrcD=##class(%iKnow.Queries.SourceAPI).GetCountByDomain(domId)
    WRITE "The domain contains ",numSrcD," sources",!
    SET numSentD=##class(%iKnow.Queries.SentenceAPI).GetCountByDomain(domId)
    WRITE "These sources contain ",numSentD," sentences",!!
    // Page 1 with a page size of 20: at most 20 sources are listed below.
    SET stat=##class(%iKnow.Queries.SourceAPI).GetByDomain(.result,domId,1,20)
    IF 'stat {
        WRITE "GetByDomain error ",$System.Status.GetErrorText(stat),!
        QUIT
    }
    SET i=1
    WHILE $DATA(result(i)) {
        // Each result row is a list: source ID first, external ID second.
        SET srcId=$LISTGET(result(i),1)
        SET extId=$LISTGET(result(i),2)
        // Keep colon-delimited pieces 3-4 of the external ID as the display name.
        // NOTE(review): for SQL-lister sources these are presumably the group
        // field value and the ID field value - confirm. They are not file paths,
        // so no backslash parsing is applied.
        SET srcRef=$PIECE(extId,":",3,4)
        // Pass only the source ID; the full row also carries the external ID,
        // which is not a source ID.
        SET numSentS=##class(%iKnow.Queries.SentenceAPI).GetCountBySource(domId,$LISTBUILD(srcId))
        WRITE srcRef," has ",numSentS," sentences",!
        SET i=i+1
    }
For details on what NLP considers a sentence, refer to the Logical Text Units Identified by NLP section of the “Conceptual Overview” chapter.