2009年2月5日 星期四

Create a thesaurus to expand search terms with NSE 9.5


/*
  Net Search Extender 也可以對 Varchar 型態做 Search。
  建立詞典 Thesaurus 後,也可以藉此查出相關聯或者存在同義詞的資料出來
  ※ 中文 workable
*/

/* 建立測試 Table*/

CREATE TABLE HEADLINE
(
  SNO INTEGER NOT NULL PRIMARY KEY,
  TXT VARCHAR(128)
);

/* 新增與 Alonso,西班牙相關的資料 */

INSERT INTO HEADLINE VALUES
(1,'Focused Alonso sets sights on title')
,(2,'Renault expect to challenge again')
,(3,'Cost-cutting plan agreed for F1')
,(4,'Discovery of a Sunken Armada from the Golden Age of Piracy')
,(5,'Pasties challenge paella')
,(6,'Matador aged 11 kills six bulls')
,(7,'Fernando Alonso wins the Singapore Grand Prix');

/* 啟動 NSE Service */

db2text start

/* 建立 index 在 headline 的 txt 欄位上 */

db2text create index headline_idx for text on headline(txt)

/* refresh index */

db2text update index headline_idx for text


/* 建立詞典,存放在sqllib\db2ext\下,名稱給定為 nsesamplethes.def */


:WORDS
  Alonso
 .RELATED_TO F1
 .RELATED_TO Renault
 .RELATED_TO Spain
 .SYNONYM_OF Fernando Alonso

:WORDS
  Spain
 .RELATED_TO Matador
 .RELATED_TO paella
 .RELATED_TO Armada



/*
  Compile nsesamplethes.def
  1. 用db2extth compile
  2. 1208 為 UTF-8
  3. -t compile 產生的檔案所放置的路徑
*/


C:\Program Files\IBM\SQLLIB\db2ext>db2extth -f nsesamplethes.def -ccsid 1208 -t thes\

/* compile 詞典後產生檔案,如圖 */


/* 查詢與Alonso有關的資料 */

db2 => select sno,txt from headline \
db2 (續) => => where contains(txt,'THESAURUS "nsesamplethes" \
db2 (續) => => EXPAND RELATED TERM OF "alonso"') > 0

SNO TXT
--- ---------------------------------------------
 1 Focused Alonso sets sights on title
 2 Renault expect to challenge again
 3 Cost-cutting plan agreed for F1
 7 Fernando Alonso wins the Singapore Grand Prix

  已選取 4 個記錄。

/* 查詢與Alonso同義的資料 */

db2 => select sno,txt from headline \
db2 (續) => => where contains(txt,'THESAURUS "nsesamplethes"
db2 (續) => => EXPAND SYNONYM TERM OF "alonso"') > 0

SNO TXT
--- ---------------------------------------------
 1 Focused Alonso sets sights on title
 7 Fernando Alonso wins the Singapore Grand Prix

  已選取 2 個記錄。


/* 查詢與spain相關的資料 */

db2 => select sno,txt from headline \
db2 (續) => => where contains(txt,'THESAURUS "nsesamplethes"
db2 (續) => => EXPAND RELATED TERM OF "spain"') > 0

SNO TXT
--- ---------------------------------------------
 1 Focused Alonso sets sights on title
 4 Discovery of a Sunken Armada from the Golden Age of Piracy
 5 Pasties challenge paella
 6 Matador aged 11 kills six bulls
 7 Fernando Alonso wins the Singapore Grand Prix

  已選取 5 個記錄。


/* 查詢與spain同義的資料 */

db2 => select sno,txt from headline \
db2 (續) => => where contains(txt,'THESAURUS "nsesamplethes"
db2 (續) => => EXPAND SYNONYM TERM OF "spain"') > 0

SNO TXT
--- ---------------------------------------------

  已選取 0 個記錄。