@@ -17,6 +17,7 @@ use crate::file_doc::FileDoc;
1717use crate :: file_view:: FileView ;
1818use crate :: utils;
1919use crate :: utils:: is_ascii_alphanumeric;
20+ use convert_case:: { Case , Casing } ;
2021use jieba_rs:: Jieba ;
2122use pinyin:: ToPinyin ;
2223use std:: sync:: { Arc , Mutex } ;
@@ -40,7 +41,7 @@ static mut IS_FULL_INDEXING: bool = true;
4041impl IdxStore {
4142 pub fn search_tokenize ( & self , hans : String ) -> String {
4243 if is_ascii_alphanumeric ( hans. as_str ( ) ) {
43- return hans . as_str ( ) . to_lowercase ( ) ;
44+ return self . ascii_tokenize ( hans ) ;
4445 }
4546 let space = " " ;
4647 let hans = hans. replace ( "-" , space) . replace ( "_" , space) ;
@@ -55,10 +56,15 @@ impl IdxStore {
5556 token_text. into_iter ( ) . collect :: < Vec < String > > ( ) . join ( " " )
5657 }
5758
59+ fn ascii_tokenize ( & self , asc : String ) -> String {
60+ let string = asc. to_case ( Case :: Title ) . to_lowercase ( ) ;
61+ return format ! ( "{} {}" , string, asc. to_lowercase( ) ) ;
62+ }
5863 pub fn tokenize ( & self , hans : String ) -> String {
5964 // return hans;
6065 if is_ascii_alphanumeric ( hans. as_str ( ) ) {
61- return hans;
66+ // return hans;
67+ return self . ascii_tokenize ( hans) ;
6268 }
6369 let space = " " ;
6470 let hans = hans. replace ( "-" , space) . replace ( "_" , space) ;
@@ -107,10 +113,11 @@ impl IdxStore {
107113 }
108114
109115 pub fn search ( & self , kw : String , limit : usize ) -> Vec < FileView > {
110- let mut paths = self . search_paths ( self . search_tokenize ( kw. clone ( ) . to_lowercase ( ) ) , limit) ;
111- if paths. is_empty ( ) {
112- paths = self . suggest_path ( kw, limit) ;
113- }
116+ let mut paths = self . search_paths ( self . search_tokenize ( kw. clone ( ) ) , limit) ;
117+ // if paths.is_empty() {
118+ // paths = self.suggest_path(kw, limit);
119+ // }
120+ println ! ( "{:?}" , paths) ;
114121 let file_views = self . parse_file_views ( paths) ;
115122
116123 file_views
@@ -126,12 +133,9 @@ impl IdxStore {
126133 ) -> Vec < FileView > {
127134 let searcher = self . reader . searcher ( ) ;
128135
129- //
130- println ! ( "{}" , kw. to_lowercase( ) ) ;
131-
132136 let kw_query = self
133137 . query_parser
134- . parse_query ( & self . search_tokenize ( kw. to_lowercase ( ) ) )
138+ . parse_query ( & self . search_tokenize ( kw) )
135139 . ok ( )
136140 . unwrap ( ) ;
137141 let mut subqueries = vec ! [ ( Occur :: Must , kw_query) ] ;
@@ -188,7 +192,7 @@ impl IdxStore {
188192 }
189193
190194 pub fn suggest ( & self , kw : String , limit : usize ) -> Vec < FileView > {
191- let mut paths = self . search_paths ( self . search_tokenize ( kw. clone ( ) . to_lowercase ( ) ) , limit) ;
195+ let mut paths = self . search_paths ( self . search_tokenize ( kw. clone ( ) ) , limit) ;
192196 if paths. is_empty ( ) {
193197 paths = self . suggest_path ( kw, limit) ;
194198 }
@@ -315,7 +319,7 @@ impl IdxStore {
315319 pub fn new ( path : & str ) -> IdxStore {
316320 let index_path = std:: path:: Path :: new ( path) ;
317321 let mut schema_builder = Schema :: builder ( ) ;
318- let name_field = schema_builder. add_text_field ( "name" , TEXT | STORED ) ;
322+ let name_field = schema_builder. add_text_field ( "name" , TEXT ) ;
319323 let path_field = schema_builder. add_bytes_field ( "path" , INDEXED | STORED ) ;
320324 let is_dir_field = schema_builder. add_bytes_field ( "is_dir_field" , INDEXED ) ;
321325 let ext_field = schema_builder. add_text_field ( "ext" , TEXT ) ;
@@ -420,28 +424,55 @@ impl IdxStore {
420424#[ cfg( test) ]
421425mod tests {
422426 use super :: * ;
427+ use std:: thread:: sleep;
423428
#[test]
fn t1() {
    // Start from a clean index directory. Removal is best-effort: on a first
    // run the directory does not exist, so the error is discarded explicitly
    // instead of leaving an unused `Result`.
    let path = "./tmp";
    let _ = fs::remove_dir_all(path);
    let mut store = IdxStore::new(path);

    // Sample entries, including repeated ones and camel-case file names.
    let names = vec![
        "jack rose",
        "JavaHow",
        "patch",
        "patch",
        "patch",
        "data",
        "patch.java",
        "patch.java",
        "DataPatchController.java",
        "patch.java",
        "DataPatchController.java",
        "DataPatchController.java",
        "java",
        "data",
        "data",
    ];

    // Each entry is passed as both of the first two arguments of `add`
    // (presumably name and path — confirm against `IdxStore::add`).
    for name in names {
        store.add(name.to_string(), name.to_string(), false, "".to_string());
    }

    store.commit();
    // NOTE(review): presumably gives the index reader time to observe the
    // commit before searching — confirm whether an explicit reload exists.
    sleep(Duration::from_secs(1));

    // Manual smoke check: print the names of the hits for a lowercase,
    // unseparated query.
    let hits = store.search("datapatchcontroller".to_string(), 10);
    for hit in hits {
        println!("{}", hit.name);
    }
}
433470
434471 #[ test]
435472 fn t2 ( ) {
436473 let idx_path = format ! ( "{}{}" , utils:: data_dir( ) , "/orangecachedata/idx" ) ;
437474 let idx_store = Arc :: new ( IdxStore :: new ( & idx_path) ) ;
438- let vec = idx_store. search_with_filter (
439- "SearchBox" . to_string ( ) ,
440- 100 ,
441- None ,
442- None ,
443- Some ( "/Users/jeff/IdeaProjects/orange2" . to_string ( ) ) ,
444- ) ;
475+ let vec = idx_store. search ( "data patch controller" . to_string ( ) , 10 ) ;
445476 for x in vec {
446477 println ! ( "{}" , x. name) ;
447478 }
@@ -451,7 +482,7 @@ mod tests {
451482 fn t5 ( ) {
452483 let idx_path = format ! ( "{}{}" , utils:: data_dir( ) , "/orangecachedata/idx" ) ;
453484 let idx_store = Arc :: new ( IdxStore :: new ( & idx_path) ) ;
454- let string = idx_store. tokenize ( "陈奕迅歌曲 " . to_string ( ) ) ;
485+ let string = idx_store. tokenize ( "DataPatchController.java " . to_string ( ) ) ;
455486 println ! ( "{}" , string) ;
456487 }
457488}
0 commit comments