From 004672aa6c482b212e36b9e794d107be456a11da Mon Sep 17 00:00:00 2001 From: unarist Date: Tue, 6 Jun 2017 23:07:06 +0900 Subject: [PATCH] Fix tag search order and not to use tsvector (#3611) * Sort results by the name * Switch search method to simple `LIKE` matching instead of tsvector/tsquery Previously we used scores from ts_rank_cd() to sort results, but it didn't work because the function returns same score for all results. It's not for calculate similarity of single words. Sometimes this bug even push out exact matching tag from results. Additionally, PostgreSQL supports prefix searching with standard btree index. Using it offers simpler code, but also less index size and some speed. --- app/models/tag.rb | 19 +++---------------- ...113804_change_tag_search_index_to_btree.rb | 12 ++++++++++++ db/schema.rb | 4 ++-- spec/models/tag_spec.rb | 9 +++++++++ 4 files changed, 26 insertions(+), 18 deletions(-) create mode 100644 db/migrate/20170606113804_change_tag_search_index_to_btree.rb diff --git a/app/models/tag.rb b/app/models/tag.rb index 4001e6ed57a..08e3c1b03f0 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -21,22 +21,9 @@ class Tag < ApplicationRecord end class << self - def search_for(terms, limit = 5) - terms = Arel.sql(connection.quote(terms.gsub(/['?\\:]/, ' '))) - textsearch = 'to_tsvector(\'simple\', tags.name)' - query = 'to_tsquery(\'simple\', \'\'\' \' || ' + terms + ' || \' \'\'\' || \':*\')' - - sql = <<-SQL.squish - SELECT - tags.*, - ts_rank_cd(#{textsearch}, #{query}) AS rank - FROM tags - WHERE #{query} @@ #{textsearch} - ORDER BY rank DESC - LIMIT ? - SQL - - Tag.find_by_sql([sql, limit]) + def search_for(term, limit = 5) + pattern = sanitize_sql_like(term) + '%' + Tag.where('name like ?', pattern).order(:name).limit(limit) end end end diff --git a/db/migrate/20170606113804_change_tag_search_index_to_btree.rb b/db/migrate/20170606113804_change_tag_search_index_to_btree.rb new file mode 100644 index 00000000000..5248e172026 --- /dev/null +++ b/db/migrate/20170606113804_change_tag_search_index_to_btree.rb @@ -0,0 +1,12 @@ +class ChangeTagSearchIndexToBtree < ActiveRecord::Migration[5.1] + + def up + remove_index :tags, name: :hashtag_search_index + execute 'CREATE INDEX hashtag_search_index ON tags (name text_pattern_ops);' + end + + def down + remove_index :tags, name: :hashtag_search_index + execute 'CREATE INDEX hashtag_search_index ON tags USING gin(to_tsvector(\'simple\', tags.name));' + end +end diff --git a/db/schema.rb b/db/schema.rb index ca904569e47..712f62ea689 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170604144747) do +ActiveRecord::Schema.define(version: 20170606113804) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -320,7 +320,7 @@ ActiveRecord::Schema.define(version: 20170604144747) do t.string "name", default: "", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index "to_tsvector('simple'::regconfig, (name)::text)", name: "hashtag_search_index", using: :gin + t.index "name text_pattern_ops", name: "hashtag_search_index" t.index ["name"], name: "index_tags_on_name", unique: true end diff --git a/spec/models/tag_spec.rb b/spec/models/tag_spec.rb index 7a5b8ec8975..2496946cbec 100644 --- a/spec/models/tag_spec.rb +++ b/spec/models/tag_spec.rb @@ -22,5 +22,14 @@ RSpec.describe Tag, type: :model do expect(results).to eq [tag] end + + it 'finds the exact matching tag as the first item' do + similar_tag = Fabricate(:tag, name: "matchlater") + tag = Fabricate(:tag, name: "match") + + results = Tag.search_for("match") + + expect(results).to eq [tag, similar_tag] + end end end