--- tags: - sentence-transformers - sentence-similarity - feature-extraction - dense - generated_from_trainer - dataset_size:4088 - loss:CoSENTLoss base_model: microsoft/unixcoder-base widget: - source_sentence: |- Name: Split BST | Code: // Time: O(n) // Space: O(h) /** * Definition for a binary tree node. * struct TreeNode { * int val; * TreeNode *left; * TreeNode *right; * TreeNode(int x) : val(x), left(NULL), right(NULL) {} * }; */ class Solution { public: vector splitBST(TreeNode* root, int V) { if (!root) { return {nullptr, nullptr}; } else if (root->val <= V) { const auto& result = splitBST(root->right, V); root->right = result[0]; return {root, result[1]}; } else { const auto& result = splitBST(root->left, V); root->left = result[1]; return {result[0], root}; } } }; | Tags: Binary Search Tree,Binary Tree,Recursion,Tree sentences: - |- Name: Parallel Courses III | Code: // Time: O(|V| + |E|) // Space: O(|E|) class Solution { public: int minimumTime(int n, vector>& relations, vector& time) { vector> adj(n); vector in_degree(n); for (const auto& relation : relations) { adj[relation[0] - 1].emplace_back(relation[1] - 1); ++in_degree[relation[1] - 1]; } vector q; vector dist(n); for (int u = 0; u < n; ++u) { if (in_degree[u]) { continue; } q.emplace_back(u); dist[u] = time[u]; } while (!empty(q)) { vector new_q; for (const auto& u : q) { for (const auto& v : adj[u]) { dist[v] = max(dist[v], dist[u] + time[v]); --in_degree[v]; if (!in_degree[v]) { new_q.emplace_back(v); } } } q = move(new_q); } return *max_element(cbegin(dist), cend(dist)); } }; | Tags: Array,Dynamic Programming,Graph,Topological Sort - >- Name: Determine Whether Matrix Can Be Obtained By Rotation | Code: // Time: O(m * n) // Space: O(1) class Solution { public: bool findRotation(vector>& mat, vector>& target) { vector> checks = { [&mat, &target](int i, int j) { return mat[i][j] == target[i][j]; }, [&mat, &target](int i, int j) { return mat[i][j] == target[j][size(mat) - 1 - i]; }, [&mat, 
&target](int i, int j) { return mat[i][j] == target[size(mat) - 1 - i][size(mat[0]) - 1 - j]; }, [&mat, &target](int i, int j) { return mat[i][j] == target[size(mat[0]) - 1 - j][i]; }, }; const auto& traverse = [&mat, &target](const auto& check) { for (int i = 0; i < size(mat); ++i) { for (int j = 0; j < size(mat[0]); ++j) { if (!check(i, j)) { return false; } } } return true; }; for (const auto& check : checks) { if (traverse(check)) { return true; } } return false; } }; | Tags: Array,Matrix - >- Name: Maximum Total Reward Using Operations I | Code: // Time: O(nlogn + r^2), r = max(rewardValues) // Space: O(r) // sort, dp, bitset class Solution { public: int maxTotalReward(vector& rewardValues) { static const int MAX_VALUE = 20000; unordered_set v_set(cbegin(rewardValues), cend(rewardValues)); vector sorted_v(cbegin(v_set), cend(v_set)); sort(begin(sorted_v), end(sorted_v)); bitset<(MAX_VALUE - 1) + 1> dp, mask; dp[0] = true; int i = 0; for (int i = 0, j = 0; i + 1 < size(sorted_v); ++i) { while (j < sorted_v[i]) { mask[j++] = true; } dp |= (dp & mask) << sorted_v[i]; } int result = sorted_v.back() - 1; for (; !dp[result]; --result); return sorted_v.back() + result; } }; // Time: O(nlogn + r^2), r = max(rewardValues) // Space: O(r) // sort, dp, bitset class Solution2 { public: int maxTotalReward(vector& rewardValues) { static const int MAX_VALUE = 20000; unordered_set v_set(cbegin(rewardValues), cend(rewardValues)); vector sorted_v(cbegin(v_set), cend(v_set)); sort(begin(sorted_v), end(sorted_v)); bitset<(MAX_VALUE * 2 - 1) + 1> dp, mask; dp[0] = true; int i = 0; for (int i = 0, j = 0; i < size(sorted_v); ++i) { while (j < sorted_v[i]) { mask[j++] = true; } dp |= (dp & mask) << sorted_v[i]; } int result = 2 * sorted_v.back() - 1; for (; !dp[result]; --result); return result; } }; // Time: O(nlogn + r^2), r = max(rewardValues) // Space: O(r) // sort, dp class Solution3 { public: int maxTotalReward(vector& rewardValues) { unordered_set v_set(cbegin(rewardValues), 
cend(rewardValues)); vector sorted_v(cbegin(v_set), cend(v_set)); sort(begin(sorted_v), end(sorted_v)); const int mx = sorted_v.back(); vector dp((mx - 1) + 1); dp[0] = true; for (int i = 0; i + 1 < size(sorted_v); ++i) { for (int x = 0; x < min(sorted_v[i], mx - sorted_v[i]); ++x) { if (dp[x]) { dp[sorted_v[i] + x] = dp[x]; } } } int result = mx - 1; for (; !dp[result]; --result); return mx + result; } }; // Time: O(nlogn + r^2), r = max(rewardValues) // Space: O(r) // sort, dp class Solution4 { public: int maxTotalReward(vector& rewardValues) { unordered_set v_set(cbegin(rewardValues), cend(rewardValues)); vector sorted_v(cbegin(v_set), cend(v_set)); sort(begin(sorted_v), end(sorted_v)); const int mx = sorted_v.back(); vector dp((mx * 2 - 1) + 1); dp[0] = true; for (int i = 0; i < size(sorted_v); ++i) { for (int x = 0; x < sorted_v[i]; ++x) { if (dp[x]) { dp[sorted_v[i] + x] = dp[x]; } } } int result = 2 * mx - 1; for (; !dp[result]; --result); return result; } }; | Tags: Array,Dynamic Programming - source_sentence: |- Name: Valid Perfect Square | Code: // Time: O(logn) // Space: O(1) class Solution { public: bool isPerfectSquare(int num) { int left = 1, right = num; while (left <= right) { const int mid = left + (right - left) / 2; if (mid >= num / mid) { right = mid - 1; } else { left = mid + 1; } } return left == num / left && num % left == 0; } }; | Tags: Binary Search,Math sentences: - |- Name: Shortest Bridge | Code: // Time: O(n^2) // Space: O(n^2) class Solution { public: template struct PairHash { size_t operator()(const pair& p) const { size_t seed = 0; seed ^= std::hash{}(p.first) + 0x9e3779b9 + (seed<<6) + (seed>>2); seed ^= std::hash{}(p.second) + 0x9e3779b9 + (seed<<6) + (seed>>2); return seed; } }; int shortestBridge(vector>& A) { static const vector> directions{{0, 1}, {1, 0}, {0, -1}, {-1, 0}}; unordered_set, PairHash> lookup; unordered_set, PairHash> target; auto islands = get_islands(A); lookup = move(islands[0]); target = move(islands[1]); 
queue, int>> q; for (const auto& node : lookup) { q.emplace(node, 0); } while (!q.empty()) { pair node; int dis; tie(node, dis) = q.front(); q.pop(); if (target.count(node)) { return dis - 1; } for (const auto& d : directions) { pair nei = make_pair(node.first + d.first, node.second + d.second); if (0 > nei.first || nei.first >= A.size() || 0 > nei.second || nei.second >= A[0].size() || lookup.count(nei)) { continue; } q.emplace(nei, dis + 1); lookup.emplace(nei); } } } private: vector, PairHash>> get_islands(const vector>& A) { static const vector> directions{{0, 1}, {1, 0}, {0, -1}, {-1, 0}}; vector, PairHash>> islands; unordered_set, PairHash> done; for (int r = 0; r < A.size(); ++r) { for (int c = 0; c < A[0].size(); ++c) { if (A[r][c] == 0 || done.count(make_pair(r, c))) { continue; } vector> s{{r, c}}; unordered_set, PairHash> lookup(s.cbegin(), s.cend()); while (!s.empty()) { auto node = s.back(); s.pop_back(); for (const auto& d : directions) { pair nei = make_pair(node.first + d.first, node.second + d.second); if (0 > nei.first || nei.first >= A.size() || 0 > nei.second || nei.second >= A[0].size() || lookup.count(nei) || A[nei.first][nei.second] == 0) { continue; } s.emplace_back(nei); lookup.emplace(nei); } } for (const auto& node : lookup) { done.emplace(node); } islands.emplace_back(move(lookup)); if (islands.size() == 2) { break; } } } return islands; } }; | Tags: Array,Breadth-First Search,Depth-First Search,Matrix - |- Name: Merge Two Binary Trees | Code: // Time: O(n) // Space: O(h) /** * Definition for a binary tree node. 
* struct TreeNode { * int val; * TreeNode *left; * TreeNode *right; * TreeNode(int x) : val(x), left(NULL), right(NULL) {} * }; */ class Solution { public: TreeNode* mergeTrees(TreeNode* t1, TreeNode* t2) { if (!t1) { return t2; } if (!t2) { return t1; } t1->val += t2->val; t1->left = mergeTrees(t1->left, t2->left); t1->right = mergeTrees(t1->right, t2->right); return t1; } }; | Tags: Binary Tree,Breadth-First Search,Depth-First Search,Tree - |- Name: Sum of Square Numbers | Code: // Time: O(sqrt(c) * logc) // Space: O(1) class Solution { public: bool judgeSquareSum(int c) { for (long long a = 0; a * a <= c; ++a) { auto b = static_cast(sqrt(c - a * a)); if (a * a + b * b == c) { return true; } } return false; } }; | Tags: Binary Search,Math,Two Pointers - source_sentence: >- Name: Count Paths That Can Form a Palindrome in a Tree | Code: // Time: O(n) // Space: O(n) // iterative dfs, freq table class Solution { public: long long countPalindromePaths(vector& parent, string s) { unordered_map cnt; cnt[0] = 1; vector> adj(size(parent)); for (int u = 0; u < size(parent); ++u) { if (parent[u] != -1) { adj[parent[u]].emplace_back(u); } } const auto& iter_dfs = [&]() { int64_t result = 0; vector> stk = {{0, 0}}; while (!empty(stk)) { auto [u, mask] = stk.back(); stk.pop_back(); if (u) { mask ^= 1 << (s[u] - 'a'); for (int i = 0; i < 26; ++i) { if (cnt.count(mask ^ (1 << i))) { result += cnt[mask ^ (1 << i)]; } } result += cnt[mask]++; } for (const auto& v : adj[u]) { stk.emplace_back(v, mask); } } return result; }; return iter_dfs(); } }; // Time: O(n) // Space: O(n) // dfs, freq table class Solution2 { public: long long countPalindromePaths(vector& parent, string s) { unordered_map cnt; cnt[0] = 1; vector> adj(size(parent)); for (int u = 0; u < size(parent); ++u) { if (parent[u] != -1) { adj[parent[u]].emplace_back(u); } } const function dfs = [&](int u, int mask) { int64_t result = 0; if (u) { mask ^= 1 << (s[u] - 'a'); for (int i = 0; i < 26; ++i) { if (cnt.count(mask ^ 
(1 << i))) { result += cnt[mask ^ (1 << i)]; } } result += cnt[mask]++; } for (const auto& v : adj[u]) { result += dfs(v, mask); } return result; }; return dfs(0, 0); } }; | Tags: Bit Manipulation,Bitmask,Depth-First Search,Dynamic Programming,Tree sentences: - |- Name: Recover a Tree From Preorder Traversal | Code: // Time: O(n) // Space: O(h) /** * Definition for a binary tree node. * struct TreeNode { * int val; * TreeNode *left; * TreeNode *right; * TreeNode(int x) : val(x), left(NULL), right(NULL) {} * }; */ // iterative stack solution class Solution { public: TreeNode* recoverFromPreorder(string S) { int i = 0; vector stack; while (i < S.length()) { int j = S.find_first_not_of("-", i); int level = j - i; i = j; while (stack.size() > level) { stack.pop_back(); } j = S.find_first_of("-", i); auto node = new TreeNode(stoi(S.substr(i, j - i))); i = j; if (!stack.empty()) { if (!stack.back()->left) { stack.back()->left = node; } else{ stack.back()->right = node; } } stack.emplace_back(node); } return stack.front(); } }; // Time: O(n) // Space: O(h) // recursive solution class Solution2 { public: TreeNode* recoverFromPreorder(string S) { int i = 0; return recoverFromPreorderHelper(S, 0, &i); } private: TreeNode* recoverFromPreorderHelper(const string& S, int level, int *i) { int j = S.find_first_not_of("-", *i); if (level != j - *i) { return nullptr; } *i = j; j = S.find_first_of("-", *i); auto node = new TreeNode(stoi(S.substr(*i, j - *i))); *i = j; node->left = recoverFromPreorderHelper(S, level + 1, i); node->right = recoverFromPreorderHelper(S, level + 1, i); return node; } }; | Tags: Binary Tree,Depth-First Search,String,Tree - |- Name: Separate the Digits in an Array | Code: // Time: O(n * logr) // Space: O(1) // array class Solution { public: vector separateDigits(vector& nums) { vector result; for (int i = size(nums) - 1; i >= 0; --i) { for (int x = nums[i]; x; x /= 10) { result.emplace_back(x % 10); } } reverse(begin(result), end(result)); return result; } 
}; // Time: O(n * logr) // Space: O(logr), r = max(nums) // array class Solution2 { public: vector separateDigits(vector& nums) { vector result; for (const auto& x : nums) { for (const auto& c : to_string(x)) { result.emplace_back(c - '0'); } } return result; } }; | Tags: Array,Simulation - |- Name: Strobogrammatic Number II | Code: // Time: O(n * 5^(n/2)) // Space: O(n) class Solution { public: vector findStrobogrammatic(int n) { static const unordered_map lookup{{"0", "0"}, {"1", "1"}, {"6", "9"}, {"8", "8"}, {"9", "6"}}; vector result = {""}; if (n % 2) { result = {"0", "1", "8"}; } for (int i = n % 2; i < n; i += 2) { vector new_result; for (const auto& num : result) { for (const auto& [a, b] : lookup) { if (i != n - 2 || a != "0") { new_result.emplace_back(a + num + b); } } } result = move(new_result); } return result; } }; // Time: O(n * 5^(n/2)) // Space: O(n) class Solution2 { public: vector findStrobogrammatic(int n) { return findStrobogrammaticRecu(n, n); } private: vector findStrobogrammaticRecu(const int n, int k) { static const unordered_map lookup{{"0", "0"}, {"1", "1"}, {"6", "9"}, {"8", "8"}, {"9", "6"}}; if (k == 0) { return {""}; } else if (k == 1) { return {"0", "1", "8"}; } vector result; for (const auto& num : findStrobogrammaticRecu(n, k - 2)) { for (const auto& kvp : lookup) { if (n != k || kvp.first != "0") { result.emplace_back(kvp.first + num + kvp.second); } } } return result; } }; | Tags: Array,Recursion,String - source_sentence: |- Name: Most Frequent IDs | Code: // Time: O(nlogn) // Space: O(n) // heap class Solution { public: vector mostFrequentIDs(vector& nums, vector& freq) { vector result; unordered_map cnt; priority_queue> max_heap; for (int i = 0; i < size(nums); ++i) { cnt[nums[i]] += freq[i]; max_heap.emplace(cnt[nums[i]], nums[i]); while (!empty(max_heap) && max_heap.top().first != cnt[max_heap.top().second]) { max_heap.pop(); } result.emplace_back(!empty(max_heap) ? 
max_heap.top().first : 0); } return result; } }; // Time: O(nlogn) // Space: O(n) // bst class Solution2 { public: vector mostFrequentIDs(vector& nums, vector& freq) { vector result; unordered_map cnt; map bst; for (int i = 0; i < size(nums); ++i) { if (--bst[cnt[nums[i]]] == 0) { bst.erase(cnt[nums[i]]); } cnt[nums[i]] += freq[i]; ++bst[cnt[nums[i]]]; result.emplace_back(rbegin(bst)->first); } return result; } }; | Tags: Array,Hash Table,Heap (Priority Queue),Ordered Set sentences: - |- Name: Design HashSet | Code: // Time: O(1) // Space: O(n) class MyHashSet { public: /** Initialize your data structure here. */ MyHashSet() : data_(10000) { } void add(int key) { auto& list = data_[key % data_.size()]; auto it = find(list.begin(), list.end(), key); if (it == list.end()) { list.emplace_back(key); } } void remove(int key) { auto& list = data_[key % data_.size()]; auto it = find(list.begin(), list.end(), key); if (it != list.end()) { list.erase(it); } } /** Returns true if this set did not already contain the specified element */ bool contains(int key) { auto& list = data_[key % data_.size()]; auto it = find(list.begin(), list.end(), key); return it != list.end(); } private: vector> data_; }; /** * Your MyHashSet object will be instantiated and called as such: * MyHashSet obj = new MyHashSet(); * obj.add(key); * obj.remove(key); * bool param_3 = obj.contains(key); */ | Tags: Array,Design,Hash Function,Hash Table,Linked List - |- Name: Max Area of Island | Code: // Time: O(m * n) // Space: O(m * n), the max depth of dfs may be m * n class Solution { public: int maxAreaOfIsland(vector>& grid) { int result = 0; for (int i = 0; i < grid.size(); ++i) { for (int j = 0; j < grid[0].size(); ++j) { int area = 0; if (dfs(i, j, &grid, &area)) { result = max(result, area); } } } return result; } private: bool dfs(const int i, const int j, vector> *grid, int *area) { static const vector> directions{{-1, 0}, { 1, 0}, { 0, 1}, { 0, -1}}; if (i < 0 || i >= grid->size() || j < 0 || j 
>= (*grid)[0].size() || (*grid)[i][j] <= 0) { return false; } (*grid)[i][j] *= -1; ++(*area); for (const auto& d : directions) { dfs(i + d.first, j + d.second, grid, area); } return true; } }; | Tags: Array,Breadth-First Search,Depth-First Search,Matrix,Union Find - |- Name: Stone Game VII | Code: // Time: O(n^2) // Space: O(n) class Solution { public: int stoneGameVII(vector& stones) { vector prefix(1); for (const auto& stone : stones) { prefix.emplace_back(prefix.back() + stone); } const auto& score = [&prefix](int i, int j) { return prefix[j + 1] - prefix[i]; }; vector> dp(2, vector(size(stones))); for (int i = size(stones) - 1; i >= 0; --i) { for (int j = i + 1; j < size(stones); ++j) { dp[i % 2][j] = max(score(i + 1, j) - dp[(i + 1) % 2][j], score(i, j - 1) - dp[i % 2][j - 1]); } } return dp[0][size(stones) - 1]; } }; | Tags: Array,Dynamic Programming,Game Theory,Math - source_sentence: |- Name: Arithmetic Slices | Code: // Time: O(n) // Space: O(1) class Solution { public: int numberOfArithmeticSlices(vector& A) { int res = 0, i = 0; for (int i = 0; i + 2 < A.size(); ++i) { const auto start = i; while (i + 2 < A.size() && A[i + 2] + A[i] == 2 * A[i + 1]) { res += (i++) - start + 1; } } return res; } }; | Tags: Array,Dynamic Programming,Sliding Window sentences: - >- Name: Prefix and Suffix Search | Code: // Time: ctor: O(w * l^2), w is the number of words, l is the word length on average // search: O(p + s) , p is the length of the prefix, s is the length of the suffix, // Space: O(t), t is the number of trie nodes class WordFilter { public: WordFilter(vector words) { for (int i = 0; i < words.size(); ++i) { for (int j = 0; j <= words[i].length(); ++j) { trie_.insert(words[i].substr(j) + SEPARATOR + words[i], i); } } } int f(string prefix, string suffix) { return trie_.find(suffix + SEPARATOR + prefix); } private: struct TrieNode { int weight; vector leaves; TrieNode() : weight(0), leaves(27) {} void insert(const string& s, const int weight) { auto* p = this; 
p->weight = weight; for (const auto& c : s) { if (!p->leaves[c - 'a']) { p->leaves[c - 'a'] = new TrieNode; } p = p->leaves[c - 'a']; p->weight = weight; } } int find(const string& s) const { auto* p = this; for (const auto& c : s) { if (!p->leaves[c - 'a']) { return -1; } p = p->leaves[c - 'a']; } return p->weight; } ~TrieNode() { for (auto& node : leaves) { if (node) { delete node; } } } }; const string SEPARATOR = "{"; // ascii code of 'z' + 1 TrieNode trie_; }; // Time: ctor: O(w * l), w is the number of words, l is the word length on average // search: O(p + s + max(m, n)), p is the length of the prefix, s is the length of the suffix, // m is the number of the prefix match, n is the number of the suffix match // Space: O(w * l) class WordFilter2 { public: WordFilter(vector words) { for (int i = words.size() - 1; i >= 0; --i) { auto word = words[i]; prefix_trie_.insert(word, i); reverse(word.begin(), word.end()); suffix_trie_.insert(word, i); } } int f(string prefix, string suffix) { const auto& prefix_match = prefix_trie_.find(prefix); reverse(suffix.begin(), suffix.end()); const auto& suffix_match = suffix_trie_.find(suffix); int i = 0, j = 0; while (i != prefix_match.size() && j != suffix_match.size()) { if (prefix_match[i] == suffix_match[j]) { return prefix_match[i]; } else if (prefix_match[i] > suffix_match[j]) { ++i; } else { ++j; } } return -1; } private: struct TrieNode { vector words; // index of words vector leaves; TrieNode() : leaves(26) {} void insert(const string& s, const int i) { auto* p = this; p->words.emplace_back(i); for (const auto& c : s) { if (!p->leaves[c - 'a']) { p->leaves[c - 'a'] = new TrieNode; } p = p->leaves[c - 'a']; p->words.emplace_back(i); } } const vector& find(const string& s) const { auto* p = this; for (const auto& c : s) { if (!p->leaves[c - 'a']) { static const vector empty; return empty; } p = p->leaves[c - 'a']; } return p->words; } ~TrieNode() { for (auto& node : leaves) { if (node) { delete node; } } } }; TrieNode 
prefix_trie_, suffix_trie_; }; /** * Your WordFilter object will be instantiated and called as such: * WordFilter obj = new WordFilter(words); * int param_1 = obj.f(prefix,suffix); */ | Tags: Array,Design,Hash Table,String,Trie - |- Name: Reverse Odd Levels of Binary Tree | Code: // Time: O(n) // Space: O(n) // bfs class Solution { public: TreeNode* reverseOddLevels(TreeNode* root) { vector q = {root}; for (int parity = 0; !empty(q); parity ^= 1) { if (parity) { for (int left = 0, right = size(q) - 1; left < right; ++left, --right) { swap(q[left]->val, q[right]->val); } } if (!q[0]->left) { break; } vector new_q; for (const auto& node : q) { new_q.emplace_back(node->left); new_q.emplace_back(node->right); } q = move(new_q); } return root; } }; | Tags: Binary Tree,Breadth-First Search,Depth-First Search,Tree - |- Name: Arithmetic Subarrays | Code: // Time: O(n * q) // Space: O(n) class Solution { public: vector checkArithmeticSubarrays(vector& nums, vector& l, vector& r) { vector result(size(l)); for (int i = 0; i < size(l); ++i) { result[i] = isArith(vector(cbegin(nums) + l[i], cbegin(nums) + r[i] + 1)); } return result; } private: bool isArith(const vector& nums) { unordered_set lookup(cbegin(nums), cend(nums)); int mn = *min_element(cbegin(nums), cend(nums)); int mx = *max_element(cbegin(nums), cend(nums)); if (mx == mn) { return true; } if ((mx - mn) % (size(nums) - 1)) { return false; } int d = (mx - mn) / (size(nums) - 1); for (int i = mn; i <= mx; i += d) { if (!lookup.count(i)) { return false; } } return true; } }; | Tags: Array,Hash Table,Sorting pipeline_tag: feature-extraction library_name: sentence-transformers metrics: - pearson_cosine - spearman_cosine model-index: - name: SentenceTransformer based on microsoft/unixcoder-base results: - task: type: semantic-similarity name: Semantic Similarity dataset: name: unixcoder leetcode eval type: unixcoder_leetcode_eval metrics: - type: pearson_cosine value: 0.8844784116914098 name: Pearson Cosine - type: 
spearman_cosine value: 0.8840382708528214 name: Spearman Cosine license: mit language: - en --- # SentenceTransformer based on microsoft/unixcoder-base This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/unixcoder-base](https://huggingface.co/microsoft/unixcoder-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. ## Model Details ### Model Description - **Model Type:** Sentence Transformer - **Base model:** [microsoft/unixcoder-base](https://huggingface.co/microsoft/unixcoder-base) - **Maximum Sequence Length:** 256 tokens - **Output Dimensionality:** 768 dimensions - **Similarity Function:** Cosine Similarity ### Model Sources - **Documentation:** [Sentence Transformers Documentation](https://sbert.net) - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) ### Full Model Architecture ``` SentenceTransformer( (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'RobertaModel'}) (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) ) ``` ## Usage ### Direct Usage (Sentence Transformers) First install the Sentence Transformers library: ```bash pip install -U sentence-transformers ``` Then you can load this model and run inference. 
```python from sentence_transformers import SentenceTransformer # Download from the 🤗 Hub model = SentenceTransformer("sentence_transformers_model_id") # Run inference sentences = [ 'Name: Arithmetic Slices | Code: // Time: O(n)\n// Space: O(1)\n\nclass Solution {\npublic:\n int numberOfArithmeticSlices(vector& A) {\n int res = 0, i = 0;\n for (int i = 0; i + 2 < A.size(); ++i) {\n const auto start = i;\n while (i + 2 < A.size() && A[i + 2] + A[i] == 2 * A[i + 1]) {\n res += (i++) - start + 1;\n }\n }\n return res;\n }\n};\n | Tags: Array,Dynamic Programming,Sliding Window', 'Name: Arithmetic Subarrays | Code: // Time: O(n * q)\n// Space: O(n)\n\nclass Solution {\npublic:\n vector checkArithmeticSubarrays(vector& nums, vector& l, vector& r) {\n vector result(size(l));\n for (int i = 0; i < size(l); ++i) {\n result[i] = isArith(vector(cbegin(nums) + l[i], cbegin(nums) + r[i] + 1));\n }\n return result;\n }\n\nprivate:\n bool isArith(const vector& nums) {\n unordered_set lookup(cbegin(nums), cend(nums));\n int mn = *min_element(cbegin(nums), cend(nums));\n int mx = *max_element(cbegin(nums), cend(nums));\n if (mx == mn) {\n return true;\n }\n if ((mx - mn) % (size(nums) - 1)) {\n return false;\n }\n int d = (mx - mn) / (size(nums) - 1);\n for (int i = mn; i <= mx; i += d) {\n if (!lookup.count(i)) {\n return false;\n }\n }\n return true;\n }\n};\n | Tags: Array,Hash Table,Sorting', 'Name: Reverse Odd Levels of Binary Tree | Code: // Time: O(n)\n// Space: O(n)\n\n// bfs\nclass Solution {\npublic:\n TreeNode* reverseOddLevels(TreeNode* root) {\n vector q = {root};\n for (int parity = 0; !empty(q); parity ^= 1) {\n if (parity) {\n for (int left = 0, right = size(q) - 1; left < right; ++left, --right) {\n swap(q[left]->val, q[right]->val);\n }\n }\n if (!q[0]->left) {\n break;\n }\n vector new_q;\n for (const auto& node : q) {\n new_q.emplace_back(node->left);\n new_q.emplace_back(node->right);\n }\n q = move(new_q);\n }\n return root;\n }\n};\n | Tags: Binary 
Tree,Breadth-First Search,Depth-First Search,Tree', ] embeddings = model.encode(sentences) print(embeddings.shape) # [3, 768] # Get the similarity scores for the embeddings similarities = model.similarity(embeddings, embeddings) print(similarities) # tensor([[1.0000, 0.7905, 0.5824], # [0.7905, 1.0000, 0.6028], # [0.5824, 0.6028, 1.0000]]) ``` ## Evaluation ### Metrics #### Semantic Similarity * Dataset: `unixcoder_leetcode_eval` * Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) | Metric | Value | |:--------------------|:----------| | pearson_cosine | 0.8845 | | **spearman_cosine** | **0.884** | ## Training Details ### Training Dataset #### Unnamed Dataset * Size: 4,088 training samples * Columns: text1, text2, and score * Approximate statistics based on the first 1000 samples: | | text1 | text2 | score | |:--------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:----------------------------------------------------------------| | type | string | string | float | | details |
  • min: 51 tokens<br>• mean: 224.9 tokens<br>• max: 256 tokens | • min: 77 tokens<br>• mean: 226.23 tokens<br>• max: 256 tokens | • min: 0.14<br>• mean: 0.43<br>• max: 1.0
| * Samples: | text1 | text2 | score | |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------| | Name: As Far from Land as Possible \| Code: // Time: O(m * n)
// Space: O(m * n)

class Solution {
public:
int maxDistance(vector>& grid) {
static const vector> directions{{0, 1}, {1, 0}, {0, -1}, {-1, 0}};
queue> q;
for (int i = 0; i < grid.size(); ++i) {
for (int j = 0; j < grid[i].size(); ++j) {
if (grid[i][j]) {
q.emplace(i, j);
}
}
}
if (q.size() == grid.size() * grid[0].size()) {
return -1;
}
int level = -1;
while (!q.empty()) {
queue> next_q;
while (!q.empty()) {
const auto [x, y] = q.front(); q.pop();
for (const auto& [dx, dy] : directions) {
const auto& nx = x + dx;
const auto& ny = y + dy;
if (!(0 <= nx && nx < grid.size() &&
...
| Name: Maximum Manhattan Distance After K Changes \| Code: // Time: O(n)
// Space: O(1)

// greedy
class Solution {
public:
int maxDistance(string s, int k) {
int result = 0;
for (int i = 0, x = 0, y = 0; i < size(s); ++i) {
if (s[i] == 'E') {
++x;
} else if (s[i] == 'W') {
--x;
} else if (s[i] == 'N') {
++y;
} else if (s[i] == 'S') {
--y;
}
result = max(result, min(abs(x) + abs(y) + 2 * k, i + 1));
}
return result;
}
};
\| Tags: Counting,Hash Table,Math,String
| 0.3427242051079267 | | Name: Wiggle Sort II \| Code: // Time: O(n) ~ O(n^2), O(n) on average.
// Space: O(1)

// Tri Partition (aka Dutch National Flag Problem) with virtual index solution. (44ms)
class Solution {
public:
void wiggleSort(vector& nums) {
int mid = (nums.size() - 1) / 2;
nth_element(nums.begin(), nums.begin() + mid, nums.end()); // O(n) ~ O(n^2) time
reversedTriPartitionWithVI(nums, nums[mid]); // O(n) time, O(1) space
}

void reversedTriPartitionWithVI(vector& nums, int val) {
const int N = nums.size() / 2 * 2 + 1;
#define Nums(i) nums[(1 + 2 * (i)) % N]
for (int i = 0, j = 0, n = nums.size() - 1; j <= n;) {
if (Nums(j) > val) {
swap(Nums(i++), Nums(j++));
} else if (Nums(j) < val) {
swap(Nums(j), Nums(n--));
} else {
++j;
}
}
}
};

// Time: O(n) ~ O(n^2)
// Space: O(n)
// Tri Partition (aka Dutch National Flag Pro...
| Name: Array With Elements Not Equal to Average of Neighbors \| Code: // Time: O(n) ~ O(n^2), O(n) on average
// Space: O(1)

// Tri Partition (aka Dutch National Flag Problem) with virtual index solution
class Solution {
public:
vector rearrangeArray(vector& nums) {
int mid = (size(nums) - 1) / 2;
nth_element(begin(nums), begin(nums) + mid, end(nums)); // O(n) ~ O(n^2) time
reversedTriPartitionWithVI(nums, nums[mid]); // O(n) time, O(1) space
return nums;
}

private:
void reversedTriPartitionWithVI(vector& nums, int val) {
const int N = size(nums) / 2 * 2 + 1;
#define Nums(i) nums[(1 + 2 * (i)) % N]
for (int i = 0, j = 0, n = size(nums) - 1; j <= n;) {
if (Nums(j) > val) {
swap(Nums(i++), Nums(j++));
} else if (Nums(j) < val) {
swap(Nums(j), Nums(n--));
} else {
++j;
}
}
}
};

// Time: O(nlogn)
...
| 0.7248856548541956 | | Name: Minimum Time to Visit a Cell In a Grid \| Code: // Time: O(m * n * log(m * n))
// Space: O(m * n)

// dijkstra's algorithm
class Solution {
public:
int minimumTime(vector>& grid) {
static const vector> DIRECTIONS = {{1, 0}, {0, 1}, {-1, 0}, {0, -1}};
if (min(grid[0][1], grid[1][0]) > 1) {
return -1;
}
const auto& dijkstra = [&](const pair& start, const pair& target) {
vector> best(size(grid), vector(size(grid[0]), numeric_limits::max()));
best[start.first][start.second] = 0;
using Data = tuple;
priority_queue, greater> min_heap;
min_heap.emplace(0, start.first, start.second);
while (!empty(min_heap)) {
const auto [curr, i, j] = min_heap.top(); min_heap.pop();
if (best[i][j] < curr) {
continue;
...
| Name: Sentence Similarity III \| Code: // Time: O(n)
// Space: O(1)

class Solution {
public:
bool areSentencesSimilar(string sentence1, string sentence2) {
if (size(sentence1) > size(sentence2)) {
swap(sentence1, sentence2);
}
int count = 0;
for (int step = 0; step < 2; ++step) {
for (int i = 0; i <= size(sentence1); ++i) {
char c1 = i != size(sentence1) ? sentence1[step == 0 ? i : size(sentence1) - 1 - i] : ' ';
char c2 = i != size(sentence2) ? sentence2[step == 0 ? i : size(sentence2) - 1 - i] : ' ';
if (c1 != c2) {
break;
}
if (c1 == ' ') {
++count;
}
}
}
return count >= count_if(cbegin(sentence1), cend(sentence1),
[](char x) { return x == ' '; }) + 1;
}
};
\| Tags: Array,String,Two Pointers
| 0.2964020586887101 |
* Loss: [CoSENTLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "pairwise_cos_sim"
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `learning_rate`: 2e-05
- `num_train_epochs`: 2
- `warmup_steps`: 102
- `use_cpu`: True
- `data_seed`: 42
- `remove_unused_columns`: False
- `load_best_model_at_end`: True
- `dataloader_pin_memory`: False
- `gradient_checkpointing`: True

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 8
- `per_device_eval_batch_size`: 8
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 2e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 2
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 102
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: True
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: 42
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: False
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch_fused
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: False
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `hub_revision`: None
- `gradient_checkpointing`: True
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `liger_kernel_config`: None
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional
- `router_mapping`: {}
- `learning_rate_mapping`: {}

</details>
### Training Logs
| Epoch      | Step    | Training Loss | unixcoder_leetcode_eval_spearman_cosine |
|:----------:|:-------:|:-------------:|:---------------------------------------:|
| 1.6634     | 850     | 2.6601        | -                                       |
| **1.7613** | **900** | **2.5066**    | **0.8875**                              |
| 1.8591     | 950     | 2.3788        | -                                       |
| 1.9569     | 1000    | 2.34          | 0.8840                                  |

* The bold row denotes the saved checkpoint.

### Framework Versions
- Python: 3.12.11
- Sentence Transformers: 5.1.0
- Transformers: 4.55.2
- PyTorch: 2.8.0+cu126
- Accelerate: 1.10.0
- Datasets: 4.0.0
- Tokenizers: 0.21.4

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### CoSENTLoss
```bibtex
@online{kexuefm-8847,
    title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
    author={Su Jianlin},
    year={2022},
    month={Jan},
    url={https://kexue.fm/archives/8847},
}
```