Lambda 的 Java 实现

本文只说明 Lambda 的计算过程，后面的 MART 部分大家可以随意选用其他实现。这里详细给出了 Lambda 是如何计算得来的，并附上 Java 版本的实现。代码如下：
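样本的格式如下（原文此处的示例未能保留，以下根据代码注释整理）：每行共 15 列，第 0 列是样本 ID，第 1 列是样本标签，第 2 列是样本 query_id，第 3–12 列是特征，第 13 列是样本得分，第 14 列存放 lambda 值。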

public class LambdaCalculate {

/*** @param position:doc在一次query序列中的位置* @param lable:doc在一次query中的等级，点击？加购？下单？* @return :返回这个文档的dcg值* */
public static Double doc_dcg(Integer position,Integer lable){return (Math.pow(2, lable)-1)/(Math.log(position+1)/Math.log(2));
}/*** @param lists:query的一个序列* @return :query的整体dcg，就是各个doc的dcg累加* */
public static Double query_dcg(List<QueryUnit<String,Integer,Integer,Double>> lists){Double query_dcg = 0.0D;for(int i=0;i<lists.size();i++){QueryUnit<String,Integer,Integer,Double> unit = lists.get(i);query_dcg +=doc_dcg(i+1, unit.getLabelValue());}return query_dcg;
}/*** @param lists:query的一个序列* @return :计算理想序列下的DCG* */
public static Double ideal_dcg(List<QueryUnit<String,Integer,Integer,Double>> lists){List<QueryUnit<String,Integer,Integer,Double>> lists2 = new ArrayList<>();lists2.addAll(lists);Collections.sort(lists2, new Comparator<QueryUnit<String,Integer,Integer,Double>>() {@Overridepublic int compare(QueryUnit<String,Integer,Integer,Double> o1, QueryUnit<String,Integer,Integer,Double> o2) {return o2.getLabelValue()-o1.getLabelValue();}});Double ideal_dcg = query_dcg(lists2);return ideal_dcg;
}/*** @param lists:query的一个序列* @return :计算NDCG,首先要计算理想状态下dcg1，然后计算现实中的dcg2,ndcg = dcg2/dcg1* */
public static Double query_ndcg(List<QueryUnit<String,Integer,Integer,Double>> lists){//现实中的query dcgDouble real_dcg = query_dcg(lists);//针对lists数据，按照lable进行排序Double ideal_dcg = ideal_dcg(lists);return real_dcg/ideal_dcg;
}/*** @param lists:query的一个序列* @param  swapMap:交换位置的两个doc的位置数据比如（1,4）（4,1） 1与4互换位置* @return :返回互换后的query dcg值* */
public static Double query_swap_dcg(List<QueryUnit<String,Integer,Integer,Double>> lists, Map<Integer,Integer> swapMap){Double query_swap_dcg = 0.0D;for(int i=0;i<lists.size();i++){Integer swap_position = swapMap.get(i+1);if(swap_position!=null){Integer swap_lable = lists.get(swap_position-1).getLabelValue();query_swap_dcg += doc_dcg(i+1, swap_lable);}else{query_swap_dcg+=doc_dcg(i+1, lists.get(i).getLabelValue());}}return query_swap_dcg;
}/*** @param lists:query的一个序列* @param swapMap:交换位置的两个doc的位置数据比如（1,4）（4,1） 1与4互换位置* @return :计算NDCG,首先要计算理想状态下dcg1，然后计算现实中的dcg2,ndcg = dcg2/dcg1* */
public static Double query_swap_ndcg(List<QueryUnit<String,Integer,Integer,Double>> lists,Map<Integer,Integer> swapMap){//交换位置后的现实dcgDouble real_swap_dcg = query_swap_dcg(lists, swapMap);//理想状态下的dcgDouble ideal_dcg = ideal_dcg(lists);return real_swap_dcg/ideal_dcg;
}/*** @param lists:query的一个序列* @param swapMap:交换位置的两个doc的位置数据比如（1,4）（4,1） 1与4互换位置* @return :返回deltaNDCG* */
public static Double deltaNDCG(List<QueryUnit<String,Integer,Integer,Double>> lists,Map<Integer,Integer> swapMap){Double query_swap_ndcg = query_swap_ndcg(lists, swapMap);Double query_ndcg = query_ndcg(lists);return Math.abs(query_ndcg-query_swap_ndcg);
}/*** @param si:doci预测得分，一般在第一次模型没有的时候，都是0* @param sj:docj预测得分，一般在第一次模型没有的时候，都是0* @param sigma:这个值只是影响曲线的陡峭度，默认这里我选1* @return :返回值betaij ，表示doci比docj差的概率* */
public static Double betaij(Integer sigma,Double si,Double sj){if(sigma == null){sigma = 1;}return 1/(1+Math.pow(Math.E, sigma*(si-sj)));
}/*** @param lists:query的一个序列* @param currentIndex:当前要交换位置的那个doc的位置* @param swapMap: 交换位置的两个doc的位置数据比如（1,4）（4,1） 1与4互换位置* @param sigma:这个值只是影响曲线的陡峭度，默认这里我选1* @return :返回该doc交换一次后的lambda值.* */
public static Double lambdaij(List<QueryUnit<String,Integer,Integer,Double>> lists,Integer currentIndex,Map<Integer,Integer> swapMap,Integer sigma){Integer targetIndex = swapMap.get(currentIndex);QueryUnit<String,Integer,Integer,Double> currentUnit = lists.get(currentIndex-1);QueryUnit<String,Integer,Integer,Double> targetUnit = lists.get(targetIndex-1);Integer currentLable = currentUnit.getLabelValue();Integer targetLable = targetUnit.getLabelValue();Double lambdaij = 0.0D;if(currentLable<=targetLable){lambdaij = -betaij(sigma,currentUnit.getDocscore(),targetUnit.getDocscore())*deltaNDCG(lists, swapMap);}else{lambdaij = betaij(sigma,currentUnit.getDocscore(),targetUnit.getDocscore())*deltaNDCG(lists, swapMap);}return lambdaij;
}/*** @param lists:query的一个序列* @param currentIndex:当前要交换位置的那个doc的位置* @param sigma:这个值只是影响曲线的陡峭度，默认这里我选1* @return :返回一个doc与各个位置都交互完后的lambda* */
public static Double doc_lambdaij(List<QueryUnit<String,Integer,Integer,Double>> lists,Integer currentIndex,Integer sigma){Double doc_lambdaij = 0.0D;for(int i=0;i<lists.size();i++){Integer iindex = i+1;QueryUnit<String,Integer,Integer,Double> currentUint = lists.get(currentIndex-1);QueryUnit<String,Integer,Integer,Double> targetUint = lists.get(i);Integer currentLable = currentUint.getLabelValue();Integer tergetLable = targetUint.getLabelValue();if(currentLable ==tergetLable){doc_lambdaij+=0.0D;}else{Map<Integer,Integer> swapMap = new HashMap<>();swapMap.put(currentIndex, iindex);swapMap.put(iindex, currentIndex);doc_lambdaij += lambdaij(lists, currentIndex, swapMap, sigma);}}return doc_lambdaij;
}/*** @param lists：query的一个序列,序列的单元是QueryUnit，第一个是query_id,第二是：doc位置，从1开始.第三个是：当前doc的lable。第四个是：当前文档分数* @param sigma: 这个值只是影响曲线的陡峭度，默认这里我选1* @return :返回了一个query下整个序列的lambda* */
public static Map<Integer,Double> query_lambdaij(List<QueryUnit<String,Integer,Integer,Double>> lists,Integer sigma){Map<Integer,Double> query_lambdaMap = new HashMap<>();for(int i=0;i<lists.size();i++){QueryUnit<String,Integer,Integer,Double> unit = lists.get(i);Integer position = unit.getPosition();Double doc_lambda =  doc_lambdaij(lists, i+1, sigma);query_lambdaMap.put(position, doc_lambda);}return query_lambdaMap;
}/*** @param dataList:样本训练集的数据集* @param querySize:样本query一个批次是多少条样本* @return :返回更新了lambda值后的list* */
public static List<List<String>> updateLambda(List<List<String>> dataList,Integer querySize){Integer rowLength = dataList.get(0).size();List<QueryUnit<String,Integer,Integer,Double>> queryList = new ArrayList<>();List<Integer> idList = new ArrayList<>();for(int i=0;i<dataList.size();i++){List<String> row = dataList.get(i);idList.add(i);Integer id = Integer.parseInt(row.get(0));QueryUnit<String,Integer,Integer,Double> unit = new QueryUnit<>();unit.setQueryId(row.get(2));Integer position = id%10==0?10:id%10;unit.setPosition(position);unit.setLabelValue(Integer.parseInt(row.get(1)));unit.setDocscore(Double.parseDouble(row.get(rowLength-2)));queryList.add(unit);if(id%querySize==0){//每到一个批次结束的时候，就开始进行计算lambda，并进行lambda更新Map<Integer,Double> ndcg = LambdaCalculate.query_lambdaij(queryList, 1);for(int j=0;j<ndcg.size();j++){Integer idIndex = idList.get(j);dataList.get(idIndex).set(rowLength-1,ndcg.get(j+1).toString());}idList.clear();queryList.clear();}}return dataList;
}/*** @param dataList:传入的是训练数据集，特征列数15列，0是样本ID，1是样本标签 2是样本query_id,3-12是特征，13是样本得分 14存lambda值* @param learningRate:学习率* @param node:传入叶子节点* @return :返回数据训练集，并把数据集的score列进行更新完毕。* */
public static List<List<String>> updateScore(TreeNode node,List<List<String>> dataList,Double learningRate){List<LeafUnit<Integer,Double>> valueList = node.getList();Integer rowLength = dataList.get(0).size();Double sum =0.0D;Integer count = 0;for(LeafUnit<Integer,Double> unit:valueList){sum += unit.getValue();count++;}Double gama = sum/count;Double increment = gama*learningRate;for(LeafUnit<Integer,Double> unit:valueList){//下面这句话的意思，就是我们把新创建的这颗树叶子节点的lambda均值乘以一个学习率后，加到上一颗树的得分上，第一棵树上一个树是0，所以他们的得分值都是0Double newValue = Double.parseDouble(dataList.get(unit.getIndex()).get(rowLength-2))+increment;dataList.get(unit.getIndex()).set(rowLength-2,newValue.toString());}return dataList;
}

}

搜索排序LambdaMART中Lambda的计算过程java版本相关推荐

java项目极验验证_有关极验验证SDK的使用过程-Java版本
在这里我会一步一步的实现极验验证配置到自己的项目上的详细过程(Java版本)! 首先,我们看一下我们要实现的预期效果: (1),打开服务器,进入到登陆页面 (2),点击提交按钮,进入验证界面 (3), ...
LeetCode 153. 寻找旋转排序数组中的最小值【c++/java详细题解】
目录 1.题目 2.思路 3.c++代码 4.java代码 1.题目已知一个长度为 n 的数组,预先按照升序排列,经由 1 到 n 次旋转后,得到输入数组.例如,原数组 nums = [0,1, ...
LeetCode删除排序数组中的重复项（Java实现）
原题: 给定一个排序数组,你需要在原地删除重复出现的元素,使得每个元素只出现一次,返回移除后数组的新长度. 不要使用额外的数组空间,你必须在原地修改输入数组并在使用 O(1) 额外空间的条件 ...
算法-- 删除排序链表中的重复元素（Java）
题目: 给定一个已排序的链表的头 head , 删除所有重复的元素,使每个元素只出现一次 .返回已排序的链表 . 示例 1: 输入:head = [1,1,2] 输出:[1,2] 示例 2: 输入: ...
网易实习面经中的算法题（java版本含注释）
目录前言 7. 整数反转(中等) 9. 回文数(简单) 14. 最长公共前缀(简单) 20. 有效的括号(简单) 23. 合并K个升序链表(困难) 64. 最小路径和(中等) 103. 二叉树的锯齿 ...
Transformer 在美团搜索排序中的实践
美团搜索是美团 App 连接用户与商家的一种重要方式,而排序策略则是搜索链路的关键环节,对搜索展示效果起着至关重要的效果.目前,美团的搜索排序流程为多层排序,分别是粗排.精排.异构排序等,多层排序的流 ...
美团搜索排序设计方案
一.线上篇随着业务的发展,美团的商家和团购数正在飞速增长.这一背景下,搜索排序的重要性显得更加突出:排序的优化能帮助用户更便捷地找到满足其需求的商家和团购,改进用户体验,提升转化效果. 和传统网页搜索 ...
干货 | XGBoost在携程搜索排序中的应用
作者简介曹城,携程搜索部门高级研发工程师,主要负责携程搜索的个性化推荐和搜索排序等工作. 一.前言在互联网高速发展的今天,越来越复杂的特征被应用到搜索中,对于检索模型的排序,基本的业务规则排序或者 ...
排序 np_干货 | XGBoost在携程搜索排序中的应用
作者简介曹城,携程搜索部门高级研发工程师,主要负责携程搜索的个性化推荐和搜索排序等工作. 一.前言在互联网高速发展的今天,越来越复杂的特征被应用到搜索中,对于检索模型的排序,基本的业务规则排序或者 ...

搜索排序LambdaMART中Lambda的计算过程java版本

Lambda 的 Java 实现

搜索排序LambdaMART中Lambda的计算过程java版本相关推荐

最新文章

热门文章