publicvoidaddValue(int v) { // TODO: some code goes here this.tuples ++; //System.out.println(this); buckets[(v - min) / range] ++; } publicdoubleestimateSelectivity(Predicate.Op op, int v) {
// TODO: some code goes here
if (op == Predicate.Op.EQUALS) { if (v < min || v > max) return0; intindex= (v - min) / range; inth= buckets[index]; return (h * 1.0 / range) / tuples; } if (op == Predicate.Op.NOT_EQUALS) {
return1.0 - estimateSelectivity(Predicate.Op.EQUALS, v); } if (op == Predicate.Op.GREATER_THAN) { if (v < min) return1; if (v > max) return0; intindex= (v - min) / range; inth= buckets[index]; intsum=0; for (inti= index + 1; i < buckets.length; i++) { if (i == buckets.length - 1) { sum += (max - i * range - min + 1) * buckets[i]; } else sum += buckets[i] * range; } return (((index + 1) * range + min - 1 - v) * buckets[index] + sum + 0.0) / tuples; } if (op == Predicate.Op.LESS_THAN) { if (v < min) return0; if (v > max) return1; intindex= (v - min) / range; inth= buckets[index]; intsum=0; for (inti=0; i < index; i++) { sum += buckets[i] * range; } return ((v - range * index - min) * buckets[index] + sum + 0.0) / tuples; } if (op == Predicate.Op.GREATER_THAN_OR_EQ) { return estimateSelectivity(Predicate.Op.GREATER_THAN, v ) + estimateSelectivity(Predicate.Op.EQUALS, v); } if (op == Predicate.Op.LESS_THAN_OR_EQ) { return estimateSelectivity(Predicate.Op.LESS_THAN, v) + estimateSelectivity(Predicate.Op.EQUALS, v); } return -1.0; }
Ecercise 2 TableStats
这个练习主要实现对每一张表的各种统计数据的保存,这个类需要实现的最主要方法就是给定筛选条件中的field op value,需要返回这个比较操作的估计耗费,这就需要用到我们刚才实现的类。为了初始化IntHistogram或StringHistogram,我们需要数据流的最大值和最小值,然后不断向其中添加元素,所以这里就需要对表中的元组进行两次遍历,第一次统计出最大值最小值,并为每一个Field初始化直方图,然后第二遍遍历向其中添加元素。
1. j = set of join nodes 2. for (i in 1...|j|): 3. for s in {all length i subsets of j} 4. bestPlan = {} 5. for s' in {all length d-1 subsets of s} 6. subplan = optjoin(s') 7. plan = best way to join (s-s') to subplan 8. if (cost(plan) < cost(bestPlan)) 9. bestPlan = plan 10. optjoin(s) = bestPlan 11. return optjoin(j)