
leveldb 学习记录(一) skiplist
leveldb 学习记录(二) Slice
leveldb 学习记录(三) MemTable 与 Immutable Memtable
leveldb 学习记录(四) skiplist补完

KV数据库大多将数据保存在内存中;如果中途发生意外情况,尚未dump到磁盘的记录就可能丢失。但是如果把每次操作都记录到log文件中,就可以按照log记录对这部分数据进行恢复。



record :=
    checksum: uint32          // crc32c of type and data[]
    length:   uint16
    type:     uint8           // One of FULL, FIRST, MIDDLE, LAST
    data:     uint8[length]



当一个RECORD在一个BLOCK内 那么它的类型是FULL




不同的RECORD 分配如下

BLOCK1  RECORDA整个数据都在BLOCK1中,所以它的类型是FULL。接着是RECORDB的开头部分数据,类型为FIRST

BLOCK2  RECORDB在BLOCK1中没有写完、又要到BLOCK3才结束,所以BLOCK2整块都是RECORDB的中间部分数据,类型为MIDDLE

BLOCK3  首先是RECORDB的剩余数据,类型是LAST。紧接着是RECORDC,这部分数据类型为FULL


对应的相关LOG 数据结构如下

 1 enum RecordType {
 2   // Zero is reserved for preallocated files
 3   kZeroType = 0,
 5   kFullType = 1,
 7   // For fragments
 8   kFirstType = 2,
 9   kMiddleType = 3,
10   kLastType = 4
11 };
12 static const int kMaxRecordType = kLastType;
14 static const int kBlockSize = 32768;
16 // Header is checksum (4 bytes), type (1 byte), length (2 bytes).
17 static const int kHeaderSize = 4 + 1 + 2;
19 }
20 }



 1 class Writer {
 2  public:
 3   // Create a writer that will append data to "*dest".
 4   // "*dest" must be initially empty.
 5   // "*dest" must remain live while this Writer is in use.
 6   explicit Writer(WritableFile* dest);
 7   ~Writer();
 9   Status AddRecord(const Slice& slice);
11  private:
12   WritableFile* dest_;
13   int block_offset_;       // Current offset in block
15   // crc32c values for all supported record types.  These are
16   // pre-computed to reduce the overhead of computing the crc of the
17   // record type stored in the header.
18   uint32_t type_crc_[kMaxRecordType + 1];
20   Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
22   // No copying allowed
23   Writer(const Writer&);
24   void operator=(const Writer&);
25 };


 1 Status Writer::AddRecord(const Slice& slice) {
 2   const char* ptr = slice.data();
 3   size_t left = slice.size();
 5   // Fragment the record if necessary and emit it.  Note that if slice
 6   // is empty, we still want to iterate once to emit a single
 7   // zero-length record
 8   Status s;
 9   bool begin = true;
10   do {
11     const int leftover = kBlockSize - block_offset_;        //剩余要填充的数据长度 是一个BLOCK的长度减去块内已经填充的长度
12     assert(leftover >= 0);
13     if (leftover < kHeaderSize) {                            //要填充的长度大于7 则在下一个BLOCK进行记录 (因为checksum 4字节  length2字节  type 1字节,光是记录信息已经需要7个字节)
14       // Switch to a new block
15       if (leftover > 0) {
16         // Fill the trailer (literal below relies on kHeaderSize being 7)
17         assert(kHeaderSize == 7);
18         dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
19       }
20       block_offset_ = 0;
21     }
23     // Invariant: we never leave < kHeaderSize bytes in a block.
24     assert(kBlockSize - block_offset_ - kHeaderSize >= 0);
26     const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
27     const size_t fragment_length = (left < avail) ? left : avail;        //根据能否在本BLOCK填充完毕 选择填充长度为left 或者 avail
29     RecordType type;
30     const bool end = (left == fragment_length);
31     if (begin && end) {                                //beg end在用一个BLOCK里 record的type肯定是FULL
32       type = kFullType;
33     } else if (begin) {                                //本BLOCK只有beg 那么record的type    就是FIRST
34       type = kFirstType;
35     } else if (end) {                                //本BLOCK只有end 那么record的TYPE就是last
36       type = kLastType;
37     } else {
38       type = kMiddleType;                            //本BLOCK 没有beg end  那么record填充了整个BLOCK type是MIDDLE
39     }
41     s = EmitPhysicalRecord(type, ptr, fragment_length);        //提交到log文件记录
42     ptr += fragment_length;
43     left -= fragment_length;
44     begin = false;
45   } while (s.ok() && left > 0);
46   return s;
47 }
49 Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
50   assert(n <= 0xffff);  // Must fit in two bytes
51   assert(block_offset_ + kHeaderSize + n <= kBlockSize);
53   // Format the header
54   char buf[kHeaderSize];
55   buf[4] = static_cast<char>(n & 0xff);                //长度低8位
56   buf[5] = static_cast<char>(n >> 8);                //长度高8位
57   buf[6] = static_cast<char>(t);                    //type
59   // Compute the crc of the record type and the payload.
60   uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n);                //校验和
61   crc = crc32c::Mask(crc);                 // Adjust for storage
62   EncodeFixed32(buf, crc);
64   // Write the header and the payload
65   Status s = dest_->Append(Slice(buf, kHeaderSize));        //数据信息写入
66   if (s.ok()) {
67     s = dest_->Append(Slice(ptr, n));                        //数据写入
68     if (s.ok()) {
69       s = dest_->Flush();
70     }
71   }
72   block_offset_ += kHeaderSize + n;
73   return s;
74 }



日志读取代码中还有一个Reporter 类用于报告错误

 1 class Reader {
 2  public:
 3   // Interface for reporting errors.
 4   class Reporter {
 5    public:
 6     virtual ~Reporter();
 8     // Some corruption was detected.  "size" is the approximate number
 9     // of bytes dropped due to the corruption.
10     virtual void Corruption(size_t bytes, const Status& status) = 0;
11   };
13   // Create a reader that will return log records from "*file".
14   // "*file" must remain live while this Reader is in use.
15   //
16   // If "reporter" is non-NULL, it is notified whenever some data is
17   // dropped due to a detected corruption.  "*reporter" must remain
18   // live while this Reader is in use.
19   //
20   // If "checksum" is true, verify checksums if available.
21   //
22   // The Reader will start reading at the first record located at physical
23   // position >= initial_offset within the file.
24   Reader(SequentialFile* file, Reporter* reporter, bool checksum,
25          uint64_t initial_offset);
27   ~Reader();
29   // Read the next record into *record.  Returns true if read
30   // successfully, false if we hit end of the input.  May use
31   // "*scratch" as temporary storage.  The contents filled in *record
32   // will only be valid until the next mutating operation on this
33   // reader or the next mutation to *scratch.
34   bool ReadRecord(Slice* record, std::string* scratch);
36   // Returns the physical offset of the last record returned by ReadRecord.
37   //
38   // Undefined before the first call to ReadRecord.
39   uint64_t LastRecordOffset();
41  private:
42   SequentialFile* const file_;
43   Reporter* const reporter_;
44   bool const checksum_;
45   char* const backing_store_;
46   Slice buffer_;
47   bool eof_;   // Last Read() indicated EOF by returning < kBlockSize
49   // Offset of the last record returned by ReadRecord.
50   uint64_t last_record_offset_;
51   // Offset of the first location past the end of buffer_.
52   uint64_t end_of_buffer_offset_;
54   // Offset at which to start looking for the first record to return
55   uint64_t const initial_offset_;
57   // Extend record types with the following special values
58   enum {
59     kEof = kMaxRecordType + 1,
60     // Returned whenever we find an invalid physical record.
61     // Currently there are three situations in which this happens:
62     // * The record has an invalid CRC (ReadPhysicalRecord reports a drop)
63     // * The record is a 0-length record (No drop is reported)
64     // * The record is below constructor's initial_offset (No drop is reported)
65     kBadRecord = kMaxRecordType + 2
66   };
68   // Skips all blocks that are completely before "initial_offset_".
69   //
70   // Returns true on success. Handles reporting.
71   bool SkipToInitialBlock();
73   // Return type, or one of the preceding special values
74   unsigned int ReadPhysicalRecord(Slice* result);
76   // Reports dropped bytes to the reporter.
77   // buffer_ must be updated to remove the dropped bytes prior to invocation.
78   void ReportCorruption(size_t bytes, const char* reason);
79   void ReportDrop(size_t bytes, const Status& reason);
81   // No copying allowed
82   Reader(const Reader&);
83   void operator=(const Reader&);
84 };

关键函数是bool Reader::ReadRecord(Slice* record, std::string* scratch)

我的理解中 只要除开完全被 initial_offset_长度覆盖的BLOCK ,

剩下的BLOCK依次读取记录,根据type是FULL MIDDLE FIRST LAST 决定是否继续读取即可


bool Reader::ReadRecord(Slice* record, std::string* scratch) {if (last_record_offset_ < initial_offset_) {                    //实际上整个工程中initial_offset_一直为0  ,if (!SkipToInitialBlock()) {                                //block_start_location圆整为包含initial_offset_的BLOCK的偏移return false;}}scratch->clear();record->clear();bool in_fragmented_record = false;// Record offset of the logical record that we're reading// 0 is a dummy value to make compilers happyuint64_t prospective_record_offset = 0;Slice fragment;while (true) {uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();const unsigned int record_type = ReadPhysicalRecord(&fragment);switch (record_type) {case kFullType:                                //一次性读取FULL类型的record 直接返回成功if (in_fragmented_record) {// Handle bug in earlier versions of log::Writer where// it could emit an empty kFirstType record at the tail end// of a block followed by a kFullType or kFirstType record// at the beginning of the next block.if (scratch->empty()) {in_fragmented_record = false;} else {ReportCorruption(scratch->size(), "partial record without end(1)");}}prospective_record_offset = physical_record_offset;scratch->clear();*record = fragment;last_record_offset_ = prospective_record_offset;return true;case kFirstType:                        //读取到FIRST类型的record  string.assign  然后继续if (in_fragmented_record) {// Handle bug in earlier versions of log::Writer where// it could emit an empty kFirstType record at the tail end// of a block followed by a kFullType or kFirstType record// at the beginning of the next block.if (scratch->empty()) {in_fragmented_record = false;} else {ReportCorruption(scratch->size(), "partial record without end(2)");}}prospective_record_offset = physical_record_offset;scratch->assign(fragment.data(), fragment.size());in_fragmented_record = true;break;case kMiddleType:                            //读取到MIDDLE类型的record  string.append  然后继续if (!in_fragmented_record) 
{ReportCorruption(fragment.size(),"missing start of fragmented record(1)");} else {scratch->append(fragment.data(), fragment.size());}break;case kLastType:                            //读取到LAST 类型record string.appendif (!in_fragmented_record) {ReportCorruption(fragment.size(),"missing start of fragmented record(2)");} else {scratch->append(fragment.data(), fragment.size());*record = Slice(*scratch);last_record_offset_ = prospective_record_offset;return true;}break;case kEof:if (in_fragmented_record) {ReportCorruption(scratch->size(), "partial record without end(3)");scratch->clear();}return false;case kBadRecord:if (in_fragmented_record) {ReportCorruption(scratch->size(), "error in middle of record");in_fragmented_record = false;scratch->clear();}break;default: {char buf[40];snprintf(buf, sizeof(buf), "unknown record type %u", record_type);ReportCorruption((fragment.size() + (in_fragmented_record ? scratch->size() : 0)),buf);in_fragmented_record = false;scratch->clear();break;}}}return false;




leveldb 学习记录(四)Log文件相关推荐

  1. levelDB学习记录

    文章目录 levelDB学习记录 博客 LevelDB源码分析 1.起步 2.Visual Studio Code 3.基本思路 LevelDB四个接口 磁盘的特点 状态机 状态机的四个概念 初步设计 ...

  2. MySQL学习记录 (四) ----- SQL数据管理语句(DML)

    相关文章: <MySQL学习记录 (一) ----- 有关数据库的基本概念和MySQL常用命令> <MySQL学习记录 (二) ----- SQL数据查询语句(DQL)> &l ...

  3. Kafka学习记录(四)——消费者

    Kafka学习记录(四)--消费者 目录 Kafka学习记录(四)--消费者 对应课程 Kafka消费者工作流程 消费方式和流程 消费者组原理 消费者组初始化流程 消费者组详细消费流程 重要参数 ka ...

  4. ROS学习记录9——urdf文件的创建与使用

    零.前言 在学习这一节之前,至少得明白如何创建一个工作空间,如何编译工作空间,如何运行工作空间里的文件(把工作空间的环境目录添加到环境里).如果有不会的,请看鄙人的ROS学习记录3. 本节默认工作空间 ...

  5. 【Vim】学习笔记四 多文件编辑、可视模式、视窗操作

    文章目录 1. 多文件编辑 (1) 使用vim编辑多个文件 (2) 进入vim后打开新文件 (3) 恢复文件 2. 可视模式命令简介 3. 视窗操作 (1) 视窗操作简介 4. 创建加密文档 5. 在 ...

  6. 【故障诊断发展学习记录四——数字孪生与控制系统健康管理(DT PHM)】

    数字数字 目录 1. 数字孪生的起源 1.1 数字工程 1.2  模型贯穿决策 1.3 数字工程路线图 1.4 数字工程战略目标 2. 美军数字工程 2.1 生态系统全视图 2.2 支持采办的的完整视 ...

  7. gRPC学习记录(四)--官方Demo

    了解proto3后,接下来看官方Demo作为训练,这里建议看一遍之后自己动手搭建出来,一方面巩固之前的知识,一方面是对整个流程更加熟悉. 官方Demo地址: https://github.com/gr ...

  8. grpc简单使用 java_gRPC学习记录(四)-官方Demo - Java 技术驿站-Java 技术驿站

    了解proto3后,接下来看官方Demo作为训练,这里建议看一遍之后自己动手搭建出来,一方面巩固之前的知识,一方面是对整个流程更加熟悉. 官方Demo地址: https://github.com/gr ...

  9. python程序命令行输出记录到log文件中

    $ python xxx.py args1 >> data/log.txt第一条命令的含义是:将xxx.py这个python文件中print()原本输出到console的内容重定向到&qu ...


  1. python3之request用法_如何扩展python3 BaseHTTPRequestHandler类,以便可以使用成员variab调用函数...
  2. android 图片作为背景图片,Android设置网络图片为手机背景图片的方法
  3. auto_ptr动态分配对象
  4. 在Ubuntu中安装HBase
  5. 前端实现炫酷动效_20个网页动效设计的炫酷神器
  6. 【C语言进阶深度学习记录】二十六 C语言中的字符串与字符数组的详细分析
  7. linux bash函数里面调用命令行,Linux-在gnome-terminal -x中运行bash函数
  8. TVM:交叉编译和RPC
  9. requirejs教程(一):基本用法
  10. vue项目中使用特殊字体
  11. 《概率论与数理统计》重学笔记
  12. 公司估值:格雷厄姆的公司价值评估模型及应用——成长估值(一)
  13. 网友上海求职指南2007
  14. Window下Ribbit MQ安装
  15. eclipse neno 将jar包自动导入WEB-INF\lib
  16. 麦肯锡精英的48个工作习惯~书摘
  17. 关于我在编程里学表白这件事。。。。【python表白代码】
  18. php表单验证_PHP表单验证:简介
  19. 罗玉凤(凤姐)在网上评论说华为早就该垮了!大家怎么看?
  20. BI到底是什么,是否所有企业都适合上BI?


  1. 50 【Go版本变化】
  2. 知识管理系列---3.程序实现
  3. buildroot的使用简介【转】
  4. 网易云信,发送验证码短信C#版代码
  5. Mybatis sql注入问题
  6. poj 1118 Lining Up(水题)
  7. ASP.Net MVC框架配置分析详解
  8. Flutter入门三部曲(3) - 数据传递/状态管理 | 掘金技术征文
  9. C++与C#混合编程
  10. 26Exchange Server 2010跨站点部署-内外网邮件流测试