I ran another test and found something confusing. I have a BSON file of about 12GB and used the mongoimport tool to import it into MongoDB.
Here are my two test cases:
- If I create the indexes after the document insertion completes, the index size is normal: the total is about 11GB.
zhangruian-rs_0:PRIMARY> db.user_1400005918.stats()
{
"ns" : "pushdb.user_1400005918",
"size" : 2017049569,
"count" : 92558532,
"avgObjSize" : 21,
"storageSize" : 1004810240,
"freeStorageSize" : 1093632,
"capped" : false,
"wiredTiger" : {
"metadata" : {
"formatVersion" : 1
},
"creationString" : "access_pattern_hint=none,allocation_size=4KB,app_metadata=(formatVersion=1),assert=(commit_timestamp=none,durable_timestamp=none,read_timestamp=none,write_timestamp=off),block_allocation=best,block_compressor=snappy,cache_resident=false,checksum=on,colgroups=,collator=,columns=,dictionary=0,encryption=(keyid=,name=),exclusive=false,extractor=,format=btree,huffman_key=,huffman_value=,ignore_in_memory_cache_size=false,immutable=false,import=(enabled=false,file_metadata=,repair=false),internal_item_max=0,internal_key_max=0,internal_key_truncate=true,internal_page_max=4KB,key_format=q,key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=64MB,log=(enabled=false),lsm=(auto_throttle=true,bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,merge_custom=(prefix=,start_generation=0,suffix=),merge_max=15,merge_min=0),memory_page_image_max=0,memory_page_max=10m,os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,prefix_compression_min=4,readonly=false,source=,split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,tiered_object=false,tiered_storage=(auth_token=,bucket=,bucket_prefix=,cache_directory=,local_retention=300,name=,object_target_size=10M),type=file,value_format=u,verbose=[],write_timestamp_usage=none",
"type" : "file",
"uri" : "statistics:table:pushdb/collection-0--4493473792861060111",
"LSM" : {
"bloom filter false positives" : 0,
"bloom filter hits" : 0,
"bloom filter misses" : 0,
"bloom filter pages evicted from cache" : 0,
"bloom filter pages read into cache" : 0,
"bloom filters in the LSM tree" : 0,
"chunks in the LSM tree" : 0,
"highest merge generation in the LSM tree" : 0,
"queries that could have benefited from a Bloom filter that did not exist" : 0,
"sleep for LSM checkpoint throttle" : 0,
"sleep for LSM merge throttle" : 0,
"total size of bloom filters" : 0
},
"block-manager" : {
"allocations requiring file extension" : 42109,
"blocks allocated" : 46786,
"blocks freed" : 9754,
"checkpoint size" : 1003700224,
"file allocation unit size" : 4096,
"file bytes available for reuse" : 1093632,
"file magic number" : 120897,
"file major version number" : 1,
"file size in bytes" : 1004810240,
"minor version number" : 0
},
"btree" : {
"btree checkpoint generation" : 2368,
"btree clean tree checkpoint expiration time" : NumberLong("9223372036854775807"),
"btree compact pages reviewed" : 0,
"btree compact pages selected to be rewritten" : 0,
"btree compact pages skipped" : 0,
"btree skipped by compaction as process would not reduce size" : 0,
"column-store fixed-size leaf pages" : 0,
"column-store internal pages" : 0,
"column-store variable-size RLE encoded values" : 0,
"column-store variable-size deleted values" : 0,
"column-store variable-size leaf pages" : 0,
"fixed-record size" : 0,
"maximum internal page key size" : 368,
"maximum internal page size" : 4096,
"maximum leaf page key size" : 2867,
"maximum leaf page size" : 32768,
"maximum leaf page value size" : 67108864,
"maximum tree depth" : 4,
"number of key/value pairs" : 0,
"overflow pages" : 0,
"pages rewritten by compaction" : 0,
"row-store empty values" : 0,
"row-store internal pages" : 0,
"row-store leaf pages" : 0
},
"cache" : {
"bytes currently in the cache" : 27746,
"bytes dirty in the cache cumulative" : 66852720,
"bytes read into cache" : 9802642216,
"bytes written from cache" : 2983019028,
"checkpoint blocked page eviction" : 0,
"checkpoint of history store file blocked non-history store page eviction" : 0,
"data source pages selected for eviction unable to be evicted" : 177,
"eviction gave up due to detecting an out of order on disk value behind the last update on the chain" : 0,
"eviction gave up due to detecting an out of order tombstone ahead of the selected on disk update" : 0,
"eviction gave up due to detecting an out of order tombstone ahead of the selected on disk update after validating the update chain" : 0,
"eviction gave up due to detecting out of order timestamps on the update chain after the selected on disk update" : 0,
"eviction walk passes of a file" : 21077,
"eviction walk target pages histogram - 0-9" : 10958,
"eviction walk target pages histogram - 10-31" : 5597,
"eviction walk target pages histogram - 128 and higher" : 0,
"eviction walk target pages histogram - 32-63" : 2750,
"eviction walk target pages histogram - 64-128" : 1772,
"eviction walk target pages reduced due to history store cache pressure" : 0,
"eviction walks abandoned" : 3,
"eviction walks gave up because they restarted their walk twice" : 7415,
"eviction walks gave up because they saw too many pages and found no candidates" : 3467,
"eviction walks gave up because they saw too many pages and found too few candidates" : 567,
"eviction walks reached end of tree" : 17747,
"eviction walks restarted" : 0,
"eviction walks started from root of tree" : 11452,
"eviction walks started from saved location in tree" : 9625,
"hazard pointer blocked page eviction" : 8,
"history store table insert calls" : 0,
"history store table insert calls that returned restart" : 0,
"history store table out-of-order resolved updates that lose their durable timestamp" : 0,
"history store table out-of-order updates that were fixed up by reinserting with the fixed timestamp" : 0,
"history store table reads" : 0,
"history store table reads missed" : 0,
"history store table reads requiring squashed modifies" : 0,
"history store table truncation by rollback to stable to remove an unstable update" : 0,
"history store table truncation by rollback to stable to remove an update" : 0,
"history store table truncation to remove an update" : 0,
"history store table truncation to remove range of updates due to key being removed from the data page during reconciliation" : 0,
"history store table truncation to remove range of updates due to out-of-order timestamp update on data page" : 0,
"history store table writes requiring squashed modifies" : 0,
"in-memory page passed criteria to be split" : 2144,
"in-memory page splits" : 1054,
"internal pages evicted" : 329,
"internal pages split during eviction" : 3,
"leaf pages split during eviction" : 1132,
"modified pages evicted" : 1435,
"overflow pages read into cache" : 0,
"page split during eviction deepened the tree" : 1,
"page written requiring history store records" : 0,
"pages read into cache" : 117303,
"pages read into cache after truncate" : 1,
"pages read into cache after truncate in prepare state" : 0,
"pages requested from the cache" : 159242466,
"pages seen by eviction walk" : 6732638,
"pages written from cache" : 46486,
"pages written requiring in-memory restoration" : 702,
"the number of times full update inserted to history store" : 0,
"the number of times reverse modify inserted to history store" : 0,
"tracked dirty bytes in the cache" : 0,
"unmodified pages evicted" : 139182
},
"cache_walk" : {
"Average difference between current eviction generation when the page was last considered" : 0,
"Average on-disk page image size seen" : 0,
"Average time in cache for pages that have been visited by the eviction server" : 0,
"Average time in cache for pages that have not been visited by the eviction server" : 0,
"Clean pages currently in cache" : 0,
"Current eviction generation" : 0,
"Dirty pages currently in cache" : 0,
"Entries in the root page" : 0,
"Internal pages currently in cache" : 0,
"Leaf pages currently in cache" : 0,
"Maximum difference between current eviction generation when the page was last considered" : 0,
"Maximum page size seen" : 0,
"Minimum on-disk page image size seen" : 0,
"Number of pages never visited by eviction server" : 0,
"On-disk page image sizes smaller than a single allocation unit" : 0,
"Pages created in memory and never written" : 0,
"Pages currently queued for eviction" : 0,
"Pages that could not be queued for eviction" : 0,
"Refs skipped during cache traversal" : 0,
"Size of the root page" : 0,
"Total number of pages currently in cache" : 0
},
"checkpoint-cleanup" : {
"pages added for eviction" : 9,
"pages removed" : 0,
"pages skipped during tree walk" : 944508,
"pages visited" : 2444803
},
"compression" : {
"compressed page maximum internal page size prior to compression" : 4096,
"compressed page maximum leaf page size prior to compression " : 131072,
"compressed pages read" : 117272,
"compressed pages written" : 41769,
"page written failed to compress" : 0,
"page written was too small to compress" : 4717
},
"cursor" : {
"Total number of entries skipped by cursor next calls" : 0,
"Total number of entries skipped by cursor prev calls" : 0,
"Total number of entries skipped to position the history store cursor" : 0,
"Total number of times a search near has exited due to prefix config" : 0,
"bulk loaded cursor insert calls" : 0,
"cache cursors reuse count" : 185289,
"close calls that result in cache" : 185289,
"create calls" : 14,
"cursor next calls that skip due to a globally visible history store tombstone" : 0,
"cursor next calls that skip greater than or equal to 100 entries" : 0,
"cursor next calls that skip less than 100 entries" : 277675602,
"cursor prev calls that skip due to a globally visible history store tombstone" : 0,
"cursor prev calls that skip greater than or equal to 100 entries" : 0,
"cursor prev calls that skip less than 100 entries" : 1,
"insert calls" : 92558532,
"insert key and value bytes" : 2462974393,
"modify" : 0,
"modify key and value bytes affected" : 0,
"modify value bytes modified" : 0,
"next calls" : 277675602,
"open cursor count" : 0,
"operation restarted" : 0,
"prev calls" : 1,
"remove calls" : 0,
"remove key bytes removed" : 0,
"reserve calls" : 0,
"reset calls" : 649903,
"search calls" : 0,
"search history store calls" : 0,
"search near calls" : 279327,
"truncate calls" : 0,
"update calls" : 0,
"update key and value bytes" : 0,
"update value size change" : 0
},
"reconciliation" : {
"approximate byte size of timestamps in pages written" : 177205344,
"approximate byte size of transaction IDs in pages written" : 535336,
"dictionary matches" : 0,
"fast-path pages deleted" : 0,
"internal page key bytes discarded using suffix compression" : 71589,
"internal page multi-block writes" : 214,
"internal-page overflow keys" : 0,
"leaf page key bytes discarded using prefix compression" : 0,
"leaf page multi-block writes" : 1268,
"leaf-page overflow keys" : 0,
"maximum blocks required for a page" : 1,
"overflow values written" : 0,
"page checksum matches" : 0,
"page reconciliation calls" : 1952,
"page reconciliation calls for eviction" : 714,
"pages deleted" : 9,
"pages written including an aggregated newest start durable timestamp " : 3503,
"pages written including an aggregated newest stop durable timestamp " : 0,
"pages written including an aggregated newest stop timestamp " : 0,
"pages written including an aggregated newest stop transaction ID" : 0,
"pages written including an aggregated newest transaction ID " : 397,
"pages written including an aggregated oldest start timestamp " : 3313,
"pages written including an aggregated prepare" : 0,
"pages written including at least one prepare" : 0,
"pages written including at least one start durable timestamp" : 11404,
"pages written including at least one start timestamp" : 11404,
"pages written including at least one start transaction ID" : 47,
"pages written including at least one stop durable timestamp" : 0,
"pages written including at least one stop timestamp" : 0,
"pages written including at least one stop transaction ID" : 0,
"records written including a prepare" : 0,
"records written including a start durable timestamp" : 11075334,
"records written including a start timestamp" : 11075334,
"records written including a start transaction ID" : 66917,
"records written including a stop durable timestamp" : 0,
"records written including a stop timestamp" : 0,
"records written including a stop transaction ID" : 0
},
"session" : {
"object compaction" : 0,
"tiered operations dequeued and processed" : 0,
"tiered operations scheduled" : 0,
"tiered storage local retention time (secs)" : 0,
"tiered storage object size" : 0
},
"transaction" : {
"race to read prepared update retry" : 0,
"rollback to stable history store records with stop timestamps older than newer records" : 0,
"rollback to stable inconsistent checkpoint" : 0,
"rollback to stable keys removed" : 0,
"rollback to stable keys restored" : 0,
"rollback to stable restored tombstones from history store" : 0,
"rollback to stable restored updates from history store" : 0,
"rollback to stable skipping delete rle" : 0,
"rollback to stable skipping stable rle" : 0,
"rollback to stable sweeping history store keys" : 0,
"rollback to stable updates removed from history store" : 0,
"transaction checkpoints due to obsolete pages" : 0,
"update conflicts" : 0
}
},
"nindexes" : 15,
"indexBuilds" : [ ],
"totalIndexSize" : 11148996608,
"totalSize" : 12153806848,
"indexSizes" : {
"_id_" : 1232883712,
"_id_1_tags.0_1_tags.1_1_tags.2_1_tags.3_1_tags.4_1_tags.5_1_tags.6_1_tags.7_1_tags.8_1_tags.9_1" : 2455584768,
"tagsv2_1" : 417800192,
"tags.4_1" : 417800192,
"_id_1_tagsv2_1" : 1334550528,
"tags.0_1" : 417800192,
"tags.2_1" : 417800192,
"tags.3_1" : 417800192,
"_id_hashed" : 1520025600,
"tags.1_1" : 417800192,
"tags.7_1" : 417800192,
"tags.8_1" : 417800192,
"tags.9_1" : 427950080,
"tags.5_1" : 417800192,
"tags.6_1" : 417800192
},
"scaleFactor" : 1,
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1667976189, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1667976189, 1)
}