// 从1亿个数中找到最大的1000个数 void find_max_data(int *source_data, int length, int k) { int count = 0; multiset<int> set; multiset<int>::iterator it; for(int i = 0; i < length; i++) { if(count < k) { set.insert(source_data[i]); } else { it = set.begin(); if(*it < source_data[i]) { set.erase(it); set.insert(source_data[i]); } } count++; } it = set.begin(); // 打印 for (; it != set.end(); it++) { cout<<*it<<endl; } }
这种情况当然是使用堆排序了,一次构造堆+999次重建堆,在一亿个元素的情况下时间复杂度接近o(n)
/* 一亿个数找最大的1000个数,要求效率高占用内存少。 */ #include <array> (2816)#include <algorithm> using namespace std; const int NUM_SET_SIZE = 100000000; const int MAX_AMOUNT = 1000; array<int, MAX_AMOUNT> find_max_data(array<int, NUM_SET_SIZE> num_set) { array<int, MAX_AMOUNT> result; make_heap(num_set.begin(), num_set.end()); // 构造堆 for (int i = 0; i < MAX_AMOUNT; i++) // 将last - 1处的值与first处进行交换,并使[first, last - 2]处成为一个有效堆 pop_heap(num_set.begin(), num_set.end() - i); auto riter = num_set.rbegin(); for (int i = 0; i < MAX_AMOUNT; i++) result[i] = *riter++; return result; }
/* 基数排序 */ #include <iostream> #include <stdlib.h> #include <ctime> #include <algorithm> #include <cassert> #include "windows.h" using namespace std; #define MAXN 100000000 #define MAXM 1000 void find_max_data0( int dest[], int m, int src[], int n ) { fill( dest, dest + n, -INT_MAX ); for ( int i = 0; i < n; i++ ) { for ( int j = 0; j < m; j++ ) { if ( src[i] > dest[j] ) { dest[j] = src[i]; break; } } } } void find_max_data1( int dest[], int m, int src[], int n ) { make_heap( src, src + n ); for( int i = 0; i < m; i++ ) { dest[i] = src[0]; pop_heap( src, src + n-- ); } } void radix_sort( int *src, int n ) { int *tmp = new int[MAXN]; int cnt[256]; for( int i = 0; i < 4; i++ ) { memset( cnt, 0, sizeof(cnt) ); for( int j = 0; j < MAXN; j++ ) { cnt[ ( src[j] >> (8 * i) ) & 0xff ]++; } for ( int j = 1; j < 256; j++ ) { cnt[j] = cnt[j - 1] + cnt[j]; } for( int j = MAXN - 1; j >= 0; j-- ) { tmp[ -- cnt[ ( src[j] >> (8 * i) ) & 0xff ] ] = src[j]; swap( src, tmp ); } } delete [] tmp; } void find_max_data2( int dest[], int m, int src[], int n ) { radix_sort( src, n ); int j = 0; for( int i = n - 1; i >= n - m; i-- ) { dest[j++] = src[i]; } } int src[MAXN]; int main() { ( (unsigned int) time( NULL ) ); src[i] = abs( i * 6516187 ); DWORD t1, t2; t1 = GetTickCount(); find_max_data2( dest2, MAXM, src, MAXN ); t2 = GetTickCount(); printf( "find_max_data2 time cost:%dn", t2 - t1 ); for( int i = 0; i < MAXM; i++ ) { printf( "n" ); return 0; } }