import java.io.*;
import java.util.*;
import java.util.Map.Entry;

/**
 * Finds the most frequent IP addresses in a file that is too large to count in one pass.
 *
 * <p>The work is done in three steps:
 * <ol>
 *   <li>{@link #splitFile} cuts the input into chunk files with a bounded number of lines;</li>
 *   <li>{@link #countIPsInChunk} counts the IPs of one chunk and writes an intermediate
 *       {@code ip,count} result file;</li>
 *   <li>{@link #mergeResults} sums the intermediate counts and selects the top entries.</li>
 * </ol>
 *
 * <p>Each input line is treated as one IP address. Note that the merge step keeps one counter
 * per distinct IP in memory, so memory use is bounded by the number of distinct IPs, not by
 * the number of lines.
 */
public class Top10IPs {

    /** Number of lines written to each chunk file when the caller does not specify one. */
    private static final int DEFAULT_CHUNK_SIZE = 1000000;

    /** Number of entries reported when the caller does not specify one. */
    private static final int DEFAULT_TOP_N = 10;

    /**
     * Runs the full pipeline and prints the top 10 IPs as {@code ip: count}.
     *
     * @param args optional; {@code args[0]} is the input file and {@code args[1]} the temp
     *             directory prefix (must end with a path separator). Missing arguments fall
     *             back to the placeholder paths below.
     * @throws IOException if any file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        String inputFilePath = args.length > 0 ? args[0] : "path/to/large/file.txt";
        String tempDirectory = args.length > 1 ? args[1] : "path/to/temp/directory/";

        // Step 1: Split the large file into smaller chunks
        List<String> chunkFiles = splitFile(inputFilePath, tempDirectory);

        // Step 2: Count IPs in each chunk and save intermediate results
        List<String> resultFiles = new ArrayList<>();
        for (String chunkFile : chunkFiles) {
            resultFiles.add(countIPsInChunk(chunkFile, tempDirectory));
        }

        // Step 3: Merge intermediate results and find top 10 IPs
        List<Entry<String, Integer>> top10IPs = mergeResults(resultFiles);

        // Print the top 10 IPs
        for (Entry<String, Integer> entry : top10IPs) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }

    /**
     * Splits the input file into chunk files of at most 1,000,000 lines each.
     *
     * @param inputFilePath path of the file to split
     * @param tempDirectory prefix for the chunk files; it is concatenated directly with the
     *                      file name, so it must end with a path separator
     * @return paths of the chunk files, in input order
     * @throws IOException if the input cannot be read or a chunk cannot be written
     */
    public static List<String> splitFile(String inputFilePath, String tempDirectory) throws IOException {
        return splitFile(inputFilePath, tempDirectory, DEFAULT_CHUNK_SIZE);
    }

    /**
     * Splits the input file into chunk files named {@code chunk_<index>.txt}.
     *
     * <p>Existing chunk files with the same name are overwritten, so repeated runs do not
     * accumulate stale data.
     *
     * @param inputFilePath path of the file to split
     * @param tempDirectory prefix for the chunk files; must end with a path separator
     * @param chunkSize     maximum number of lines per chunk; must be positive
     * @return paths of the chunk files, in input order (empty for an empty input file)
     * @throws IOException              if the input cannot be read or a chunk cannot be written
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static List<String> splitFile(String inputFilePath, String tempDirectory, int chunkSize)
            throws IOException {
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive: " + chunkSize);
        }
        List<String> chunkFiles = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilePath))) {
            // Read ahead exactly one line. The line that ends a full chunk is carried over as
            // the first line of the next chunk instead of being discarded.
            String line = reader.readLine();
            while (line != null) {
                String chunkFilePath = tempDirectory + "chunk_" + chunkFiles.size() + ".txt";
                try (BufferedWriter writer = new BufferedWriter(new FileWriter(chunkFilePath))) {
                    int lineCount = 0;
                    while (lineCount < chunkSize && line != null) {
                        writer.write(line);
                        writer.newLine();
                        line = reader.readLine();
                        lineCount++;
                    }
                }
                chunkFiles.add(chunkFilePath);
            }
        }
        return chunkFiles;
    }

    /**
     * Counts how often each IP occurs in one chunk and writes the counts to
     * {@code result_<index>.txt} as {@code ip,count} lines.
     *
     * <p>Lines are trimmed and blank lines are ignored so that stray whitespace or a trailing
     * empty line is not counted as an address.
     *
     * @param chunkFilePath path of a chunk file produced by {@link #splitFile}; the part of
     *                      the name after the last {@code '_'} is reused for the result file
     * @param tempDirectory prefix for the result file; must end with a path separator
     * @return path of the intermediate result file
     * @throws IOException         if the chunk cannot be read or the result cannot be written
     * @throws ArithmeticException if a counter exceeds {@code Integer.MAX_VALUE}
     */
    public static String countIPsInChunk(String chunkFilePath, String tempDirectory) throws IOException {
        Map<String, Integer> ipCountMap = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(chunkFilePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String ip = line.trim();
                if (ip.isEmpty()) {
                    continue;
                }
                ipCountMap.merge(ip, 1, (x, y) -> Math.addExact(x, y));
            }
        }

        String resultFilePath = tempDirectory + "result_"
                + chunkFilePath.substring(chunkFilePath.lastIndexOf('_') + 1);
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(resultFilePath))) {
            for (Entry<String, Integer> entry : ipCountMap.entrySet()) {
                writer.write(entry.getKey() + "," + entry.getValue());
                writer.newLine();
            }
        }
        return resultFilePath;
    }

    /**
     * Merges the intermediate result files and returns the 10 most frequent IPs.
     *
     * @param resultFiles paths of files produced by {@link #countIPsInChunk}
     * @return at most 10 entries, highest count first
     * @throws IOException if a result file cannot be read or contains a malformed line
     */
    public static List<Entry<String, Integer>> mergeResults(List<String> resultFiles) throws IOException {
        return mergeResults(resultFiles, DEFAULT_TOP_N);
    }

    /**
     * Merges the intermediate result files and returns the {@code topN} most frequent IPs.
     *
     * <p>Entries are ordered by count descending; equal counts are ordered by IP string
     * ascending so the result is deterministic.
     *
     * @param resultFiles paths of files produced by {@link #countIPsInChunk}
     * @param topN        maximum number of entries to return; must not be negative
     * @return at most {@code topN} entries, highest count first
     * @throws IOException              if a result file cannot be read or contains a line
     *                                  that is not of the form {@code ip,count}
     * @throws IllegalArgumentException if {@code topN} is negative
     * @throws ArithmeticException      if a summed count exceeds {@code Integer.MAX_VALUE}
     */
    public static List<Entry<String, Integer>> mergeResults(List<String> resultFiles, int topN)
            throws IOException {
        if (topN < 0) {
            throw new IllegalArgumentException("topN must not be negative: " + topN);
        }

        Map<String, Integer> ipCountMap = new HashMap<>();
        for (String resultFile : resultFiles) {
            try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.isEmpty()) {
                        continue;
                    }
                    // Split on the LAST comma so the count is always the final field.
                    int comma = line.lastIndexOf(',');
                    if (comma <= 0 || comma == line.length() - 1) {
                        throw new IOException("Malformed line in " + resultFile + ": " + line);
                    }
                    String ip = line.substring(0, comma);
                    int count;
                    try {
                        count = Integer.parseInt(line.substring(comma + 1).trim());
                    } catch (NumberFormatException e) {
                        throw new IOException("Malformed count in " + resultFile + ": " + line, e);
                    }
                    ipCountMap.merge(ip, count, (x, y) -> Math.addExact(x, y));
                }
            }
        }

        // "Weakest" entry first: lowest count, and among equal counts the larger IP string.
        Comparator<Entry<String, Integer>> weakestFirst = (x, y) -> {
            int byCount = Integer.compare(x.getValue(), y.getValue());
            return byCount != 0 ? byCount : y.getKey().compareTo(x.getKey());
        };

        // Bounded min-heap: whenever it grows past topN, evict the weakest entry.
        PriorityQueue<Entry<String, Integer>> minHeap = new PriorityQueue<>(weakestFirst);
        for (Entry<String, Integer> entry : ipCountMap.entrySet()) {
            minHeap.offer(entry);
            if (minHeap.size() > topN) {
                minHeap.poll();
            }
        }

        List<Entry<String, Integer>> topIPs = new ArrayList<>(minHeap);
        topIPs.sort(weakestFirst.reversed());
        return topIPs;
    }
}

/*
 * Code explanation
 *
 * - splitFile: splits the large file into several smaller files, each holding a bounded
 *   number of IP addresses.
 * - countIPsInChunk: counts the number of accesses per IP in one small file and saves the
 *   result to an intermediate result file.
 * - mergeResults: merges all intermediate result files and finds the ten IP addresses with
 *   the highest access counts.
 *
 * The code assumes the chunk size and file paths have been adjusted to the concrete
 * requirements. With that done, it processes a large file and finds the ten most frequently
 * seen IP addresses.
 *
 * Notes
 *
 * - Memory management: watch memory usage closely during processing to avoid running out of
 *   memory. The merge step keeps one counter per distinct IP.
 * - Disk I/O: optimizing disk I/O can noticeably improve throughput.
 * - Data consistency: when processing large files, make sure the data stays complete and
 *   consistent.
 * - Error handling: add appropriate handling for exceptions that may occur while reading,
 *   writing or sorting.
 *
 * With this approach a large file can be processed without loading it into memory as a
 * whole, and the ten most frequent IP addresses can be found.
 */
// 15.
/*
 * Algorithm question: in a sorted array of length N, quickly find the indices of all
 * elements whose value is M (M may occur more than once).
 *
 * Approach: use binary search to locate the first and the last position of M, then walk the
 * positions between them to collect every index. The time complexity is O(log N) + O(k),
 * where k is the number of elements equal to M. A Java implementation follows.
 */
import java.util.ArrayList;
import java.util.List;

/** Looks up every index at which a target value occurs in a sorted {@code int} array. */
public class FindIndices {

    /** Demonstrates the lookup on a small sample array. */
    public static void main(String[] args) {
        int[] nums = {1, 2, 2, 2, 3, 4, 5};
        int target = 2;
        List<Integer> indices = findAllIndices(nums, target);
        System.out.println(indices); // Output: [1, 2, 3]
    }

    /**
     * Returns the indices of all elements equal to {@code target}.
     *
     * @param nums   array to search; the binary searches rely on it being sorted ascending
     * @param target value to look for
     * @return the matching indices in ascending order, or an empty list if there is no match
     */
    public static List<Integer> findAllIndices(int[] nums, int target) {
        int start = findFirst(nums, target);
        // No first occurrence means the target is absent: nothing to collect.
        if (start == -1) {
            return new ArrayList<>();
        }

        int end = findLast(nums, target);
        List<Integer> matches = new ArrayList<>();
        // Every position from the first to the last occurrence is part of the answer.
        int position = start;
        while (position <= end) {
            matches.add(position);
            position++;
        }
        return matches;
    }

    /** Binary search for the first position of {@code target}; returns -1 if none is found. */
    private static int findFirst(int[] nums, int target) {
        int found = -1;
        int lo = 0;
        int hi = nums.length - 1;
        while (lo <= hi) {
            int mid = lo + ((hi - lo) >>> 1);
            int probe = nums[mid];
            if (probe < target) {
                lo = mid + 1;
                continue;
            }
            if (probe == target) {
                found = mid; // remember the hit, then keep searching to the left
            }
            hi = mid - 1;
        }
        return found;
    }

    /** Binary search for the last position of {@code target}; returns -1 if none is found. */
    private static int findLast(int[] nums, int target) {
        int found = -1;
        int lo = 0;
        int hi = nums.length - 1;
        while (lo <= hi) {
            int mid = lo + ((hi - lo) >>> 1);
            int probe = nums[mid];
            if (probe > target) {
                hi = mid - 1;
                continue;
            }
            if (probe == target) {
                found = mid; // remember the hit, then keep searching to the right
            }
            lo = mid + 1;
        }
        return found;
    }
}

/*
 * Code explanation
 *
 * - findAllIndices finds the indices of all elements whose value is target.
 * - findFirst finds the first position of target in the array.
 * - findLast finds the last position of target in the array.
 *
 * If the first position found is -1, the array does not contain target and an empty list is
 * returned directly. Otherwise every index from firstIndex to lastIndex is added to the
 * result list.
 *
 * The original interview-experience post was published by "三毛六站神"; the answers were
 * compiled by the programmer Hasity.
 */