Random Gutenberg is a Twitter bot which tweets random sentences from random Project Gutenberg eBooks.
RandomGutenberg.java
package org.adrianwalker.randomgutenberg; import java.io.BufferedReader; import java.io.InputStreamReader; import static java.lang.String.format; import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Random; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.ZipInputStream; import org.apache.log4j.Logger; import twitter4j.Status; import twitter4j.Twitter; import twitter4j.TwitterFactory; public final class RandomGutenberg { private static final Logger LOGGER = Logger.getLogger(RandomGutenberg.class); private static final int MAX_OFFSET = 3934400; private static final String GUTENBERG_ROBOT_URL = "http://www.gutenberg.org/robot/harvest?offset=%s&filetypes[]=txt"; private static final Pattern HREF_PATTERN = Pattern.compile("href=\"(http://.*)\""); private static final Pattern SENTENCE_ENDINGS_PATTERN = Pattern.compile("(?<=\\s?\"?[.!?]\"?\\s?)"); private static final Pattern WORD_ENDINGS_PATTERN = Pattern.compile("\\s"); private static final int MAX_TRIES = 3; private static final int MAX_TWEET_LENGTH = 140; private static final int MIN_WORD_COUNT = 3; public static void main(final String[] args) { Random randomNumberGenerator = new Random(new Random(System.currentTimeMillis()).nextLong()); String eBookText = null; for (int tries = 0; tries < MAX_TRIES; tries++) { try { eBookText = getRandomEbookText(randomNumberGenerator); break; } catch (final Throwable t) { LOGGER.error("Error getting eBook text", t); } } if (null == eBookText) { return; } String sentence = getRandomSentence(eBookText, randomNumberGenerator); try { tweet(sentence); } catch (final Throwable t) { LOGGER.error("Error sending tweet", t); } } private static String getRandomSentence(final String eBookText, final Random randomNumberGenerator) { List<String> sentences = new ArrayList<>(Arrays.asList(SENTENCE_ENDINGS_PATTERN.split(eBookText))); Iterator<String> sentenceIterator = 
sentences.iterator(); while (sentenceIterator.hasNext()) { String sentence = sentenceIterator.next(); String[] words = WORD_ENDINGS_PATTERN.split(sentence); int sentenceLength = sentence.length(); int wordCount = words.length; if (wordCount < MIN_WORD_COUNT || sentenceLength > MAX_TWEET_LENGTH) { sentenceIterator.remove(); } } String sentence = sentences.get(randomNumberGenerator.nextInt(sentences.size())); return sentence; } private static String getRandomEbookText(final Random randomNumberGenerator) throws Throwable { int offset = randomNumberGenerator.nextInt(MAX_OFFSET); URL url = new URL(format(GUTENBERG_ROBOT_URL, offset)); BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream())); List<String> hrefs = new ArrayList<>(); String line; while (null != (line = reader.readLine())) { Matcher matcher = HREF_PATTERN.matcher(line); if (matcher.find()) { hrefs.add(matcher.group(1)); } } reader.close(); String randomHref = hrefs.get(randomNumberGenerator.nextInt(hrefs.size())); url = new URL(randomHref); ZipInputStream zis = new ZipInputStream(url.openStream()); zis.getNextEntry(); reader = new BufferedReader(new InputStreamReader(zis)); StringBuilder eBookBuffer = new StringBuilder(); while (null != (line = reader.readLine())) { if (eBookBuffer.length() > 0) { eBookBuffer.append(" "); } line = line.trim(); eBookBuffer.append(line); } reader.close(); String eBookText = eBookBuffer.toString(); return eBookText; } private static Status tweet(final String sentence) throws Throwable { String message = sentence; Twitter twitter = TwitterFactory.getSingleton(); Status status = twitter.updateStatus(message); return status; } }
Source Code
- Code available on GitHub - random-gutenberg