Skip to content
Permalink
Browse files

Initial commit for twitter-text 3.0.0 (#265)

* Initial commit for twitter-text 3.0.0

* remove js from travis test

* package version change
  • Loading branch information...
kaushlakers authored and leeaustinadams committed Oct 10, 2018
1 parent 4b8fc4a commit 9537bdf15f935b40fed0ea3ea44cee860fbb8880
Showing with 7,458 additions and 825 deletions.
  1. +0 −1 .travis.yml
  2. +0 −2 README.md
  3. +8 −0 config/README.md
  4. +30 −0 config/v3.json
  5. +4 −1 conformance/Rakefile
  6. +38 −0 conformance/extract.yml
  7. +5 −0 conformance/tld_lib.yml
  8. +251 −17 conformance/validate.yml
  9. +15 −0 java/CHANGELOG.md
  10. +4 −3 java/README.md
  11. +8 −2 java/docs/api/allclasses-frame.html
  12. +8 −2 java/docs/api/allclasses-noframe.html
  13. +7 −2 java/docs/api/com/twitter/twittertext/Autolink.LinkAttributeModifier.html
  14. +7 −2 java/docs/api/com/twitter/twittertext/Autolink.LinkTextModifier.html
  15. +7 −2 java/docs/api/com/twitter/twittertext/Autolink.html
  16. +7 −2 java/docs/api/com/twitter/twittertext/Extractor.Entity.Type.html
  17. +7 −2 java/docs/api/com/twitter/twittertext/Extractor.Entity.html
  18. +7 −2 java/docs/api/com/twitter/twittertext/Extractor.html
  19. +7 −2 java/docs/api/com/twitter/twittertext/HitHighlighter.html
  20. +7 −2 java/docs/api/com/twitter/twittertext/Range.html
  21. +7 −2 java/docs/api/com/twitter/twittertext/Regex.html
  22. +7 −2 java/docs/api/com/twitter/twittertext/TldLists.html
  23. +9 −4 java/docs/api/com/twitter/twittertext/TwitterTextConfiguration.TwitterTextWeightedRange.html
  24. +33 −8 java/docs/api/com/twitter/twittertext/TwitterTextConfiguration.html
  25. +274 −0 java/docs/api/com/twitter/twittertext/TwitterTextEmojiRegex.html
  26. +7 −2 java/docs/api/com/twitter/twittertext/TwitterTextParseResults.html
  27. +26 −5 java/docs/api/com/twitter/twittertext/TwitterTextParser.html
  28. +10 −7 java/docs/api/com/twitter/twittertext/Validator.html
  29. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Autolink.LinkAttributeModifier.html
  30. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Autolink.LinkTextModifier.html
  31. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Autolink.html
  32. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Extractor.Entity.Type.html
  33. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Extractor.Entity.html
  34. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Extractor.html
  35. +5 −0 java/docs/api/com/twitter/twittertext/class-use/HitHighlighter.html
  36. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Range.html
  37. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Regex.html
  38. +5 −0 java/docs/api/com/twitter/twittertext/class-use/TldLists.html
  39. +5 −0 ...docs/api/com/twitter/twittertext/class-use/TwitterTextConfiguration.TwitterTextWeightedRange.html
  40. +5 −0 java/docs/api/com/twitter/twittertext/class-use/TwitterTextConfiguration.html
  41. +5 −0 java/docs/api/com/twitter/twittertext/class-use/TwitterTextParseResults.html
  42. +5 −0 java/docs/api/com/twitter/twittertext/class-use/TwitterTextParser.html
  43. +5 −0 java/docs/api/com/twitter/twittertext/class-use/Validator.html
  44. +8 −2 java/docs/api/com/twitter/twittertext/package-frame.html
  45. +13 −4 java/docs/api/com/twitter/twittertext/package-summary.html
  46. +8 −2 java/docs/api/com/twitter/twittertext/package-tree.html
  47. +5 −0 java/docs/api/com/twitter/twittertext/package-use.html
  48. +8 −3 java/docs/api/constant-values.html
  49. +8 −4 java/docs/api/deprecated-list.html
  50. +7 −2 java/docs/api/help-doc.html
  51. +22 −4 java/docs/api/index-all.html
  52. +1 −1 java/docs/api/index.html
  53. +8 −2 java/docs/api/overview-tree.html
  54. +4 −0 java/docs/api/script.js
  55. +5 −0 java/docs/api/stylesheet.css
  56. +6 −4 java/pom.xml
  57. +4 −0 java/src/main/java/com/twitter/Regex.java
  58. +4 −0 java/src/main/java/com/twitter/twittertext/Autolink.java
  59. +4 −0 java/src/main/java/com/twitter/twittertext/Extractor.java
  60. +4 −0 java/src/main/java/com/twitter/twittertext/HitHighlighter.java
  61. +4 −0 java/src/main/java/com/twitter/twittertext/Range.java
  62. +32 −11 java/src/main/java/com/twitter/twittertext/Regex.java
  63. +9 −0 java/src/main/java/com/twitter/twittertext/TldLists.java
  64. +23 −0 java/src/main/java/com/twitter/twittertext/TwitterTextConfiguration.java
  65. +165 −0 java/src/main/java/com/twitter/twittertext/TwitterTextEmojiRegex.java
  66. +4 −0 java/src/main/java/com/twitter/twittertext/TwitterTextParseResults.java
  67. +51 −9 java/src/main/java/com/twitter/twittertext/TwitterTextParser.java
  68. +4 −0 java/src/main/java/com/twitter/twittertext/Validator.java
  69. +4 −0 java/src/test/java/com/twitter/twittertext/AutolinkTest.java
  70. +97 −2 java/src/test/java/com/twitter/twittertext/ConformanceTest.java
  71. +4 −0 java/src/test/java/com/twitter/twittertext/ExtractorTest.java
  72. +15 −5 java/src/test/java/com/twitter/twittertext/RegexTest.java
  73. +4 −0 java/src/test/java/com/twitter/twittertext/TwitterTextConfigurationTest.java
  74. +43 −0 java/src/test/java/com/twitter/twittertext/TwitterTextEmojiRegexTest.java
  75. +4 −0 java/src/test/java/com/twitter/twittertext/TwitterTextParserTest.java
  76. +9 −5 java/src/test/java/com/twitter/twittertext/ValidatorTest.java
  77. +4 −0 java/src/test/java/com/twitter/twittertext/benchmark/Benchmark.java
  78. +2 −7 js/.babelrc
  79. +15 −0 js/.eslintrc.json
  80. +1 −1 js/.gitignore
  81. +1 −0 js/.npmrc
  82. +16 −0 js/CHANGELOG.md
  83. +4 −0 js/Gruntfile.js
  84. +2 −2 js/README.md
  85. +8 −5 js/Rakefile
  86. +20 −7 js/package.json
  87. +4 −0 js/pkg/twitter-text-2.0.0.min.js
  88. +16 −21 js/rollup.config.js
  89. +25 −0 js/scripts/babelPreset.js
  90. +26 −2 js/scripts/buildConfig.js
  91. +4 −0 js/src/.prettierrc
  92. +8 −2 js/src/autoLink.js
  93. +4 −1 js/src/autoLinkCashtags.js
  94. +13 −5 js/src/autoLinkEntities.js
  95. +5 −1 js/src/autoLinkHashtags.js
  96. +8 −2 js/src/autoLinkUrlsCustom.js
  97. +5 −1 js/src/autoLinkUsernamesOrLists.js
  98. +5 −1 js/src/autoLinkWithJSON.js
  99. +52 −0 js/src/configs.js
  100. +10 −4 js/src/convertUnicodeIndices.js
  101. +5 −1 js/src/extractCashtags.js
  102. +7 −3 js/src/extractCashtagsWithIndices.js
  103. +7 −4 js/src/extractEntitiesWithIndices.js
  104. +5 −1 js/src/extractHashtags.js
  105. +7 −3 js/src/extractHashtagsWithIndices.js
  106. +41 −33 js/src/extractHtmlAttrsFromOptions.js
  107. +5 −1 js/src/extractMentions.js
  108. +7 −3 js/src/extractMentionsOrListsWithIndices.js
  109. +5 −1 js/src/extractMentionsWithIndices.js
  110. +6 −3 js/src/extractReplies.js
  111. +5 −1 js/src/extractUrls.js
  112. +11 −7 js/src/extractUrlsWithIndices.js
  113. +6 −2 js/src/getTweetLength.js
  114. +5 −1 js/src/getUnicodeTextLength.js
  115. +5 −1 js/src/hasInvalidCharacters.js
  116. +7 −3 js/src/hitHighlight.js
  117. +11 −4 js/src/htmlEscape.js
  118. +5 −2 js/src/index.js
  119. +6 −2 js/src/isInvalidTweet.js
  120. +5 −1 js/src/isValidHashtag.js
  121. +8 −2 js/src/isValidList.js
  122. +5 −1 js/src/isValidTweetText.js
  123. +19 −11 js/src/isValidUrl.js
  124. +5 −1 js/src/isValidUsername.js
  125. +5 −1 js/src/lib/clone.js
  126. +10 −4 js/src/lib/convertUnicodeIndices.js
  127. +5 −1 js/src/lib/getCharacterWeight.js
  128. +6 −2 js/src/lib/idna.js
  129. +11 −4 js/src/lib/objectAssignPolyfill.js
  130. +15 −8 js/src/lib/regexSupplant.js
  131. +7 −3 js/src/lib/stringSupplant.js
  132. +9 −3 js/src/linkTextWithEntity.js
  133. +5 −1 js/src/linkToCashtag.js
  134. +5 −1 js/src/linkToHashtag.js
  135. +6 −2 js/src/linkToMentionAndList.js
  136. +5 −1 js/src/linkToText.js
  137. +8 −2 js/src/linkToTextWithSymbol.js
  138. +5 −1 js/src/linkToUrl.js
  139. +5 −1 js/src/modifyIndicesFromUTF16ToUnicode.js
  140. +5 −1 js/src/modifyIndicesFromUnicodeToUTF16.js
  141. +38 −21 js/src/parseTweet.js
  142. +5 −1 js/src/regexp/astralLetterAndMarks.js
  143. +4 −0 js/src/regexp/astralNumerals.js
  144. +4 −0 js/src/regexp/atSigns.js
  145. +5 −1 js/src/regexp/bmpLetterAndMarks.js
  146. +4 −0 js/src/regexp/bmpNumerals.js
  147. +4 −0 js/src/regexp/cashtag.js
  148. +4 −0 js/src/regexp/codePoint.js
  149. +4 −0 js/src/regexp/cyrillicLettersAndMarks.js
  150. +6 −0 js/src/regexp/directionalMarkersGroup.js
  151. +4 −0 js/src/regexp/endHashtagMatch.js
  152. +4 −0 js/src/regexp/endMentionMatch.js
  153. +20 −9 js/src/regexp/extractUrl.js
  154. +4 −0 js/src/regexp/hashSigns.js
  155. +9 −4 js/src/regexp/hashtagAlpha.js
  156. +12 −1 js/src/regexp/hashtagAlphaNumeric.js
  157. +8 −4 js/src/regexp/hashtagBoundary.js
  158. +4 −0 js/src/regexp/hashtagSpecialChars.js
  159. +5 −1 js/src/regexp/index.js
  160. +7 −1 js/src/regexp/invalidChars.js
  161. +5 −1 js/src/regexp/invalidCharsGroup.js
  162. +11 −4 js/src/regexp/invalidDomainChars.js
  163. +4 −0 js/src/regexp/invalidUrlWithoutProtocolPrecedingChars.js
  164. +4 −0 js/src/regexp/latinAccentChars.js
  165. +5 −1 js/src/regexp/nonBmpCodePairs.js
  166. +4 −0 js/src/regexp/punct.js
  167. +5 −1 js/src/regexp/rtlChars.js
  168. +4 −0 js/src/regexp/spaces.js
  169. +4 −0 js/src/regexp/spacesGroup.js
  170. +4 −0 js/src/regexp/urlHasHttps.js
  171. +4 −0 js/src/regexp/urlHasProtocol.js
  172. +4 −0 js/src/regexp/validAsciiDomain.js
  173. +22 −15 js/src/regexp/validCCTLD.js
  174. +4 −0 js/src/regexp/validCashtag.js
  175. +4 −0 js/src/regexp/validDomain.js
  176. +7 −1 js/src/regexp/validDomainChars.js
  177. +7 −4 js/src/regexp/validDomainName.js
  178. +100 −91 js/src/regexp/validGTLD.js
  179. +4 −0 js/src/regexp/validGeneralUrlPathChars.js
  180. +4 −0 js/src/regexp/validHashtag.js
  181. +8 −4 js/src/regexp/validMentionOrList.js
  182. +4 −0 js/src/regexp/validMentionPrecedingChars.js
  183. +4 −0 js/src/regexp/validPortNumber.js
  184. +4 −0 js/src/regexp/validPunycode.js
  185. +5 −4 js/src/regexp/validReply.js
  186. +7 −4 js/src/regexp/validSubdomain.js
  187. +14 −1 js/src/regexp/validTcoUrl.js
  188. +15 −11 js/src/regexp/validUrlBalancedParens.js
  189. +9 −5 js/src/regexp/validUrlPath.js
  190. +4 −0 js/src/regexp/validUrlPathEndingChars.js
  191. +12 −2 js/src/regexp/validUrlPrecedingChars.js
  192. +4 −0 js/src/regexp/validUrlQueryChars.js
  193. +4 −0 js/src/regexp/validUrlQueryEndingChars.js
  194. +8 −4 js/src/regexp/validateUrlAuthority.js
  195. +4 −0 js/src/regexp/validateUrlDecOctet.js
  196. +9 −1 js/src/regexp/validateUrlDomain.js
  197. +4 −0 js/src/regexp/validateUrlDomainSegment.js
  198. +4 −0 js/src/regexp/validateUrlDomainTld.js
  199. +7 −5 js/src/regexp/validateUrlFragment.js
  200. +5 −4 js/src/regexp/validateUrlHost.js
  201. +5 −4 js/src/regexp/validateUrlIp.js
  202. +7 −4 js/src/regexp/validateUrlIpv4.js
  203. +4 −0 js/src/regexp/validateUrlIpv6.js
  204. +7 −4 js/src/regexp/validateUrlPath.js
  205. +5 −6 js/src/regexp/validateUrlPchar.js
  206. +4 −0 js/src/regexp/validateUrlPctEncoded.js
  207. +4 −0 js/src/regexp/validateUrlPort.js
  208. +7 −4 js/src/regexp/validateUrlQuery.js
  209. +4 −0 js/src/regexp/validateUrlScheme.js
  210. +4 −0 js/src/regexp/validateUrlSubDelims.js
  211. +4 −0 js/src/regexp/validateUrlSubDomainSegment.js
  212. +8 −4 js/src/regexp/validateUrlUnencoded.js
  213. +9 −5 js/src/regexp/validateUrlUnicodeAuthority.js
  214. +9 −1 js/src/regexp/validateUrlUnicodeDomain.js
  215. +4 −0 js/src/regexp/validateUrlUnicodeDomainSegment.js
  216. +4 −0 js/src/regexp/validateUrlUnicodeDomainTld.js
  217. +5 −4 js/src/regexp/validateUrlUnicodeHost.js
  218. +4 −0 js/src/regexp/validateUrlUnicodeSubDomainSegment.js
  219. +4 −0 js/src/regexp/validateUrlUnreserved.js
  220. +5 −6 js/src/regexp/validateUrlUserinfo.js
  221. +8 −2 js/src/removeOverlappingEntities.js
  222. +4 −1 js/src/splitTags.js
  223. +12 −6 js/src/tagAttrs.js
  224. +23 −2 js/test/conformance.html
  225. +4 −0 js/test/node_tests.js
  226. +6 −1 js/test/test.html
  227. +6 −11 js/test/tests.js
  228. +3,347 −0 js/yarn.lock
  229. +12 −1 objc/CHANGELOG.md
  230. +3 −3 objc/Rakefile
  231. +47 −1 objc/ThirdParty/IFUnicodeURL/IFUnicodeURL.xcodeproj/project.pbxproj
  232. +1 −1 objc/ThirdParty/IFUnicodeURL/IFUnicodeURL.xcodeproj/xcshareddata/xcschemes/IFUnicodeURL.xcscheme
  233. +5 −0 objc/ThirdParty/IFUnicodeURL/IFUnicodeURL/IDNSDK/nameprep.c
  234. +5 −0 objc/ThirdParty/IFUnicodeURL/IFUnicodeURL/IDNSDK/puny.c
  235. +5 −0 objc/ThirdParty/IFUnicodeURL/IFUnicodeURL/IDNSDK/race.c
  236. +5 −0 objc/ThirdParty/IFUnicodeURL/IFUnicodeURL/IDNSDK/toxxx.c
  237. +5 −0 objc/ThirdParty/IFUnicodeURL/IFUnicodeURL/IDNSDK/util.c
  238. +4 −0 objc/ThirdParty/IFUnicodeURL/IFUnicodeURL/NSURL+IFUnicodeURL.m
  239. +3 −1 objc/ThirdParty/IFUnicodeURL/Tests/NSURL+IFUnicodeURLTest.m
  240. +26 −0 objc/TwitterText.xcodeproj/project.pbxproj
  241. +1 −1 objc/TwitterText.xcodeproj/xcshareddata/xcschemes/TwitterText.xcscheme
  242. +5 −8 objc/lib/TwitterText.h
  243. +191 −109 objc/lib/TwitterText.m
  244. +40 −0 objc/lib/TwitterTextEmoji.h
  245. +26 −0 objc/lib/TwitterTextEmoji.m
  246. +5 −9 objc/lib/TwitterTextEntity.h
  247. +6 −8 objc/lib/TwitterTextEntity.m
  248. +16 −0 objc/tests/TwitterTextEmojiTests.h
  249. +73 −0 objc/tests/TwitterTextEmojiTests.m
  250. +125 −10 objc/tests/TwitterTextTests.m
  251. +79 −0 objc/tests/json-conformance/extract.json
  252. +296 −19 objc/tests/json-conformance/validate.json
  253. +32 −0 objc/twitter-text.podspec
  254. +11 −1 rb/CHANGELOG.md
  255. +1 −1 rb/Rakefile
  256. +5 −0 rb/lib/twitter-text.rb
  257. +4 −0 rb/lib/twitter-text/autolink.rb
  258. +17 −3 rb/lib/twitter-text/configuration.rb
  259. +4 −0 rb/lib/twitter-text/deprecation.rb
  260. +27 −0 rb/lib/twitter-text/emoji_regex.rb
  261. +43 −11 rb/lib/twitter-text/extractor.rb
  262. +4 −0 rb/lib/twitter-text/hash_helper.rb
  263. +6 −2 rb/lib/twitter-text/hit_highlighter.rb
  264. +28 −5 rb/lib/twitter-text/regex.rb
  265. +13 −9 rb/lib/twitter-text/rewriter.rb
  266. +4 −0 rb/lib/twitter-text/unicode.rb
  267. +39 −15 rb/lib/twitter-text/validation.rb
  268. +4 −0 rb/lib/twitter-text/weighted_range.rb
  269. +4 −0 rb/spec/autolinking_spec.rb
  270. +45 −0 rb/spec/configuration_spec.rb
  271. +4 −0 rb/spec/extractor_spec.rb
  272. +4 −0 rb/spec/hithighlighter_spec.rb
  273. +38 −0 rb/spec/regex_spec.rb
  274. +4 −0 rb/spec/rewriter_spec.rb
  275. +13 −0 rb/spec/spec_helper.rb
  276. +9 −5 rb/spec/test_urls.rb
  277. +4 −0 rb/spec/twitter_text_spec.rb
  278. +4 −0 rb/spec/unicode_spec.rb
  279. +22 −2 rb/spec/validation_spec.rb
  280. +20 −3 rb/test/conformance_test.rb
  281. +5 −2 rb/twitter-text.gemspec
  282. +5 −0 unicode_regex/unicode_regex_groups.scala
@@ -7,6 +7,5 @@ matrix:
- env: TWITTER_TEXT_DIR=rb
rvm: 2.4.2
- env: TWITTER_TEXT_DIR=java
- env: TWITTER_TEXT_DIR=js
- env: TWITTER_TEXT_DIR=objc
script: "./.travis.sh"
@@ -1,5 +1,3 @@
[![Build Status](https://img.shields.io/travis/twitter/twitter-text/master.svg)](https://travis-ci.org/twitter/twitter-text) [![Maven Central](https://img.shields.io/maven-central/v/com.twitter.twittertext/twitter-text.svg)](http://search.maven.org/#search%7Cgav%7C1%7Cg%3A%22com.twitter.twittertext%22%20AND%20a%3A%22twitter-text%22) [![Gem](https://img.shields.io/gem/v/twitter-text.svg)](https://rubygems.org/gems/twitter-text) [![npm](https://img.shields.io/npm/v/twitter-text.svg)](https://www.npmjs.com/package/twitter-text) [![CocoaPods](https://img.shields.io/cocoapods/v/twitter-text.svg)](http://cocoapods.org/?q=twitter-text) [![Bower](https://img.shields.io/bower/v/twitter-text.svg)](http://bower.io/search/?q=twitter-text)

twitter-text
============

@@ -13,6 +13,7 @@ The configuration format is a JSON string. The JSON can have the following prope
* `maxWeightedTweetLength` (required, integer, min value 0)
* `scale` (required, integer, min value 1)
* `defaultWeight` (required, integer, min value 0)
* `emojiParsingEnabled` (optional, boolean)
* `transformedURLLength` (integer, min value 0)
* `ranges` (array of range items)

@@ -48,6 +49,13 @@ The Tweet length is the (`weighted length` / `scale`).
The default weight applied to all code points. This is overridden in
one or more range items.

### emojiParsingEnabled

When set to true, the weighted Tweet length counts each emoji as a
single code point (with a default weight of 200), including longer
grapheme clusters combined by zero-width joiners. When set to false,
Tweet length is calculated by weighting each Unicode code point individually.

### transformedURLLength

The length counted for URLs against the total weight of the Tweet. In
@@ -0,0 +1,30 @@
{
"version": 3,
"maxWeightedTweetLength": 280,
"scale": 100,
"defaultWeight": 200,
"emojiParsingEnabled": true,
"transformedURLLength": 23,
"ranges": [
{
"start": 0,
"end": 4351,
"weight": 100
},
{
"start": 8192,
"end": 8205,
"weight": 100
},
{
"start": 8208,
"end": 8223,
"weight": 100
},
{
"start": 8242,
"end": 8247,
"weight": 100
}
]
}
@@ -39,7 +39,10 @@ package com.twitter.twittertext;
import java.util.Arrays;
import java.util.List;
public class TldLists {
public final class TldLists {
private TldLists() {
}
public static final List<String> GTLDS = Arrays.asList(
#{yml["generic"].map {|el| " \"#{el}\""}.join(",\n")}
);
@@ -664,6 +664,44 @@ tests:
- url: "http://foobar.پاکستان/"
indices: [42, 64]

urls_with_directional_markers:
- description: "Extract URLs from RTL text"
text: "\U00002066\U0000202Atest abcdef.com پاکستان http://twitter.com/\U0000202C\U00002069"
expected:
- url: "abcdef.com"
indices: [7, 17]
- url: "http://twitter.com/"
indices: [26, 45]

- description: "Extract URLs from RTL text with embedded directional marks"
text: "This is a test \U00002066\U0000202Atwitter.com\U0000202C\U00002069 \U00002066\U0000202Ahttp://foobar.پاکستان/\U0000202C\U00002069⁩ قطر فلسطين عمان"
expected:
- url: "twitter.com"
indices: [17, 28]
- url: "http://foobar.پاکستان/"
indices: [33, 55]

tco_urls_with_params:
- description: "Extract valid URL with params: https://t.co/UqIyJAJTfo?amp=1"
text: "text https://t.co/UqIyJAJTfo?amp=1"
expected: ["https://t.co/UqIyJAJTfo?amp=1"]

- description: "Extract valid URL with params: https://t.co/UqIyJAJTfo?type=js"
text: "text https://t.co/UqIyJAJTfo?type=js"
expected: ["https://t.co/UqIyJAJTfo?type=js"]

- description: "Extract valid URL with params: https://t.co/UqIyJAJTfo?ssr=true"
text: "text https://t.co/UqIyJAJTfo?ssr=true"
expected: ["https://t.co/UqIyJAJTfo?ssr=true"]

- description: "Extract a valid URL with params: https://t.co/asdfdf?a=b#123"
text: "text https://t.co/asdfdf?a=b#123"
expected: ["https://t.co/asdfdf?a=b#123"]

- description: "Extract a valid URL with params: https://t.co/sadfasdf?a=b&c=d"
text: "text https://t.co/sadfasdf?a=b&c=d"
expected: ["https://t.co/sadfasdf?a=b&c=d"]

hashtags:
- description: "Extract hashtag after emoji without variant selector (uFE0E or uFE0F)"
text: "a ✌#hashtag here"
@@ -343,6 +343,7 @@ generic:
- 新闻
- 政府
- 政务
- 招聘
- 手表
- 手机
- 我爱你
@@ -598,6 +599,7 @@ generic:
- srl
- spreadbetting
- spot
- sport
- spiegel
- space
- soy
@@ -942,6 +944,7 @@ generic:
- locker
- loans
- loan
- llc
- lixil
- living
- live
@@ -1047,6 +1050,7 @@ generic:
- info
- infiniti
- industries
- inc
- immobilien
- immo
- imdb
@@ -1364,6 +1368,7 @@ generic:
- cheap
- chat
- chase
- charity
- channel
- chanel
- cfd

0 comments on commit 9537bdf

Please sign in to comment.
You can’t perform that action at this time.