001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.ArraySorter;
022import org.apache.commons.lang3.ArrayUtils;
023import org.apache.commons.lang3.StringUtils;
024
025/**
026 * A matcher class that can be queried to determine if a character array
027 * portion matches.
028 * <p>
029 * This class comes complete with various factory methods.
030 * If these do not suffice, you can subclass and implement your own matcher.
031 * </p>
032 *
033 * @since 2.2
034 * @deprecated As of 3.6, use Apache Commons Text
035 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
036 * StringMatcherFactory</a> instead
037 */
038@Deprecated
039public abstract class StrMatcher {
040
041    /**
042     * Matches the comma character.
043     */
044    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
045    /**
046     * Matches the tab character.
047     */
048    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
049    /**
050     * Matches the space character.
051     */
052    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
053    /**
054     * Matches the same characters as StringTokenizer,
055     * namely space, tab, newline, formfeed.
056     */
057    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
058    /**
059     * Matches the String trim() whitespace characters.
060     */
061    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
062    /**
063     * Matches the double quote character.
064     */
065    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
066    /**
067     * Matches the double quote character.
068     */
069    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
070    /**
071     * Matches the single or double quote character.
072     */
073    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
074    /**
075     * Matches no characters.
076     */
077    private static final StrMatcher NONE_MATCHER = new NoMatcher();
078
079    /**
080     * Returns a matcher which matches the comma character.
081     *
082     * @return a matcher for a comma
083     */
084    public static StrMatcher commaMatcher() {
085        return COMMA_MATCHER;
086    }
087
088    /**
089     * Returns a matcher which matches the tab character.
090     *
091     * @return a matcher for a tab
092     */
093    public static StrMatcher tabMatcher() {
094        return TAB_MATCHER;
095    }
096
097    /**
098     * Returns a matcher which matches the space character.
099     *
100     * @return a matcher for a space
101     */
102    public static StrMatcher spaceMatcher() {
103        return SPACE_MATCHER;
104    }
105
106    /**
107     * Matches the same characters as StringTokenizer,
108     * namely space, tab, newline and formfeed.
109     *
110     * @return the split matcher
111     */
112    public static StrMatcher splitMatcher() {
113        return SPLIT_MATCHER;
114    }
115
116    /**
117     * Matches the String trim() whitespace characters.
118     *
119     * @return the trim matcher
120     */
121    public static StrMatcher trimMatcher() {
122        return TRIM_MATCHER;
123    }
124
125    /**
126     * Returns a matcher which matches the single quote character.
127     *
128     * @return a matcher for a single quote
129     */
130    public static StrMatcher singleQuoteMatcher() {
131        return SINGLE_QUOTE_MATCHER;
132    }
133
134    /**
135     * Returns a matcher which matches the double quote character.
136     *
137     * @return a matcher for a double quote
138     */
139    public static StrMatcher doubleQuoteMatcher() {
140        return DOUBLE_QUOTE_MATCHER;
141    }
142
143    /**
144     * Returns a matcher which matches the single or double quote character.
145     *
146     * @return a matcher for a single or double quote
147     */
148    public static StrMatcher quoteMatcher() {
149        return QUOTE_MATCHER;
150    }
151
152    /**
153     * Matches no characters.
154     *
155     * @return a matcher that matches nothing
156     */
157    public static StrMatcher noneMatcher() {
158        return NONE_MATCHER;
159    }
160
161    /**
162     * Constructor that creates a matcher from a character.
163     *
164     * @param ch  the character to match, must not be null
165     * @return a new Matcher for the given char
166     */
167    public static StrMatcher charMatcher(final char ch) {
168        return new CharMatcher(ch);
169    }
170
171    /**
172     * Constructor that creates a matcher from a set of characters.
173     *
174     * @param chars  the characters to match, null or empty matches nothing
175     * @return a new matcher for the given char[]
176     */
177    public static StrMatcher charSetMatcher(final char... chars) {
178        if (ArrayUtils.isEmpty(chars)) {
179            return NONE_MATCHER;
180        }
181        if (chars.length == 1) {
182            return new CharMatcher(chars[0]);
183        }
184        return new CharSetMatcher(chars);
185    }
186
187    /**
188     * Constructor that creates a matcher from a string representing a set of characters.
189     *
190     * @param chars  the characters to match, null or empty matches nothing
191     * @return a new Matcher for the given characters
192     */
193    public static StrMatcher charSetMatcher(final String chars) {
194        if (StringUtils.isEmpty(chars)) {
195            return NONE_MATCHER;
196        }
197        if (chars.length() == 1) {
198            return new CharMatcher(chars.charAt(0));
199        }
200        return new CharSetMatcher(chars.toCharArray());
201    }
202
203    /**
204     * Constructor that creates a matcher from a string.
205     *
206     * @param str  the string to match, null or empty matches nothing
207     * @return a new Matcher for the given String
208     */
209    public static StrMatcher stringMatcher(final String str) {
210        if (StringUtils.isEmpty(str)) {
211            return NONE_MATCHER;
212        }
213        return new StringMatcher(str);
214    }
215
216    /**
217     * Constructor.
218     */
219    protected StrMatcher() {
220    }
221
222    /**
223     * Returns the number of matching characters, zero for no match.
224     * <p>
225     * This method is called to check for a match.
226     * The parameter {@code pos} represents the current position to be
227     * checked in the string {@code buffer} (a character array which must
228     * not be changed).
229     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
230     * </p>
231     * <p>
232     * The character array may be larger than the active area to be matched.
233     * Only values in the buffer between the specified indices may be accessed.
234     * </p>
235     * <p>
236     * The matching code may check one character or many.
237     * It may check characters preceding {@code pos} as well as those
238     * after, so long as no checks exceed the bounds specified.
239     * </p>
240     * <p>
241     * It must return zero for no match, or a positive number if a match was found.
242     * The number indicates the number of characters that matched.
243     * </p>
244     *
245     * @param buffer  the text content to match against, do not change
246     * @param pos  the starting position for the match, valid for buffer
247     * @param bufferStart  the first active index in the buffer, valid for buffer
248     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
249     * @return the number of matching characters, zero for no match
250     */
251    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
252
253    /**
254     * Returns the number of matching characters, zero for no match.
255     * <p>
256     * This method is called to check for a match.
257     * The parameter {@code pos} represents the current position to be
258     * checked in the string {@code buffer} (a character array which must
259     * not be changed).
260     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
261     * </p>
262     * <p>
263     * The matching code may check one character or many.
264     * It may check characters preceding {@code pos} as well as those after.
265     * </p>
266     * <p>
267     * It must return zero for no match, or a positive number if a match was found.
268     * The number indicates the number of characters that matched.
269     * </p>
270     *
271     * @param buffer  the text content to match against, do not change
272     * @param pos  the starting position for the match, valid for buffer
273     * @return the number of matching characters, zero for no match
274     * @since 2.4
275     */
276    public int isMatch(final char[] buffer, final int pos) {
277        return isMatch(buffer, pos, 0, buffer.length);
278    }
279
280    /**
281     * Class used to define a set of characters for matching purposes.
282     */
283    static final class CharSetMatcher extends StrMatcher {
284        /** The set of characters to match. */
285        private final char[] chars;
286
287        /**
288         * Constructor that creates a matcher from a character array.
289         *
290         * @param chars  the characters to match, must not be null
291         */
292        CharSetMatcher(final char[] chars) {
293            this.chars = ArraySorter.sort(chars.clone());
294        }
295
296        /**
297         * Returns whether or not the given character matches.
298         *
299         * @param buffer  the text content to match against, do not change
300         * @param pos  the starting position for the match, valid for buffer
301         * @param bufferStart  the first active index in the buffer, valid for buffer
302         * @param bufferEnd  the end index of the active buffer, valid for buffer
303         * @return the number of matching characters, zero for no match
304         */
305        @Override
306        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
307            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
308        }
309    }
310
311    /**
312     * Class used to define a character for matching purposes.
313     */
314    static final class CharMatcher extends StrMatcher {
315        /** The character to match. */
316        private final char ch;
317
318        /**
319         * Constructor that creates a matcher that matches a single character.
320         *
321         * @param ch  the character to match
322         */
323        CharMatcher(final char ch) {
324            this.ch = ch;
325        }
326
327        /**
328         * Returns whether or not the given character matches.
329         *
330         * @param buffer  the text content to match against, do not change
331         * @param pos  the starting position for the match, valid for buffer
332         * @param bufferStart  the first active index in the buffer, valid for buffer
333         * @param bufferEnd  the end index of the active buffer, valid for buffer
334         * @return the number of matching characters, zero for no match
335         */
336        @Override
337        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
338            return ch == buffer[pos] ? 1 : 0;
339        }
340    }
341
342    /**
343     * Class used to define a set of characters for matching purposes.
344     */
345    static final class StringMatcher extends StrMatcher {
346        /** The string to match, as a character array. */
347        private final char[] chars;
348
349        /**
350         * Constructor that creates a matcher from a String.
351         *
352         * @param str  the string to match, must not be null
353         */
354        StringMatcher(final String str) {
355            chars = str.toCharArray();
356        }
357
358        /**
359         * Returns whether or not the given text matches the stored string.
360         *
361         * @param buffer  the text content to match against, do not change
362         * @param pos  the starting position for the match, valid for buffer
363         * @param bufferStart  the first active index in the buffer, valid for buffer
364         * @param bufferEnd  the end index of the active buffer, valid for buffer
365         * @return the number of matching characters, zero for no match
366         */
367        @Override
368        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
369            final int len = chars.length;
370            if (pos + len > bufferEnd) {
371                return 0;
372            }
373            for (int i = 0; i < chars.length; i++, pos++) {
374                if (chars[i] != buffer[pos]) {
375                    return 0;
376                }
377            }
378            return len;
379        }
380
381        @Override
382        public String toString() {
383            return super.toString() + ' ' + Arrays.toString(chars);
384        }
385
386    }
387
388    /**
389     * Class used to match no characters.
390     */
391    static final class NoMatcher extends StrMatcher {
392
393        /**
394         * Constructs a new instance of {@link NoMatcher}.
395         */
396        NoMatcher() {
397        }
398
399        /**
400         * Always returns {@code false}.
401         *
402         * @param buffer  the text content to match against, do not change
403         * @param pos  the starting position for the match, valid for buffer
404         * @param bufferStart  the first active index in the buffer, valid for buffer
405         * @param bufferEnd  the end index of the active buffer, valid for buffer
406         * @return the number of matching characters, zero for no match
407         */
408        @Override
409        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
410            return 0;
411        }
412    }
413
414    /**
415     * Class used to match whitespace as per trim().
416     */
417    static final class TrimMatcher extends StrMatcher {
418
419        /**
420         * Constructs a new instance of {@link TrimMatcher}.
421         */
422        TrimMatcher() {
423        }
424
425        /**
426         * Returns whether or not the given character matches.
427         *
428         * @param buffer  the text content to match against, do not change
429         * @param pos  the starting position for the match, valid for buffer
430         * @param bufferStart  the first active index in the buffer, valid for buffer
431         * @param bufferEnd  the end index of the active buffer, valid for buffer
432         * @return the number of matching characters, zero for no match
433         */
434        @Override
435        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
436            return buffer[pos] <= 32 ? 1 : 0;
437        }
438    }
439
440}