8
\$\begingroup\$

The goal of my code is to sort data into two categories. It must use a local copy of the initial data from Collar (Top View).csv. My code creates a Collection of items called Collars using the initial data file, then moves each Collar into its respective category based upon its E dimension. I would like feedback on if I could do this more efficiently and readable, but other feedback is welcomed.

Option Explicit
Option Base 1
Dim CollarCol As New Collection
Dim BatchNum As String

' Calls for creation of a collection of collars and then calls that to be sorted.
Sub SortButton_Click()

    ' Clear current values
    Range("D3:L30").Clear

    ' Create local copy. Cannot open live copies of files.
    FileCopy "O:\IQC_Inspection\EngineeringData\Collar (Top View).csv", _
        ActiveWorkbook.Path + "\" + "Collar (Top View).csv"

    ' Get user input for desired batch number
    On Error GoTo ErrorHandler
    BatchNum = InputBox(Prompt:="Enter batch number: ")
    If (BatchNum = 0) Then Exit Sub ' exit for cancel button

    Set CollarCol = New Collection
    Call PopulateCollarCol
    Call SortCollarCol
    Exit Sub

ErrorHandler:
    MsgBox Err & ": " & Error(Err)

End Sub


' Populates the Collection named CollarCol
Private Sub PopulateCollarCol()

    Workbooks.Open ActiveWorkbook.Path + "\" + "Collar (Top View).csv"
    Dim Index As Integer, EndIndex As Integer
    Dim NewCollar As Collar
    EndIndex = FindEnd(BatchNum)

    For Index = FindStart(BatchNum) To EndIndex
        Set NewCollar = New Collar
        ' If first measure, add to collection
        If (Cells(Index, 11) = 0) Then '
            NewCollar.SetBatchNum (Cells(Index, 9))
            NewCollar.SetSerialNum (Cells(Index, 10))
            NewCollar.SetDimE (Cells(Index, 13))
            CollarCol.Add Item:=NewCollar, key:=CStr(NewCollar.GetSerialNum)
        Else ' see if remeasure is done for DimE
            If (Cells(Index, 15) <> " ") Then
                Dim EditCollar As New Collar
                Set EditCollar = CollarCol.Item(CStr(Cells(Index, 10)))
                ' make sure remeasure is done for DimE
                EditCollar.SetDimE (Cells(Index, 13))
            End If
        End If
    Next Index

    Workbooks("Collar (Top View).csv").Close

End Sub ' PopulateCollarCol


' Returns the first row of the given string
Function FindStart(ToFind As String) As Integer

    ' find bottom of batch
    Dim Rng As Range
    If Trim(ToFind) <> "" Then
        With Sheets("Collar (Top View)").Range("I2:I30000")
            Set Rng = .Find(What:=ToFind, _
                            after:=.Cells(.Cells.Count), _
                            LookIn:=xlValues, _
                            LookAt:=xlWhole, _
                            SearchOrder:=xlByRows, _
                            searchdirection:=xlPrevious, _
                            MatchCase:=False)
            If Not Rng Is Nothing Then
                FindStart = Rng.Row ' found bottom
            Else
                MsgBox "Nothing found"
                Exit Function
            End If
        End With
    End If

    ' Loop past remeasures
    Do While (Cells(FindStart, 11) = 1)
        FindStart = FindStart - 1
    Loop

    ' Loop while batch number is the same
    Do While (Cells(FindStart - 1, 9) = ToFind)
        If Cells(FindStart - 1, 10) < Cells(FindStart, 10) Or _
        Cells(FindStart, 11) = 1 Then
            FindStart = FindStart - 1
        Else
            Exit Do
        End If
    Loop

End Function ' FindStart


Function FindEnd(ToFind As String) As Integer

    ' find bottom of batch
    Dim Rng As Range
    If Trim(ToFind) <> "" Then
        With Sheets("Collar (Top View)").Range("I2:I30000")
            Set Rng = .Find(What:=ToFind, _
                            after:=.Cells(.Cells.Count), _
                            LookIn:=xlValues, _
                            LookAt:=xlWhole, _
                            SearchOrder:=xlByRows, _
                            searchdirection:=xlPrevious, _
                            MatchCase:=False)
            If Not Rng Is Nothing Then
                FindEnd = Rng.Row ' found bottom
            Else
                MsgBox "Error finding end of batch."
                Exit Function
            End If
        End With
    End If

End Function ' FindEnd


' Takes CollarCol and places each collar into its respective list
Private Sub SortCollarCol()

    Dim BlueIndex As Integer, YellowIndex As Integer
    Dim Index As Integer
    Dim CurCollar As New Collar
    BlueIndex = 3
    YellowIndex = 3

    For Index = 1 To CollarCol.Count
        Set CurCollar = CollarCol.Item(Index)
        If (CurCollar.GetDimE < 0.062055555) Then
            Cells(BlueIndex, 4) = CurCollar.GetBatchNum
            Cells(BlueIndex, 5) = CurCollar.GetSerialNum
            Cells(BlueIndex, 6) = CurCollar.GetDimE
            BlueIndex = BlueIndex + 1
        Else ' Bucket 2
            Cells(YellowIndex, 9) = CurCollar.GetBatchNum
            Cells(YellowIndex, 10) = CurCollar.GetSerialNum
            Cells(YellowIndex, 11) = CurCollar.GetDimE
            YellowIndex = YellowIndex + 1
        End If
    Next Index

End Sub ' SortCollarCol


'Returns boolean true if an object is within a collection
Public Function InCollection(col As Collection, key As String) As Boolean
  Dim var As Variant
  Dim errNumber As Long

  InCollection = False
  Set var = Nothing

  Err.Clear
  On Error Resume Next
    var = col.Item(key)
    errNumber = CLng(Err.Number)
  On Error GoTo 0

  '5 is not in, 0 and 438 represent incollection
  If errNumber = 5 Then ' it is 5 if not in collection
    InCollection = False
  Else
    InCollection = True
  End If

End Function

Here is data that I would be an example. Each column is in a spreadsheet and the first column starts as 'B'

1-Jan-14    8:43:48 worker1 QQ  SAQ20   Z   R   143 3   0   1   2.72E-02    2.71E-02                
1-Jan-14    8:43:48 worker1 QQ  SAQ20   Z   R   143 4   0   1   2.75E-02    2.73E-02                
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 1   0       6.20E-02    6.19E-02    2.77E-02    2.76E-02    1.19E-02    1.35E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 3   0       0.062127182 6.18E-02    2.77E-02    2.78E-02    0.010853701 1.47E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 4   0       6.20E-02    6.20E-02    2.76E-02    2.75E-02    0.011244671 1.45E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 5   0       6.19E-02    6.20E-02    2.78E-02    2.75E-02    1.29E-02    1.29E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 6   0       6.20E-02    6.20E-02    2.79E-02    2.76E-02    1.20E-02    1.36E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 7   0       6.21E-02    6.20E-02    2.75E-02    2.74E-02    1.19E-02    1.38E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 8   0       6.17E-02    6.17E-02    2.75E-02    2.75E-02    1.34E-02    1.20E-02
2-Jan-14    7:08:39 worker1 QQ  SA3054  Z   R   150 9   0       6.16E-02    6.16E-02    2.73E-02    2.77E-02    1.30E-02    1.23E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 10  0       0.061871287 6.19E-02    2.75E-02    2.74E-02    1.19E-02    1.36E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 11  0       6.17E-02    6.19E-02    2.77E-02    2.76E-02    0.012293416 1.33E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 12  0       0.062024465 0.062002266 2.76E-02    2.75E-02    1.16E-02    1.41E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 13  0       6.19E-02    6.17E-02    2.74E-02    2.76E-02    1.29E-02    1.26E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 14  0       6.19E-02    6.16E-02    2.74E-02    2.78E-02    1.30E-02    1.23E-02
2-Jan-14    7:08:39 worker1 QQ  SAQ20   Z   R   150 15  0       6.18E-02    6.19E-02    2.75E-02    2.74E-02    1.25E-02    1.31E-02
3-Jan-14    7:34:05 worker1 QQ  SAQ20   Z   R   181 1   0       6.21E-02    6.19E-02    2.73E-02    2.71E-02    1.34E-02    0.012262073
3-Jan-14    7:34:05 worker1 QQ  SAQ20   Z   R   181 2   0       6.20E-02    6.22E-02    2.71E-02    2.70E-02    1.32E-02    1.28E-02
\$\endgroup\$
0

2 Answers 2

8
\$\begingroup\$

General Impression

  • It's better than most VBA I see. I think you generally did a pretty good job.
  • Event Handlers shouldn't have very much code in them. How would you run this code "headless" (without a person interacting with the UI) if you needed to? I would consider breaking the SortButton_Click() event procedure into at least one or two more subroutines.
  • I would actually recommend moving almost all of this logic into a class module. Keep your code behinds clean of all business logic. Code behinds should be mainly responsible for dealing with UI events and calling on classes that hold the business logic.
  • The string literal Collar (Top View) shows up a lot. Extract a constant to store it in. Be careful however, you use it in two different contexts. In some places it refers to a file name and in others it is a sheet name. So, you actually need two different constants. It's perfectly okay to let one constant reference the other though. This is completely legit and compilable code.

    Private Const sheetName As String = "Collar (Top View)"
    Private Const fileName As String = sheetName & ".csv"
    

SortButton_Click

  • You're turning the error handling on pretty late. What happens if there's an issue with the FileCopy command? The code will break on that line. Probably not what you want to happen. Generally speaking, if you're using On Error GoTo it should be the first line after the sub declaration.
  • Be explicit about scope.

     Sub SortButton_Click()
    

    Scope is public by default in VBA, unlike .Net where it's private by default. That alone is a good reason to be explicit about how things are scope. It will reduce confusion for anyone (including yourself) who may move between the two languages. It's one less thing to remember.

    Also, did you actually mean to make this Public? I can't think of a good reason for an event handler to be public. If you need to call the code inside of it, it would be much better to extract the logic into a public subroutine of it's own.

  • Using Range all on its own implicitly calls ActiveSheet.Range. It's always better to be explicit and in turn, it's rarely recommended to work on the active worksheet. There might not be another option here though. This could be one of those rare times.

  • Give this a newline for readability.

    BatchNum = InputBox(Prompt:="Enter batch number: ")  
    If (BatchNum = 0) Then Exit Sub ' exit for cancel button
    
    BatchNum = InputBox(Prompt:="Enter batch number: ")  
    
    If (BatchNum = 0) Then Exit Sub ' exit for cancel button
    

    Speaking of readability, you might want to ditch the one-line If in favor of the more verbose If block syntax.

PopulateCollarCol

  • You've repeated this code from the click event.

    ActiveWorkbook.Path + "\" + "Collar (Top View).csv"
    

    You should be passing that filepath into the subroutine as an argument simply to keep the code DRY, but there's another issue here. What if the user clicks on a different workbook after the click event starts, but before the code execution gets here? You could execute in a different path. (Unlikely, but possible.)

  • When I first saw this, I expected to give my spiel about implicitly declared variants, but you declared these variables correctly. Well done. I see this get screwed up a lot, but you didn't.

    Dim Index As Integer, EndIndex As Integer
    
  • Nothing guarantees that someone won't come behind you and call this function before there is a valid (Not Nothing) Collar collection to work with before you add it. It's a potential bug. A simple fix would be

    If CollarCol Is Nothing Then Set CollarCol = New Collection
    
  • If you want to make this more efficient, don't open and close the csv file as a workbook. Opening a workbook is an expensive operation. Instead, use an adodb recordset to read in the closed file and loop through the recordset instead. Here is one example of how to get the data into a recordset.

FindStart & FindEnd

I'm a bit torn on these. On one hand, they do one thing and do it well. They're also nicely decoupled. On the other, they share some copy/pasted code. DRYing these out to share the common code would couple them together in a way I'm not sure I care for. You could have FindStart() call FindEnd() if you chose to do so.

Now, assuming that by "efficient" you meant you want to squeeze every last bit of performance out of the code you could do something along the following lines, but I'm not sure I'd really recommend it. Take it for what it's worth to you.

To find the starting index, you first have to find the ending index. You also call both of these functions in rapid succession. This means that you're .Finding the same value twice in a row. What you could do (and again, I'm not sure I recommend actually doing this) is take advantage of passing arguments ByRef and turn your two functions into a single Sub that overwrites the values of some out parameters. This is more efficient because the find only happens once, but readability suffers. It's not often you'll see people use this type of method to return values.

Private Sub FindEndPoints(ByVal ToFind As String, ByRef outStartIndex As Integer, ByRef outEndIndex As Integer)

    ' find bottom of batch
    Dim Rng As Range
    If Trim(ToFind) <> "" Then
        With Sheets("Collar (Top View)").Range("I2:I30000")
            Set Rng = .Find(What:=ToFind, _
                            after:=.Cells(.Cells.Count), _
                            LookIn:=xlValues, _
                            LookAt:=xlWhole, _
                            SearchOrder:=xlByRows, _
                            searchdirection:=xlPrevious, _
                            MatchCase:=False)
            If Not Rng Is Nothing Then
                outEndIndex = Rng.Row ' found bottom
            Else
                MsgBox "Nothing found"
                Exit Function
            End If
        End With
    End If

    ' Loop past remeasures
    outStartIndex = outEndIndex
    Do While (Cells(outStartIndex, 11) = 1)
        outStartIndex = outStartIndex - 1
    Loop

    ' Loop while batch number is the same
    Do While (Cells(outStartIndex - 1, 9) = ToFind)
        If Cells(outStartIndex - 1, 10) < Cells(outStartIndex, 10) Or _
        Cells(outStartIndex, 11) = 1 Then
            outStartIndex = outStartIndex - 1
        Else
            Exit Do
        End If
    Loop

End Sub

Which you could call like so:

Dim Index As Integer, EndIndex As Integer

FindEndPoints(BatchNum, Index, EndIndex) 'The index variables will be set after this line executes

Like I said, readability/understandability suffers. That's why you don't often see people do this, but if you're after pure speed, this is the way to go.

InCollection

You received a (very good) review that focuses on just this function already, but there are a few things to note still.

  1. You've not used this function anywhere in the code you've shown us. If you're not using it, remove it.
  2. The function is useful beyond this code behind. It should probably live somewhere you could re-use it throughout your project(s). Perhaps as part of a Custom collection class or a *.bas module.
  3. You're re-inventing the wheel. The built in collection object doesn't handle keys very well (as I'm sure you're aware). There is an alternative in the Scripting Runtime Library. If this functionality is indeed important and needed, I recommend using a Scripting.Dictionary instead of a Collection. It has a built in Exists function that does exactly what your InCollection function does.
\$\endgroup\$
2
  • 1
    \$\begingroup\$ Well technically a CSV file is opened when you're reading it - just not in Excel ;) \$\endgroup\$ Commented Jan 21, 2015 at 2:42
  • \$\begingroup\$ Review completed now. @Mat'sMug I added an explanation of my earlier comment on your answer. \$\endgroup\$
    – RubberDuck
    Commented Jan 21, 2015 at 14:57
3
\$\begingroup\$

@RubberDuck gave a pretty awesome review here, I'm just going to cover dissect the InCollection function, which was left out.

I like that you are sticking a comment at the top of every procedure - that's very good practice. One little nitpick about this one though:

'Returns boolean true if an object is within a collection

The value true is always going to be a Boolean value, and since the function's signature already specifies a Boolean return type, it's best to just leave it out. Also the comment is a little bit misleading - it doesn't reaturn true if an object is within a collection: it returns true when a specified key exists in a specified collection. Hence I'd rephrase it as such:

'Returns True when specified key exists in specified collection.
Public Function InCollection(col As Collection, key As String) As Boolean

Parameters are implicitly passed ByRef, which means the function is empowered with the ability to change the reference of col and the value of key before the execution flow returns to the caller: even if you're not doing that, it's best to pass parameters ByVal, and when you intend parameters to be passed by reference, to pass them explicitly ByRef.

Procedure names should always be verbs - they are things your program does, that's why. A better name might be IsInCollection - the only better alternative I can think of would be to implement your own collection/enumerable class, and make it have a Contains, or ContainsKey method... that might be overkill if this is the only time you're ever going to need to find a key in a collection. On the other hand, having a "toolbox" with frequently used class modules ready to import into any VBA project, can be handy.

But I digress. Let's dive into the procedure's body.

  Dim var As Variant

I like that you're declaring the Variant type explicitly. As you may know, all VBA variables are of Variant type if you don't specify a type - being explicit is always a good thing.

The name isn't very good though: var is to Variant such as str is to String - and both are awful meaningless variable names. Naming things after their intent rather than their type usually helps with meaning. In this case value might be a better name.

  Dim errNumber As Long

Don't need. I'll get back to that one.

  InCollection = False
  Set var = Nothing

Don't need either. Any Boolean is False by default, so the function would return False if you never assigned its return value. It's good that you're explicitly assigning a value though, but assigning the return type's default value as the first executable line of code in the function feels wrong.

The type name of an unassigned Variant is Empty, and IsObject(var) would return False before that assignment. By setting it to Nothing, you have changed its type name to Nothing, and IsObject(var) would return True.

Since at this point we don't know yet exactly what we're going to get from the collection, we might as well leave it alone and let it be an Empty value.

  Err.Clear

If executable code above that line raised an error, this line wouldn't execute, because execution would jump right out of the procedure since no On Error statement was encountered before. Hence, Err.Number can only be 0 at this point, and calling Err.Clear is useless. This line has no effect whatsoever, and can be removed.

  On Error Resume Next
    var = col.Item(key)
    errNumber = CLng(Err.Number)
  On Error GoTo 0

Now you're treating var (which the VBA runtime now thinks is an Object) as a normal value type.

And here we are. I like that you are treating On Error Resume Next / On Error GoTo 0 as if it were a code block - that's pure awesomeness, because it makes it very clear what the "scope" of "resume next" is.

However On Error Resume Next / On Error GoTo 0 should only be used when you're expecting an error that you're ready to ignore. That isn't what's going on here: you are storing the error number in errNumber, to handle it later. That's poor error handling.

Err.Number is already a Long: the type conversion CLng is redundant, and can be safely removed.

Actually I would remove errNumber altogether. But I'll get back to that in a minute.

  '5 is not in, 0 and 438 represent incollection
  If errNumber = 5 Then ' it is 5 if not in collection
    InCollection = False
  Else
    InCollection = True
  End If

This If...Else block clearly belongs in an error-handling subroutine*. You're handling error number 5 "Invalid procedure call or argument", and letting error 438 "Object doesn't support this property or method" mean yes, that key is in the collection. I think it's wrong.

Your function should only ever return True when it's absolutely 100% certain that the specified key was found.

You will encounter runtime error 450 "Wrong number of arguments or invalid property assignment" if the collection item with the specified key is actually an object reference - your code will correctly return True in that case, but in a somewhat awkward way.

In reality, the only error you should have to deal with is 5 "Invalid procedure call or argument" which means that the specified key isn't referring to anything in the collection. Any other error is due to code that isn't doing exactly what you're expecting it to be doing - and returning True when that happens is plain wrong.


So, how should it be handled then? First, make the very first executable line of code in the function be an On Error statement that will redirect execution flow to an error-handling subroutine:

Private Function IsInCollection(ByVal col As Collection, ByVal key As String) As Boolean
    Dim result As Boolean
    On Error GoTo CleanFail

       'function logic goes here

CleanExit:
    IsInCollection = result
    Exit Function

CleanFail:
    result = False
    Resume CleanExit

End Function

The result variable is indeed superfluous, but it allows for a single spot to assign the function's return value. The CleanExit subroutine is responsible for assigning the function's return value, regarldess of the outcome - if the function runs into an error, it's still the exit point and only place where the function's return value is assigned. CleanFail only runs when any error occurs, in which case it explicitly sets the result to False. Simple, straightforward, foul-proof.

So how would the logic be implemented for this to work? We don't really care about the actual collection item - we only care whether it's there or not. Here's how I would do it:

result = TypeName(col(key)) <> vbNullString

Why TypeName? Because I know what type I'm getting: a String, regardless of whether the value is an Object or not. If the key exists, I'm getting the name of the type of the corresponding value. If it doesn't, runtime error 5 is handled in the CleanFail subroutine.

And since I don't care about the actual value, I'm not even allocating it a variable - either I get a non-null string (a null string is vbNullString - literally, it's a null string pointer; "" isn't a null string), or I get an error if and only if the key doesn't refer to an item in the collection.


Well, that ended up a longer answer than I anticipated. Good thing @RubberDuck covered pretty much everything else!


*@RubberDuck is misusing the term "subroutine" in his answer - he means "procedure". A subroutine is identified by a label, lives within a procedure and usually contains a returning mechanism, that the Return or Resume/Resume Next keywords provide. The VBA keywords for jumping to a subroutine are GoTo, and GoSub when you're planning on returning to the call site.

\$\endgroup\$
1
  • 1
    \$\begingroup\$ Great answer, but.... Dictionary.Exists()... Will be back to explain. \$\endgroup\$
    – RubberDuck
    Commented Jan 21, 2015 at 10:28

Not the answer you're looking for? Browse other questions tagged or ask your own question.